From 2162abb7b259670c3328d783f89b2f0224932a69 Mon Sep 17 00:00:00 2001 From: msglm Date: Thu, 25 May 2023 20:17:01 -0500 Subject: v1.1.3 Added configurable count ups for timeouts Bumped version number Using XPaths for description parsing now --- indeedwatcher.nimble | 2 +- src/indeedwatcher.nim | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/indeedwatcher.nimble b/indeedwatcher.nimble index 30318a4..7ff3848 100644 --- a/indeedwatcher.nimble +++ b/indeedwatcher.nimble @@ -1,6 +1,6 @@ # Package -version = "1.1.0" +version = "1.1.3" author = "msglm" description = "Watches indeed for job updates." license = "AGPL-3.0-only" diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim index cd3e220..ce5c2b1 100644 --- a/src/indeedwatcher.nim +++ b/src/indeedwatcher.nim @@ -24,6 +24,8 @@ if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"): outdir = "" #Port you wish chromedriver to use port = 9515 + #Number of times to retry before failing + retryNum = 30 #Location of chromedriver chromedriver = "/usr/bin/chromedriver" #If you would like headless mode enabled or not @@ -50,6 +52,7 @@ let config = parsetoml.parseFile(getConfigDir() & "/indeedwatcher/config.toml") ##Main section of config let feeds = config["feeds"].getElems().mapIt(it.getStr()) let outdir = config["outdir"].getStr() +let retryNum = config["retryNum"].getInt() let driverURL = "http://localhost:" & config["port"].getInt().intToStr() ##Cache section of config let chromedriverloc = config["chromedriver"].getStr() @@ -114,9 +117,10 @@ for feed in feeds: echo "now reading " & feed rssFeedReply = getRSS(feed) except: - if attempt < 3: - echo "Recieved an error: trying again in 30 seconds..." - sleep 30000 + if attempt < 30: + var attemptTime = 10000*attempt + echo "Recieved an error: trying again in " & $(attemptTime/1000) & " seconds..." + sleep attemptTime continue else: raise @@ -139,12 +143,12 @@ for feed in feeds: #and also open it as reading. Therefore, this blunder, which #creates a new file in memory, is used instead. if not readFile(cachefileloc).contains(posting.URLID) and not postValid(posting): - for attempt in countup(0,3): + for attempt in countup(0,retryNum): try: echo "Telling chromium to navigate to " & posting.URL session.navigate(posting.URL) except: - if attempt < 3: + if attempt < retryNum: echo "Recieved an error: trying again..." continue else: @@ -153,15 +157,15 @@ for feed in feeds: urlcounter = urlcounter + 1 #HTML Parser - echo "Beginning to parse..." - let fullDesc = session.findElement("#jobDescriptionText").get().getText() + echo "Beginning to parse desc..." + var fullDesc = session.findElement("/html/body/div/div[2]/div/div[4]/div/div/div[1]/div[1]/div[5]/div[5]", strategy=XPathSelector).get().getText() + echo "Beginning to parse salary info and job type..." var salaryInfoAndJobType: string try: salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext() except UnpackDefect: salaryInfoAndJobType = "None Listed" - echo "Finishing the parse..." echo "Beginning to write to file..." #Output -- cgit v1.2.3