diff options
author | msglm <msglm@techchud.xyz> | 2023-05-25 20:17:01 -0500 |
---|---|---|
committer | msglm <msglm@techchud.xyz> | 2023-05-25 20:17:01 -0500 |
commit | 2162abb7b259670c3328d783f89b2f0224932a69 (patch) | |
tree | 604cd567bddc75652cf7e7aaba5c293d2cbbe4f7 /src | |
parent | e799ce9ee8bde36d85c7fcdfbe73d63e8866b12e (diff) | |
download | indeedwatcher-2162abb7b259670c3328d783f89b2f0224932a69.tar.gz indeedwatcher-2162abb7b259670c3328d783f89b2f0224932a69.tar.bz2 indeedwatcher-2162abb7b259670c3328d783f89b2f0224932a69.zip |
Added configurable count ups for timeouts
Bumped version number
Using XPaths for description parsing now
Diffstat (limited to 'src')
-rw-r--r-- | src/indeedwatcher.nim | 20 |
1 files changed, 12 insertions, 8 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim index cd3e220..ce5c2b1 100644 --- a/src/indeedwatcher.nim +++ b/src/indeedwatcher.nim @@ -24,6 +24,8 @@ if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"): outdir = "" #Port you wish chromedriver to use port = 9515 + #Number of times to retry before failing + retryNum = 30 #Location of chromedriver chromedriver = "/usr/bin/chromedriver" #If you would like headless mode enabled or not @@ -50,6 +52,7 @@ let config = parsetoml.parseFile(getConfigDir() & "/indeedwatcher/config.toml") ##Main section of config let feeds = config["feeds"].getElems().mapIt(it.getStr()) let outdir = config["outdir"].getStr() +let retryNum = config["retryNum"].getInt() let driverURL = "http://localhost:" & config["port"].getInt().intToStr() ##Cache section of config let chromedriverloc = config["chromedriver"].getStr() @@ -114,9 +117,10 @@ for feed in feeds: echo "now reading " & feed rssFeedReply = getRSS(feed) except: - if attempt < 3: - echo "Recieved an error: trying again in 30 seconds..." - sleep 30000 + if attempt < 30: + var attemptTime = 10000*attempt + echo "Recieved an error: trying again in " & $(attemptTime/1000) & " seconds..." + sleep attemptTime continue else: raise @@ -139,12 +143,12 @@ for feed in feeds: #and also open it as reading. Therefore, this blunder, which #creates a new file in memory, is used instead. if not readFile(cachefileloc).contains(posting.URLID) and not postValid(posting): - for attempt in countup(0,3): + for attempt in countup(0,retryNum): try: echo "Telling chromium to navigate to " & posting.URL session.navigate(posting.URL) except: - if attempt < 3: + if attempt < retryNum: echo "Recieved an error: trying again..." continue else: @@ -153,15 +157,15 @@ for feed in feeds: urlcounter = urlcounter + 1 #HTML Parser - echo "Beginning to parse..." - let fullDesc = session.findElement("#jobDescriptionText").get().getText() + echo "Beginning to parse desc..." + var fullDesc = session.findElement("/html/body/div/div[2]/div/div[4]/div/div/div[1]/div[1]/div[5]/div[5]", strategy=XPathSelector).get().getText() + echo "Beginning to parse salary info and job type..." var salaryInfoAndJobType: string try: salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext() except UnpackDefect: salaryInfoAndJobType = "None Listed" - echo "Finishing the parse..." echo "Beginning to write to file..." #Output |