summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: msglm <msglm@techchud.xyz> 2023-05-25 20:17:01 -0500
committer: msglm <msglm@techchud.xyz> 2023-05-25 20:17:01 -0500
commit: 2162abb7b259670c3328d783f89b2f0224932a69 (patch)
tree: 604cd567bddc75652cf7e7aaba5c293d2cbbe4f7 /src
parent: e799ce9ee8bde36d85c7fcdfbe73d63e8866b12e (diff)
download: indeedwatcher-2162abb7b259670c3328d783f89b2f0224932a69.tar.gz
indeedwatcher-2162abb7b259670c3328d783f89b2f0224932a69.tar.bz2
indeedwatcher-2162abb7b259670c3328d783f89b2f0224932a69.zip
Added configurable count ups for timeouts. Bumped version number. Using XPaths for description parsing now.
Diffstat (limited to 'src')
-rw-r--r-- src/indeedwatcher.nim 20
1 files changed, 12 insertions, 8 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index cd3e220..ce5c2b1 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -24,6 +24,8 @@ if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
outdir = ""
#Port you wish chromedriver to use
port = 9515
+ #Number of times to retry before failing
+ retryNum = 30
#Location of chromedriver
chromedriver = "/usr/bin/chromedriver"
#If you would like headless mode enabled or not
@@ -50,6 +52,7 @@ let config = parsetoml.parseFile(getConfigDir() & "/indeedwatcher/config.toml")
##Main section of config
let feeds = config["feeds"].getElems().mapIt(it.getStr())
let outdir = config["outdir"].getStr()
+let retryNum = config["retryNum"].getInt()
let driverURL = "http://localhost:" & config["port"].getInt().intToStr()
##Cache section of config
let chromedriverloc = config["chromedriver"].getStr()
@@ -114,9 +117,10 @@ for feed in feeds:
echo "now reading " & feed
rssFeedReply = getRSS(feed)
except:
- if attempt < 3:
- echo "Recieved an error: trying again in 30 seconds..."
- sleep 30000
+ if attempt < 30:
+ var attemptTime = 10000*attempt
+ echo "Recieved an error: trying again in " & $(attemptTime/1000) & " seconds..."
+ sleep attemptTime
continue
else:
raise
@@ -139,12 +143,12 @@ for feed in feeds:
#and also open it as reading. Therefore, this blunder, which
#creates a new file in memory, is used instead.
if not readFile(cachefileloc).contains(posting.URLID) and not postValid(posting):
- for attempt in countup(0,3):
+ for attempt in countup(0,retryNum):
try:
echo "Telling chromium to navigate to " & posting.URL
session.navigate(posting.URL)
except:
- if attempt < 3:
+ if attempt < retryNum:
echo "Recieved an error: trying again..."
continue
else:
@@ -153,15 +157,15 @@ for feed in feeds:
urlcounter = urlcounter + 1
#HTML Parser
- echo "Beginning to parse..."
- let fullDesc = session.findElement("#jobDescriptionText").get().getText()
+ echo "Beginning to parse desc..."
+ var fullDesc = session.findElement("/html/body/div/div[2]/div/div[4]/div/div/div[1]/div[1]/div[5]/div[5]", strategy=XPathSelector).get().getText()
+ echo "Beginning to parse salary info and job type..."
var salaryInfoAndJobType: string
try:
salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext()
except UnpackDefect:
salaryInfoAndJobType = "None Listed"
- echo "Finishing the parse..."
echo "Beginning to write to file..."
#Output