summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/indeedwatcher.nim 38
1 file changed, 25 insertions, 13 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index 083d0ae..347e419 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -40,7 +40,7 @@ let outdir = config["outdir"].getStr()
##Cache section of config
let chromedriverloc = config["chromedriver"].getStr()
let cachefileloc = getCacheDir() & "/indeedwatcher/listings.cache"
-var cache: seq[string]
+let cache = open(cachefileloc, fmAppend)
##Filtering section of config
let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr())
@@ -52,13 +52,14 @@ sleep 5000
echo "connecting"
#TODO make the port configurable, some users may have something running here
let driver = newWebDriver("http://localhost:9515")
-let session = driver.createSession()
+var session: Session
var counter = 0
+#Behavior when CTRL+C
proc terminate() {.noconv.} =
echo "\nAcknowledged termination attempt..."
- echo "Writing Cache..."
- writeFile(cachefileloc, cache.join("\n"))
+ echo "Closing the Cache..."
+ cache.close()
echo "Closing Session..."
session.close()
echo "Killing Chromedriver..."
@@ -68,9 +69,8 @@ proc terminate() {.noconv.} =
setControlCHook(terminate)
for feed in feeds:
-
- cache = splitLines(readFile(cachefileloc))
-
+ session = driver.createSession()
+ sleep 3000
#Getting the listing URLs from the feeds
var rssFeedReply: RSS
for attempt in countup(0,3):
@@ -95,7 +95,12 @@ for feed in feeds:
#Don't even bother visiting it if its in the cache
var URL = entry.link
let URLID = entry.link.split('&')[3]
- if not any(cache, proc (input: string): bool = input.contains(URLID)):
+
+    #This isn't cache.readFile().contains(URLID)
+    #because Nim has no way to open the same File handle for both
+    #appending and reading. Therefore this workaround, which reads
+    #the whole cache file into memory on each check, is used instead.
+ if not readFile(cachefileloc).contains(URLID):
for attempt in countup(0,3):
try:
echo "Telling chromium to navigate to " & URL
@@ -110,12 +115,15 @@ for feed in feeds:
counter = counter + 1
#HTML Parser
+ echo "Beginning to parse..."
let jobTitle = session.findElement(".jobsearch-JobInfoHeader-title").get().getText()
let fullDesc = session.findElement("#jobDescriptionText").get().getText()
var employer: string
try:
- employer = session.findElement(".jobsearch-InlineCompanyRating-companyHeader").get().getText()
+      #This takes the location from the URL, removes all the junk around it, and replaces the URL pluses with actual spaces.
+      #Perhaps a URL-parsing library could have been used for this.
+ employer = entry.link.split('&')[1][2..^1].replace("+"," ")
except UnpackDefect:
employer = "None Listed"
@@ -124,9 +132,11 @@ for feed in feeds:
salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext()
except UnpackDefect:
salaryInfoAndJobType = "None Listed"
+ echo "Finishing the parse..."
#Filtering
if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)):
+ echo "Beginning to write to file..."
#Output
var output = """
Title: $1
@@ -137,15 +147,17 @@ for feed in feeds:
$5
""" % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
writeFile(outdir & jobTitle.replace("/") & ".txt", output)
- cache.add(URL)
echo "Wrote job to file!"
+ cache.writeLine(URL)
+ echo "Wrote listing to cache!"
else:
echo "Trigger was hit, discarding " & URL
else:
echo URL & " was in cache, discarding"
- echo "wrote cache to cache..."
- writeFile(cachefileloc, cache.join("\n"))
+ session.close()
+
-session.close()
+cache.close()
+#session.close()
terminate(chromedriver)