From da7c870b947cc147e73937f7add62a73a26eae4f Mon Sep 17 00:00:00 2001 From: msglm Date: Sun, 5 Feb 2023 03:22:06 -0600 Subject: 1.0.4 improvements Fixed cache system Fixed Company names not appearing vastly simplified the cache system improved stability Version Bump --- src/indeedwatcher.nim | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim index 083d0ae..347e419 100644 --- a/src/indeedwatcher.nim +++ b/src/indeedwatcher.nim @@ -40,7 +40,7 @@ let outdir = config["outdir"].getStr() ##Cache section of config let chromedriverloc = config["chromedriver"].getStr() let cachefileloc = getCacheDir() & "/indeedwatcher/listings.cache" -var cache: seq[string] +let cache = open(cachefileloc, fmAppend) ##Filtering section of config let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr()) @@ -52,13 +52,14 @@ sleep 5000 echo "connecting" #TODO make the port configurable, some users may have something running here let driver = newWebDriver("http://localhost:9515") -let session = driver.createSession() +var session: Session var counter = 0 +#Behavior when CTRL+C proc terminate() {.noconv.} = echo "\nAcknowledged termination attempt..." - echo "Writing Cache..." - writeFile(cachefileloc, cache.join("\n")) + echo "Closing the Cache..." + cache.close() echo "Closing Session..." session.close() echo "Killing Chromedriver..." @@ -68,9 +69,8 @@ proc terminate() {.noconv.} = setControlCHook(terminate) for feed in feeds: - - cache = splitLines(readFile(cachefileloc)) - + session = driver.createSession() + sleep 3000 #Getting the listing URLs from the feeds var rssFeedReply: RSS for attempt in countup(0,3): @@ -95,7 +95,12 @@ for feed in feeds: #Don't even bother visiting it if its in the cache var URL = entry.link let URLID = entry.link.split('&')[3] - if not any(cache, proc (input: string): bool = input.contains(URLID)): + + #This isn't cache.readFile().contains(URLID) + #because nim has no way to both open a file in append mode + #and also open it as reading. Therefore, this blunder, which + #creates a new file in memory, is used instead. + if not readFile(cachefileloc).contains(URLID): for attempt in countup(0,3): try: echo "Telling chromium to navigate to " & URL @@ -110,12 +115,15 @@ for feed in feeds: counter = counter + 1 #HTML Parser + echo "Beginning to parse..." let jobTitle = session.findElement(".jobsearch-JobInfoHeader-title").get().getText() let fullDesc = session.findElement("#jobDescriptionText").get().getText() var employer: string try: - employer = session.findElement(".jobsearch-InlineCompanyRating-companyHeader").get().getText() + #This takes the location from the URL, removes all the junk around it, and replaced the URL pluses with actual spaces + #perhaps, a URL parsing library could have been used for this. + employer = entry.link.split('&')[1][2..^1].replace("+"," ") except UnpackDefect: employer = "None Listed" @@ -124,9 +132,11 @@ for feed in feeds: salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext() except UnpackDefect: salaryInfoAndJobType = "None Listed" + echo "Finishing the parse..." #Filtering if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)): + echo "Beginning to write to file..." #Output var output = """ Title: $1 @@ -137,15 +147,17 @@ for feed in feeds: $5 """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc] writeFile(outdir & jobTitle.replace("/") & ".txt", output) - cache.add(URL) echo "Wrote job to file!" + cache.writeLine(URL) + echo "Wrote listing to cache!" else: echo "Trigger was hit, discarding " & URL else: echo URL & " was in cache, discarding" - echo "wrote cache to cache..." - writeFile(cachefileloc, cache.join("\n")) + session.close() + -session.close() +cache.close() +#session.close() terminate(chromedriver) -- cgit v1.2.3