Diffstat (limited to 'src/indeedwatcher.nim')
-rw-r--r--   src/indeedwatcher.nim   69
1 file changed, 33 insertions, 36 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index bc52138..8c4a71e 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -6,6 +6,7 @@ import os
 import strutils
 import parsetoml
 import sequtils
+import uri
 
 if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
   createDir(getConfigDir() & "/indeedwatcher/")
@@ -53,7 +54,8 @@ echo "connecting"
 #TODO make the port configurable, some users may have something running here
 let driver = newWebDriver("http://localhost:9515")
 var session: Session
-var counter = 0
+var feedcounter = 0
+var urlcounter = 0
 
 #Behavior when CTRL+C
 proc terminate() {.noconv.} =
@@ -70,7 +72,10 @@ setControlCHook(terminate)
 
 for feed in feeds:
   session = driver.createSession()
-  sleep 3000
+  if feedcounter > 3:
+    echo "resting for 20 seconds ..."
+    sleep 20000
+    feedcounter = 0
   #Getting the listing URLs from the feeds
   var rssFeedReply: RSS
   for attempt in countup(0,3):
@@ -87,20 +92,24 @@ for feed in feeds:
 
   for entry in rssFeedReply.items:
     #Sleep so indeed.com doesn't freak out
-    if counter > 7:
+    if urlcounter > 7:
       echo "resting for 10 seconds ..."
       sleep 10000
-      counter = 0
+      urlcounter = 0
 
-    #Don't even bother visiting it if its in the cache
+    #Don't even bother visiting it if its in the cache or hits a trigger word
     var URL = entry.link
-    let URLID = entry.link.split('&')[3]
+    let queries = URL.parseUri.query.decodeQuery().toSeq()
+    let jobName = queries[0].value
+    let employer = queries[1].value
+    let location = queries[2].value
+    let URLID = queries[3].value
 
     #This isn't cache.readFile().contains(URLID)
     #because nim has no way to both open a file in append mode
     #and also open it as reading. Therefore, this blunder, which
    #creates a new file in memory, is used instead.
-    if not readFile(cachefileloc).contains(URLID):
+    if not readFile(cachefileloc).contains(URLID) or not any(titleblacklist, proc (input: string): bool = jobName.contains(input)):
       for attempt in countup(0,3):
         try:
           echo "Telling chromium to navigate to " & URL
@@ -112,21 +121,12 @@ for feed in feeds:
           else:
             raise
         break
-      counter = counter + 1
+      urlcounter = urlcounter + 1
 
       #HTML Parser
       echo "Beginning to parse..."
-      let jobTitle = session.findElement(".jobsearch-JobInfoHeader-title").get().getText()
       let fullDesc = session.findElement("#jobDescriptionText").get().getText()
 
-      var employer: string
-      try:
-        #This takes the location from the URL, removes all the junk around it, and replaced the URL pluses with actual spaces
-        #perhaps, a URL parsing library could have been used for this.
-        employer = entry.link.split('&')[1][2..^1].replace("+"," ")
-      except UnpackDefect:
-        employer = "None Listed"
-
       var salaryInfoAndJobType: string
       try:
         salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext()
@@ -134,26 +134,23 @@ for feed in feeds:
         salaryInfoAndJobType = "None Listed"
       echo "Finishing the parse..."
 
-      #Filtering
-      if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)):
-        echo "Beginning to write to file..."
-        #Output
-        var output = """
-        Title: $1
-        Company: $2
-        Salary Info and Job Type: $3
-        URL : $4
-        Description:
-        $5
-        """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
-        writeFile(outdir & jobTitle.replace("/") & ".txt", output)
-        echo "Wrote job to file!"
-        cache.writeLine(URL)
-        echo "Wrote listing to cache!"
-      else:
-        echo "Trigger was hit, discarding " & URL
+      echo "Beginning to write to file..."
+      #Output
+      var output = """
+      Title: $1
+      Employer: $2
+      Location: $3
+      Salary Info and Job Type: $4
+      URL : $5
+      Description:
+      $6
+      """ % [jobName, employer, location, salaryInfoAndJobType, URL, fullDesc]
+      writeFile(outdir & jobName.replace("/") & ".txt", output)
+      echo "Wrote job to file!"
+      cache.writeLine(URL)
+      echo "Wrote listing to cache!"
     else:
-      echo URL & " was in cache, discarding"
+      echo URL & " was in cache or hit a trigger word, discarding"
 
   session.close()
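For reference, here is a minimal standalone sketch of the query-parsing approach the commit switches to, replacing the old entry.link.split('&') indexing. The example URL, its parameter names, and their order are made up for illustration; the commit itself only assumes that the first four query parameters of an Indeed RSS link are the job title, employer, location, and listing ID.

import uri
import sequtils

# Hypothetical listing URL; real links come from indeed.com's RSS feed.
let URL = "https://www.indeed.com/rc/clk?q=Library+Assistant&cmp=Example+Employer&l=Springfield&jk=0123456789abcdef"

# decodeQuery yields the (key, value) pairs of the query string in order,
# with %XX escapes and '+' already decoded.
let queries = URL.parseUri.query.decodeQuery().toSeq()

let jobName  = queries[0].value   # "Library Assistant"
let employer = queries[1].value   # "Example Employer"
let location = queries[2].value   # "Springfield"
let URLID    = queries[3].value   # "0123456789abcdef"

echo jobName, " at ", employer, " (", location, "), id ", URLID

Like the commit, the sketch relies on parameter position rather than name; if the feed ever reorders its query string, looking the values up by key from the same (key, value) pairs would be the more robust variant.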