From 7c32a833c399742c1dd52537a68799a7d1585742 Mon Sep 17 00:00:00 2001 From: msglm Date: Fri, 17 Feb 2023 04:19:44 -0600 Subject: added more ways to blacklist undesired content --- src/indeedwatcher.nim | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim index 0fc8af4..a8d6bfc 100644 --- a/src/indeedwatcher.nim +++ b/src/indeedwatcher.nim @@ -9,6 +9,14 @@ import sequtils import uri import json +type + indeedJobDesc = object + URL: string + jobName: string + employer: string + location: string + URLID: string + if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"): createDir(getConfigDir() & "/indeedwatcher/") let defaultConfig = """ @@ -26,6 +34,8 @@ if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"): #Phrases that, if they appear, will cause the job to be instantly thrown out [blacklist] title= [\"Senior\", \"Sr.\"] + employer= [\"NSA\"] + location= [\"Ohio\"] """ writeFile(getConfigDir() & "/indeedwatcher/config.toml", defaultConfig) @@ -48,6 +58,8 @@ let cache = open(cachefileloc, fmAppend) ##Filtering section of config let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr()) +let employerblacklist = config["blacklist"]["employer"].getElems().mapIt(it.getStr()) +let locationblacklist = config["blacklist"]["location"].getElems().mapIt(it.getStr()) ##Does the user desire headlessness? 
var args: JsonNode
@@ -80,6 +92,14 @@ proc terminate() {.noconv.} =
     quit()
 setControlCHook(terminate)
 
+proc postValid(posting: indeedJobDesc) : bool =
+    ## Returns true when the posting may be kept: its job name, employer and
+    ## location each contain none of the phrases in the matching blacklist.
+    ## A hit in any single blacklist is enough to reject the posting.
+    not (any(titleblacklist, proc (input: string): bool = posting.jobName.contains(input)) or
+        any(employerblacklist, proc (input: string): bool = posting.employer.contains(input)) or
+        any(locationblacklist, proc (input: string): bool = posting.location.contains(input)))
+
 for feed in feeds:
     #let args = %*{"desiredCapabilities":{"browserName":"chromium"}}
     session = driver.createSession(args)
@@ -110,22 +130,19 @@ for feed in feeds:
             urlcounter = 0
 
         #Don't even bother visiting it if its in the cache or hits a trigger word
-        var URL = entry.link
-        let queries = URL.parseUri.query.decodeQuery().toSeq()
-        let jobName = queries[0].value
-        let employer = queries[1].value
-        let location = queries[2].value
-        let URLID = queries[3].value
+        let queries = entry.link.parseUri.query.decodeQuery().toSeq()
+        let posting = indeedJobDesc(URL: entry.link, jobName: queries[0].value, employer: queries[1].value,
+            location: queries[2].value, URLID: queries[3].value)
 
         #This isn't cache.readFile().contains(URLID)
         #because nim has no way to both open a file in append mode
         #and also open it as reading. Therefore, this blunder, which
         #creates a new file in memory, is used instead.
-        if not readFile(cachefileloc).contains(URLID) and not any(titleblacklist, proc (input: string): bool = jobName.contains(input)):
+        if not readFile(cachefileloc).contains(posting.URLID) and postValid(posting):
             for attempt in countup(0,3):
                 try:
-                    echo "Telling chromium to navigate to " & URL
-                    session.navigate(URL)
+                    echo "Telling chromium to navigate to " & posting.URL
+                    session.navigate(posting.URL)
                 except:
                     if attempt < 3:
                         echo "Recieved an error: trying again..."
@@ -156,17 +173,15 @@ for feed in feeds: URL : $5 Description: $6 - """ % [jobName, employer, location, salaryInfoAndJobType, URL, fullDesc] - writeFile(outdir & jobName.replace("/") & ".txt", output) + """ % [posting.jobName, posting.employer, posting.location, salaryInfoAndJobType, posting.URL, fullDesc] + writeFile(outdir & posting.jobName.replace("/") & ".txt", output) echo "Wrote job to file!" - cache.writeLine(URL) + cache.writeLine(posting.URL) echo "Wrote listing to cache!" else: - echo URL & " was in cache or hit a trigger word, discarding" + echo posting.URL & " was in cache or hit a trigger word, discarding" session.close() cache.close() -#session.close() terminate(chromedriver) - -- cgit v1.2.3