diff options
author | msglm <msglm@techchud.xyz> | 2023-01-11 20:49:28 -0600 |
---|---|---|
committer | msglm <msglm@techchud.xyz> | 2023-01-11 20:49:28 -0600 |
commit | b9d8d41e2f1ad23ebe5d43d96f283822209f49b9 (patch) | |
tree | fe0fd0051dba248e5adfcdfa3a636f959ec0f40c /src/indeedwatcher.nim | |
parent | 466e5376d61f004169fce19c8e20d192d272b59e (diff) | |
download | indeedwatcher-b9d8d41e2f1ad23ebe5d43d96f283822209f49b9.tar.gz indeedwatcher-b9d8d41e2f1ad23ebe5d43d96f283822209f49b9.tar.bz2 indeedwatcher-b9d8d41e2f1ad23ebe5d43d96f283822209f49b9.zip |
4 big improvements
config, if it doesn't exist, is created
cache, if it doesn't exist, is created
nimble build system added
you can now blacklist words from job titles
Diffstat (limited to 'src/indeedwatcher.nim')
-rw-r--r-- | src/indeedwatcher.nim | 82 |
1 file changed, 62 insertions, 20 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim index 0f86a18..c1946dc 100644 --- a/src/indeedwatcher.nim +++ b/src/indeedwatcher.nim @@ -7,16 +7,49 @@ import strutils import parsetoml import sequtils +if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"): + createDir(getConfigDir() & "/indeedwatcher/") + let defaultConfig = """ + #Output directory of your potential job listings + outdir = "" + #Port you wish chromedriver to use + port = 9515 + #Location of chromedriver + chromedriver = "" + #Array of RSS urls that you wish the program to parse + feeds = [ \"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=contract&explvl=entry_level\", \"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=temporary&explvl=entry_level\"] + + #Phrases that, if they appear, will cause the job to be instantly thrown out + [blacklist] + title= [\"Senior\", \"Sr.\"] + """ + writeFile(getConfigDir() & "/indeedwatcher/config.toml", defaultConfig) + +if not fileExists(getCacheDir() & "/indeedwatcher/config.toml"): + createDir(getCacheDir() & "/indeedwatcher/") + writeFile(getCacheDir() & "/indeedwatcher/listings.cache", "") + + #TODO make this create folders and files for this automatically upon first start up + +#Reading the config file let config = parsetoml.parseFile(getConfigDir() & "/indeedwatcher/config.toml") + +##Main section of config let feeds = config["feeds"].getElems().mapIt(it.getStr()) let outdir = config["outdir"].getStr() + +##Cache section of config let chromedriverloc = config["chromedriver"].getStr() let cachefileloc = getCacheDir() & "/indeedwatcher/listings.cache" var cache = splitLines(readFile(cachefileloc)) +##Filtering section of config +let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr()) + + #Webdriver let chromedriver = startProcess(chromedriverloc, "", ["--headless"]) sleep 5000 @@ -42,9 +75,18 @@ for feed in feeds: #Getting the listing URLs from the feeds echo "now reading " & 
feed - sleep 1000 - var rssFeedReply = getRSS(feed) - + var rssFeedReply: RSS + for attempt in countup(0,5): + try: + rssFeedReply = getRSS(feed) + except: + if attempt < 5 - 1: + continue + else: + raise + break + + for entry in rssFeedReply.items: #Sleep so indeed.com doesn't freak out if counter > 7: @@ -55,9 +97,6 @@ for feed in feeds: #Don't even bother visiting it if its in the cache var URL = entry.link let URLID = entry.link.split('&')[3] - echo URL - echo URLID - echo any(cache, proc (input: string): bool = input.contains(URLID)) if not any(cache, proc (input: string): bool = input.contains(URLID)): session.navigate(URL) counter = counter + 1 @@ -78,20 +117,23 @@ for feed in feeds: except UnpackDefect: salaryInfoAndJobType = "None Listed" - - #Job Value Scorer - - #Output - var output = """ - Title: $1 - Company: $2 - Salary Info and Job Type: $3 - URL : $4 - Description: - $5 - """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc] - writeFile(outdir & jobTitle.replace("/") & ".txt", output) - cache.add(URL) + #Filtering + if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)): + #Output + var output = """ + Title: $1 + Company: $2 + Salary Info and Job Type: $3 + URL : $4 + Description: + $5 + """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc] + writeFile(outdir & jobTitle.replace("/") & ".txt", output) + cache.add(URL) + else: + echo "Trigger was hit, discarding " & URL + else: + echo URL & " was in cache, discarding" session.close() terminate(chromedriver) |