From b9d8d41e2f1ad23ebe5d43d96f283822209f49b9 Mon Sep 17 00:00:00 2001 From: msglm Date: Wed, 11 Jan 2023 20:49:28 -0600 Subject: 4 big improvements config, if it doesn't exist, is created cache, if it doesn't exist, is created nimble build system added you can now blacklist words from job titles --- .gitignore | 1 + config/config.toml | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++ nim.cfg | 1 + nims.cfg | 1 + src/indeedwatcher.nim | 82 ++++++++++++++++------ 5 files changed, 255 insertions(+), 20 deletions(-) create mode 100644 .gitignore create mode 100644 config/config.toml create mode 100644 nim.cfg create mode 100644 nims.cfg diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e660fd9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +bin/ diff --git a/config/config.toml b/config/config.toml new file mode 100644 index 0000000..70458f5 --- /dev/null +++ b/config/config.toml @@ -0,0 +1,190 @@ +outdir = "/home/joybuke/Documents/Jobs/Potential/" +port = 9515 +chromedriver = "/usr/bin/chromedriver" +feeds = [ +"https://rss.indeed.com/rss?q=Information%20Technology&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Information%20Technology&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Information%20Technology&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Information%20Technology&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Information%20Technology&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&explvl=mid_level", 
+"https://rss.indeed.com/rss?q=IT&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=IT&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=IT&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=IT&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=IT&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=IT&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=IT&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=IT&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=IT&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=IT&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=Python&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Python&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Python&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Python&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Python&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Python&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Python&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Python&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Python&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Python&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=Software%20Quality%20Assurance&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Software%20Quality%20Assurance&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Software%20Quality%20Assurance&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Software%20Quality%20Assurance&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Software%20Quality%20Assurance&l=Arkansas&explvl=mid_level", 
+"https://rss.indeed.com/rss?q=Software%20Quality%20Assurance&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Software%20Quality%20Assurance&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Software%20Quality%20Assurance&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Software%20Quality%20Assurance&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Software%20Quality%20Assurance&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=Linux&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Linux&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Linux&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Linux&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Linux&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Linux&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Linux&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Linux&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Linux&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Linux&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=Operations%20Engineer&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Operations%20Engineer&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Operations%20Engineer&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Operations%20Engineer&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Operations%20Engineer&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Operations%20Engineer&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Operations%20Engineer&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Operations%20Engineer&l=Remote&jt=parttime&explvl=entry_level", 
+"https://rss.indeed.com/rss?q=Operations%20Engineer&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Operations%20Engineer&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=Infrastructure%20Reliability%20Engineer&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Infrastructure%20Reliability%20Engineer&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Infrastructure%20Reliability%20Engineer&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Infrastructure%20Reliability%20Engineer&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Infrastructure%20Reliability%20Engineer&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Infrastructure%20Reliability%20Engineer&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Infrastructure%20Reliability%20Engineer&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Infrastructure%20Reliability%20Engineer&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Infrastructure%20Reliability%20Engineer&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Infrastructure%20Reliability%20Engineer&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=UNIX&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=UNIX&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=UNIX&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=UNIX&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=UNIX&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=UNIX&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=UNIX&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=UNIX&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=UNIX&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=UNIX&l=Remote&explvl=mid_level", 
+"https://rss.indeed.com/rss?q=System%20Administrator&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=System%20Administrator%20Linux&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator%20Linux&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator%20Linux&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator%20Linux&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator%20Linux&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=System%20Administrator&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=System%20Administrator%20Linux&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator%20Linux&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator%20Linux&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator%20Linux&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=System%20Administrator%20Linux&l=Remote&explvl=mid_level", 
+"https://rss.indeed.com/rss?q=Open%20Source&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Open%20Source%20Linux&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source%20Linux&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source%20Linux&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source%20Linux&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source%20Linux&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Open%20Source&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=Open%20Source%20Linux&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source%20Linux&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source%20Linux&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source%20Linux&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Open%20Source%20Linux&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=Red%20Hat%20Certified%20System%20Administrator&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Red%20Hat%20Certified%20System%20Administrator&l=Arkansas&jt=temporary&explvl=entry_level", 
+"https://rss.indeed.com/rss?q=Red%20Hat%20Certified%20System%20Administrator&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Red%20Hat%20Certified%20System%20Administrator&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Red%20Hat%20Certified%20System%20Administrator&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Red%20Hat%20Certified%20System%20Administrator&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Red%20Hat%20Certified%20System%20Administrator&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Red%20Hat%20Certified%20System%20Administrator&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Red%20Hat%20Certified%20System%20Administrator&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Red%20Hat%20Certified%20System%20Administrator&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=RHEL&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=RHEL&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=RHEL&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=RHEL&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=RHEL&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=RHEL&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=RHEL&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=RHEL&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=RHEL&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=RHEL&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=Ubuntu&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Ubuntu&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Ubuntu&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Ubuntu&l=Arkansas&explvl=entry_level", 
+"https://rss.indeed.com/rss?q=Ubuntu&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Ubuntu&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Ubuntu&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Ubuntu&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Ubuntu&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Ubuntu&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=Debian&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Debian&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Debian&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Debian&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Debian&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Debian&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Debian&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Debian&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Debian&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Debian&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=Nginx&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Nginx&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Nginx&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Nginx&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=Nginx&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=Nginx&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=Nginx&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=Nginx&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=Nginx&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=Nginx&l=Remote&explvl=mid_level", 
+"https://rss.indeed.com/rss?q=DevOps&l=Arkansas&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=DevOps&l=Arkansas&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=DevOps&l=Arkansas&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=DevOps&l=Arkansas&explvl=entry_level", +"https://rss.indeed.com/rss?q=DevOps&l=Arkansas&explvl=mid_level", +"https://rss.indeed.com/rss?q=DevOps&l=Remote&jt=contract&explvl=entry_level", +"https://rss.indeed.com/rss?q=DevOps&l=Remote&jt=temporary&explvl=entry_level", +"https://rss.indeed.com/rss?q=DevOps&l=Remote&jt=parttime&explvl=entry_level", +"https://rss.indeed.com/rss?q=DevOps&l=Remote&explvl=entry_level", +"https://rss.indeed.com/rss?q=DevOps&l=Remote&explvl=mid_level", +"https://rss.indeed.com/rss?q=internship%20IT&l=Arkansas", +"https://rss.indeed.com/rss?q=internship%20IT&l=Remote", +] + +[blacklist] +title= ["Senior", "Sr.", "Solid Waste"] diff --git a/nim.cfg b/nim.cfg new file mode 100644 index 0000000..a0de51d --- /dev/null +++ b/nim.cfg @@ -0,0 +1 @@ +-d:release -d:ssl diff --git a/nims.cfg b/nims.cfg new file mode 100644 index 0000000..de09037 --- /dev/null +++ b/nims.cfg @@ -0,0 +1 @@ +switch("define", "ssl") diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim index 0f86a18..c1946dc 100644 --- a/src/indeedwatcher.nim +++ b/src/indeedwatcher.nim @@ -7,16 +7,49 @@ import strutils import parsetoml import sequtils +if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"): + createDir(getConfigDir() & "/indeedwatcher/") + let defaultConfig = """ + #Output directory of your porential job listings + outdir = "" + #Port you wish chromedriver to use + port = 9515 + #Location of chromedriver + chromedriver = "" + #Array of RSS urls that you wish the program to parse + feeds = [ "https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=contract&explvl=entry_level", "https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=temporary&explvl=entry_level"] 
+ + #Phrases that, if they appear, will cause the job to be instantly thrown out + [blacklist] + title= ["Senior", "Sr."] + """ + writeFile(getConfigDir() & "/indeedwatcher/config.toml", defaultConfig) + +if not fileExists(getCacheDir() & "/indeedwatcher/listings.cache"): #Only seed an empty cache when none exists, so known listings survive restarts + createDir(getCacheDir() & "/indeedwatcher/") + writeFile(getCacheDir() & "/indeedwatcher/listings.cache", "") + + #TODO make this create folders and files for this automatically upon first start up + +#Reading the config file let config = parsetoml.parseFile(getConfigDir() & "/indeedwatcher/config.toml") + +##Main section of config let feeds = config["feeds"].getElems().mapIt(it.getStr()) let outdir = config["outdir"].getStr() + +##Cache section of config let chromedriverloc = config["chromedriver"].getStr() let cachefileloc = getCacheDir() & "/indeedwatcher/listings.cache" var cache = splitLines(readFile(cachefileloc)) +##Filtering section of config +let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr()) + + #Webdriver let chromedriver = startProcess(chromedriverloc, "", ["--headless"]) sleep 5000 @@ -42,9 +75,18 @@ for feed in feeds: #Getting the listing URLs from the feeds echo "now reading " & feed - sleep 1000 - var rssFeedReply = getRSS(feed) - + var rssFeedReply: RSS + for attempt in countup(0,5): + try: + rssFeedReply = getRSS(feed) + except: + if attempt < 5 - 1: + continue + else: + raise + break + + for entry in rssFeedReply.items: #Sleep so indeed.com doesn't freak out if counter > 7: @@ -55,9 +97,6 @@ for feed in feeds: #Don't even bother visiting it if its in the cache var URL = entry.link let URLID = entry.link.split('&')[3] - echo URL - echo URLID - echo any(cache, proc (input: string): bool = input.contains(URLID)) if not any(cache, proc (input: string): bool = input.contains(URLID)): session.navigate(URL) counter = counter + 1 @@ -78,20 +117,23 @@ for feed in feeds: except UnpackDefect: salaryInfoAndJobType = "None Listed" - - #Job Value Scorer - - 
#Output - var output = """ - Title: $1 - Company: $2 - Salary Info and Job Type: $3 - URL : $4 - Description: - $5 - """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc] - writeFile(outdir & jobTitle.replace("/") & ".txt", output) - cache.add(URL) + #Filtering + if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)): + #Output + var output = """ + Title: $1 + Company: $2 + Salary Info and Job Type: $3 + URL : $4 + Description: + $5 + """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc] + writeFile(outdir & jobTitle.replace("/") & ".txt", output) + cache.add(URL) + else: + echo "Trigger was hit, discarding " & URL + else: + echo URL & " was in cache, discarding" session.close() terminate(chromedriver) -- cgit v1.2.3