summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--  src/indeedwatcher.nim  82
1 file changed, 62 insertions(+), 20 deletions(-)
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index 0f86a18..c1946dc 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -7,16 +7,49 @@ import strutils
import parsetoml
import sequtils
+if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
+ createDir(getConfigDir() & "/indeedwatcher/")
+ let defaultConfig = """
+ #Output directory of your potential job listings
+ outdir = ""
+ #Port you wish chromedriver to use
+ port = 9515
+ #Location of chromedriver
+ chromedriver = ""
+ #Array of RSS urls that you wish the program to parse
+ feeds = [ \"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=contract&explvl=entry_level\", \"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=temporary&explvl=entry_level\"]
+
+ #Phrases that, if they appear, will cause the job to be instantly thrown out
+ [blacklist]
+ title= [\"Senior\", \"Sr.\"]
+ """
+ writeFile(getConfigDir() & "/indeedwatcher/config.toml", defaultConfig)
+
+if not fileExists(getCacheDir() & "/indeedwatcher/config.toml"):
+ createDir(getCacheDir() & "/indeedwatcher/")
+ writeFile(getCacheDir() & "/indeedwatcher/listings.cache", "")
+
+
#TODO make this create folders and files for this automatically upon first start up
+
+#Reading the config file
let config = parsetoml.parseFile(getConfigDir() & "/indeedwatcher/config.toml")
+
+##Main section of config
let feeds = config["feeds"].getElems().mapIt(it.getStr())
let outdir = config["outdir"].getStr()
+
+##Cache section of config
let chromedriverloc = config["chromedriver"].getStr()
let cachefileloc = getCacheDir() & "/indeedwatcher/listings.cache"
var cache = splitLines(readFile(cachefileloc))
+##Filtering section of config
+let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr())
+
+
#Webdriver
let chromedriver = startProcess(chromedriverloc, "", ["--headless"])
sleep 5000
@@ -42,9 +75,18 @@ for feed in feeds:
#Getting the listing URLs from the feeds
echo "now reading " & feed
- sleep 1000
- var rssFeedReply = getRSS(feed)
-
+ var rssFeedReply: RSS
+ for attempt in countup(0,5):
+ try:
+ rssFeedReply = getRSS(feed)
+ except:
+ if attempt < 5 - 1:
+ continue
+ else:
+ raise
+ break
+
+
for entry in rssFeedReply.items:
#Sleep so indeed.com doesn't freak out
if counter > 7:
@@ -55,9 +97,6 @@ for feed in feeds:
#Don't even bother visiting it if its in the cache
var URL = entry.link
let URLID = entry.link.split('&')[3]
- echo URL
- echo URLID
- echo any(cache, proc (input: string): bool = input.contains(URLID))
if not any(cache, proc (input: string): bool = input.contains(URLID)):
session.navigate(URL)
counter = counter + 1
@@ -78,20 +117,23 @@ for feed in feeds:
except UnpackDefect:
salaryInfoAndJobType = "None Listed"
-
- #Job Value Scorer
-
- #Output
- var output = """
- Title: $1
- Company: $2
- Salary Info and Job Type: $3
- URL : $4
- Description:
- $5
- """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
- writeFile(outdir & jobTitle.replace("/") & ".txt", output)
- cache.add(URL)
+ #Filtering
+ if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)):
+ #Output
+ var output = """
+ Title: $1
+ Company: $2
+ Salary Info and Job Type: $3
+ URL : $4
+ Description:
+ $5
+ """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
+ writeFile(outdir & jobTitle.replace("/") & ".txt", output)
+ cache.add(URL)
+ else:
+ echo "Trigger was hit, discarding " & URL
+ else:
+ echo URL & " was in cache, discarding"
session.close()
terminate(chromedriver)