author    msglm <msglm@techchud.xyz>    2023-01-11 20:49:28 -0600
committer msglm <msglm@techchud.xyz>    2023-01-11 20:49:28 -0600
commit    b9d8d41e2f1ad23ebe5d43d96f283822209f49b9 (patch)
tree      fe0fd0051dba248e5adfcdfa3a636f959ec0f40c /src/indeedwatcher.nim
parent    466e5376d61f004169fce19c8e20d192d272b59e (diff)
4 big improvements
config, if it doesn't exist, is created
cache, if it doesn't exist, is created
nimble build system added
you can now blacklist words from job titles
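
The generated config.toml lands in the user config directory (e.g. ~/.config/indeedwatcher/config.toml) and is meant to be edited before the next run. A filled-in sketch of what that file might look like (the outdir and chromedriver paths here are placeholders, not values the program ships with):

outdir = "/home/user/job-listings/"
port = 9515
chromedriver = "/usr/bin/chromedriver"
feeds = [ "https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=contract&explvl=entry_level" ]

[blacklist]
title = ["Senior", "Sr."]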
Diffstat (limited to 'src/indeedwatcher.nim')
-rw-r--r--  src/indeedwatcher.nim  82
1 file changed, 62 insertions, 20 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index 0f86a18..c1946dc 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -7,16 +7,49 @@ import strutils
import parsetoml
import sequtils
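+#Create the config directory and a default config.toml on first run if none exists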
+if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
+  createDir(getConfigDir() & "/indeedwatcher/")
+  let defaultConfig = """
+  #Output directory of your potential job listings
+  outdir = ""
+  #Port you wish chromedriver to use
+  port = 9515
+  #Location of chromedriver
+  chromedriver = ""
+  #Array of RSS urls that you wish the program to parse
+  feeds = [ "https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=contract&explvl=entry_level", "https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=temporary&explvl=entry_level"]
+
+  #Phrases that, if they appear, will cause the job to be instantly thrown out
+  [blacklist]
+  title = ["Senior", "Sr."]
+  """
+  writeFile(getConfigDir() & "/indeedwatcher/config.toml", defaultConfig)
+
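+#Create the cache directory and an empty listings cache on first run if none exists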
+if not fileExists(getCacheDir() & "/indeedwatcher/listings.cache"):
+  createDir(getCacheDir() & "/indeedwatcher/")
+  writeFile(getCacheDir() & "/indeedwatcher/listings.cache", "")
+
+
#TODO make this create folders and files for this automatically upon first start up
+
+#Reading the config file
let config = parsetoml.parseFile(getConfigDir() & "/indeedwatcher/config.toml")
+
+##Main section of config
let feeds = config["feeds"].getElems().mapIt(it.getStr())
let outdir = config["outdir"].getStr()
+
+##Webdriver and cache locations
let chromedriverloc = config["chromedriver"].getStr()
let cachefileloc = getCacheDir() & "/indeedwatcher/listings.cache"
var cache = splitLines(readFile(cachefileloc))
+##Filtering section of config
+let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr())
+
+
#Webdriver
let chromedriver = startProcess(chromedriverloc, "", ["--headless"])
sleep 5000
@@ -42,9 +75,18 @@ for feed in feeds:
#Getting the listing URLs from the feeds
echo "now reading " & feed
- sleep 1000
- var rssFeedReply = getRSS(feed)
-
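+  #Retry fetching the RSS feed a few times before giving up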
+  var rssFeedReply: RSS
+  for attempt in countup(0, 4):
+    try:
+      rssFeedReply = getRSS(feed)
+    except:
+      if attempt < 4:
+        continue
+      else:
+        raise
+    break
+
+
for entry in rssFeedReply.items:
#Sleep so indeed.com doesn't freak out
if counter > 7:
@@ -55,9 +97,6 @@ for feed in feeds:
#Don't even bother visiting it if its in the cache
var URL = entry.link
let URLID = entry.link.split('&')[3]
- echo URL
- echo URLID
- echo any(cache, proc (input: string): bool = input.contains(URLID))
if not any(cache, proc (input: string): bool = input.contains(URLID)):
session.navigate(URL)
counter = counter + 1
@@ -78,20 +117,23 @@ for feed in feeds:
except UnpackDefect:
salaryInfoAndJobType = "None Listed"
-
- #Job Value Scorer
-
- #Output
- var output = """
- Title: $1
- Company: $2
- Salary Info and Job Type: $3
- URL : $4
- Description:
- $5
- """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
- writeFile(outdir & jobTitle.replace("/") & ".txt", output)
- cache.add(URL)
+      #Filtering
+      if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)):
+        #Output
+        var output = """
+        Title: $1
+        Company: $2
+        Salary Info and Job Type: $3
+        URL : $4
+        Description:
+        $5
+        """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
+        writeFile(outdir & jobTitle.replace("/") & ".txt", output)
+        cache.add(URL)
+      else:
+        echo "Trigger was hit, discarding " & URL
+    else:
+      echo URL & " was in cache, discarding"
session.close()
terminate(chromedriver)