summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authormsglm <msglm@techchud.xyz>2025-08-17 19:54:24 -0500
committermsglm <msglm@techchud.xyz>2025-08-17 19:54:24 -0500
commit565e074fcb92c7e0c77e2f858a806f12a4c4e74a (patch)
treefe0fd0051dba248e5adfcdfa3a636f959ec0f40c /src
parente743843ba351d586dbd4a0f47fc573024844abb3 (diff)
downloadindeed-watcher-565e074fcb92c7e0c77e2f858a806f12a4c4e74a.tar.gz
indeed-watcher-565e074fcb92c7e0c77e2f858a806f12a4c4e74a.tar.bz2
indeed-watcher-565e074fcb92c7e0c77e2f858a806f12a4c4e74a.zip
4 big improvements
config, if it doesn't exist, is created; cache, if it doesn't exist, is created; nimble build system added; you can now blacklist words from job titles
Diffstat (limited to 'src')
-rw-r--r--src/indeedwatcher.nim82
1 files changed, 62 insertions, 20 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index 0f86a18..c1946dc 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -7,16 +7,49 @@ import strutils
import parsetoml
import sequtils
+if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
+ createDir(getConfigDir() & "/indeedwatcher/")
+ let defaultConfig = """
+  #Output directory of your potential job listings
+ outdir = ""
+ #Port you wish chromedriver to use
+ port = 9515
+ #Location of chromedriver
+ chromedriver = ""
+ #Array of RSS urls that you wish the program to parse
+ feeds = [ \"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=contract&explvl=entry_level\", \"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=temporary&explvl=entry_level\"]
+
+ #Phrases that, if they appear, will cause the job to be instantly thrown out
+ [blacklist]
+ title= [\"Senior\", \"Sr.\"]
+ """
+ writeFile(getConfigDir() & "/indeedwatcher/config.toml", defaultConfig)
+
+if not fileExists(getCacheDir() & "/indeedwatcher/config.toml"):
+ createDir(getCacheDir() & "/indeedwatcher/")
+ writeFile(getCacheDir() & "/indeedwatcher/listings.cache", "")
+
+
#TODO make this create folders and files for this automatically upon first start up
+
+#Reading the config file
let config = parsetoml.parseFile(getConfigDir() & "/indeedwatcher/config.toml")
+
+##Main section of config
let feeds = config["feeds"].getElems().mapIt(it.getStr())
let outdir = config["outdir"].getStr()
+
+##Cache section of config
let chromedriverloc = config["chromedriver"].getStr()
let cachefileloc = getCacheDir() & "/indeedwatcher/listings.cache"
var cache = splitLines(readFile(cachefileloc))
+##Filtering section of config
+let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr())
+
+
#Webdriver
let chromedriver = startProcess(chromedriverloc, "", ["--headless"])
sleep 5000
@@ -42,9 +75,18 @@ for feed in feeds:
#Getting the listing URLs from the feeds
echo "now reading " & feed
- sleep 1000
- var rssFeedReply = getRSS(feed)
-
+ var rssFeedReply: RSS
+ for attempt in countup(0,5):
+ try:
+ rssFeedReply = getRSS(feed)
+ except:
+ if attempt < 5 - 1:
+ continue
+ else:
+ raise
+ break
+
+
for entry in rssFeedReply.items:
#Sleep so indeed.com doesn't freak out
if counter > 7:
@@ -55,9 +97,6 @@ for feed in feeds:
#Don't even bother visiting it if its in the cache
var URL = entry.link
let URLID = entry.link.split('&')[3]
- echo URL
- echo URLID
- echo any(cache, proc (input: string): bool = input.contains(URLID))
if not any(cache, proc (input: string): bool = input.contains(URLID)):
session.navigate(URL)
counter = counter + 1
@@ -78,20 +117,23 @@ for feed in feeds:
except UnpackDefect:
salaryInfoAndJobType = "None Listed"
-
- #Job Value Scorer
-
- #Output
- var output = """
- Title: $1
- Company: $2
- Salary Info and Job Type: $3
- URL : $4
- Description:
- $5
- """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
- writeFile(outdir & jobTitle.replace("/") & ".txt", output)
- cache.add(URL)
+ #Filtering
+ if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)):
+ #Output
+ var output = """
+ Title: $1
+ Company: $2
+ Salary Info and Job Type: $3
+ URL : $4
+ Description:
+ $5
+ """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
+ writeFile(outdir & jobTitle.replace("/") & ".txt", output)
+ cache.add(URL)
+ else:
+ echo "Trigger was hit, discarding " & URL
+ else:
+ echo URL & " was in cache, discarding"
session.close()
terminate(chromedriver)