summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authormsglm <msglm@techchud.xyz>2025-08-17 19:54:24 -0500
committermsglm <msglm@techchud.xyz>2025-08-17 19:54:24 -0500
commit565e074fcb92c7e0c77e2f858a806f12a4c4e74a (patch)
treefe0fd0051dba248e5adfcdfa3a636f959ec0f40c /src
parente743843ba351d586dbd4a0f47fc573024844abb3 (diff)
downloadindeed-watcher-565e074fcb92c7e0c77e2f858a806f12a4c4e74a.tar.gz
indeed-watcher-565e074fcb92c7e0c77e2f858a806f12a4c4e74a.tar.bz2
indeed-watcher-565e074fcb92c7e0c77e2f858a806f12a4c4e74a.zip
4 big improvements
config, if it doesn't exist, is created; cache, if it doesn't exist, is created; nimble build system added; you can now blacklist words from job titles
Diffstat (limited to 'src')
-rw-r--r--src/indeedwatcher.nim82
1 files changed, 62 insertions, 20 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index 0f86a18..c1946dc 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -7,16 +7,49 @@ import strutils
import parsetoml
import sequtils
+if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
+ createDir(getConfigDir() & "/indeedwatcher/")
+ let defaultConfig = """
+  #Output directory of your potential job listings
+ outdir = ""
+ #Port you wish chromedriver to use
+ port = 9515
+ #Location of chromedriver
+ chromedriver = ""
+ #Array of RSS urls that you wish the program to parse
+ feeds = [ \"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=contract&explvl=entry_level\", \"https://rss.indeed.com/rss?q=Information%20Technology&l=Remote&jt=temporary&explvl=entry_level\"]
+
+ #Phrases that, if they appear, will cause the job to be instantly thrown out
+ [blacklist]
+ title= [\"Senior\", \"Sr.\"]
+ """
+ writeFile(getConfigDir() & "/indeedwatcher/config.toml", defaultConfig)
+
+if not fileExists(getCacheDir() & "/indeedwatcher/config.toml"):
+ createDir(getCacheDir() & "/indeedwatcher/")
+ writeFile(getCacheDir() & "/indeedwatcher/listings.cache", "")
+
+
#TODO make this create folders and files for this automatically upon first start up
+
+#Reading the config file
let config = parsetoml.parseFile(getConfigDir() & "/indeedwatcher/config.toml")
+
+##Main section of config
let feeds = config["feeds"].getElems().mapIt(it.getStr())
let outdir = config["outdir"].getStr()
+
+##Cache section of config
let chromedriverloc = config["chromedriver"].getStr()
let cachefileloc = getCacheDir() & "/indeedwatcher/listings.cache"
var cache = splitLines(readFile(cachefileloc))
+##Filtering section of config
+let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr())
+
+
#Webdriver
let chromedriver = startProcess(chromedriverloc, "", ["--headless"])
sleep 5000
@@ -42,9 +75,18 @@ for feed in feeds:
#Getting the listing URLs from the feeds
echo "now reading " & feed
- sleep 1000
- var rssFeedReply = getRSS(feed)
-
+ var rssFeedReply: RSS
+ for attempt in countup(0,5):
+ try:
+ rssFeedReply = getRSS(feed)
+ except:
+ if attempt < 5 - 1:
+ continue
+ else:
+ raise
+ break
+
+
for entry in rssFeedReply.items:
#Sleep so indeed.com doesn't freak out
if counter > 7:
@@ -55,9 +97,6 @@ for feed in feeds:
#Don't even bother visiting it if its in the cache
var URL = entry.link
let URLID = entry.link.split('&')[3]
- echo URL
- echo URLID
- echo any(cache, proc (input: string): bool = input.contains(URLID))
if not any(cache, proc (input: string): bool = input.contains(URLID)):
session.navigate(URL)
counter = counter + 1
@@ -78,20 +117,23 @@ for feed in feeds:
except UnpackDefect:
salaryInfoAndJobType = "None Listed"
-
- #Job Value Scorer
-
- #Output
- var output = """
- Title: $1
- Company: $2
- Salary Info and Job Type: $3
- URL : $4
- Description:
- $5
- """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
- writeFile(outdir & jobTitle.replace("/") & ".txt", output)
- cache.add(URL)
+ #Filtering
+ if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)):
+ #Output
+ var output = """
+ Title: $1
+ Company: $2
+ Salary Info and Job Type: $3
+ URL : $4
+ Description:
+ $5
+ """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
+ writeFile(outdir & jobTitle.replace("/") & ".txt", output)
+ cache.add(URL)
+ else:
+ echo "Trigger was hit, discarding " & URL
+ else:
+ echo URL & " was in cache, discarding"
session.close()
terminate(chromedriver)