Diffstat (limited to 'src')
-rw-r--r--  src/indeedwatcher.nim  69
1 file changed, 33 insertions, 36 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index bc52138..8c4a71e 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -6,6 +6,7 @@ import os
 import strutils
 import parsetoml
 import sequtils
+import uri
 
 if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
   createDir(getConfigDir() & "/indeedwatcher/")
@@ -53,7 +54,8 @@ echo "connecting"
 #TODO make the port configurable, some users may have something running here
 let driver = newWebDriver("http://localhost:9515")
 var session: Session
-var counter = 0
+var feedcounter = 0
+var urlcounter = 0
 
 #Behavior when CTRL+C
 proc terminate() {.noconv.} =
@@ -70,7 +72,10 @@ setControlCHook(terminate)
 
 for feed in feeds:
   session = driver.createSession()
-  sleep 3000
+  if feedcounter > 3:
+    echo "resting for 20 seconds ..."
+    sleep 20000
+    feedcounter = 0
   #Getting the listing URLs from the feeds
   var rssFeedReply: RSS
   for attempt in countup(0,3):
@@ -87,20 +92,24 @@ for feed in feeds:
 
   for entry in rssFeedReply.items:
     #Sleep so indeed.com doesn't freak out
-    if counter > 7:
+    if urlcounter > 7:
       echo "resting for 10 seconds ..."
       sleep 10000
-      counter = 0
+      urlcounter = 0
 
-    #Don't even bother visiting it if its in the cache
+    #Don't even bother visiting it if its in the cache or hits a trigger word
     var URL = entry.link
-    let URLID = entry.link.split('&')[3]
+    let queries = URL.parseUri.query.decodeQuery().toSeq()
+    let jobName = queries[0].value
+    let employer = queries[1].value
+    let location = queries[2].value
+    let URLID = queries[3].value
 
     #This isn't cache.readFile().contains(URLID)
     #because nim has no way to both open a file in append mode
     #and also open it as reading. Therefore, this blunder, which
     #creates a new file in memory, is used instead.
-    if not readFile(cachefileloc).contains(URLID):
+    if not readFile(cachefileloc).contains(URLID) or not any(titleblacklist, proc (input: string): bool = jobName.contains(input)):
      for attempt in countup(0,3):
        try:
          echo "Telling chromium to navigate to " & URL
@@ -112,21 +121,12 @@ for feed in feeds:
          else:
            raise
        break
-      counter = counter + 1
+      urlcounter = urlcounter + 1
 
      #HTML Parser
      echo "Beginning to parse..."
-      let jobTitle = session.findElement(".jobsearch-JobInfoHeader-title").get().getText()
      let fullDesc = session.findElement("#jobDescriptionText").get().getText()
 
-      var employer: string
-      try:
-        #This takes the location from the URL, removes all the junk around it, and replaced the URL pluses with actual spaces
-        #perhaps, a URL parsing library could have been used for this.
-        employer = entry.link.split('&')[1][2..^1].replace("+"," ")
-      except UnpackDefect:
-        employer = "None Listed"
-
      var salaryInfoAndJobType: string
      try:
        salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext()
@@ -134,26 +134,23 @@ for feed in feeds:
        salaryInfoAndJobType = "None Listed"
      echo "Finishing the parse..."
 
-      #Filtering
-      if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)):
-        echo "Beginning to write to file..."
-        #Output
-        var output = """
-        Title: $1
-        Company: $2
-        Salary Info and Job Type: $3
-        URL : $4
-        Description:
-        $5
-        """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
-        writeFile(outdir & jobTitle.replace("/") & ".txt", output)
-        echo "Wrote job to file!"
-        cache.writeLine(URL)
-        echo "Wrote listing to cache!"
-      else:
-        echo "Trigger was hit, discarding " & URL
+      echo "Beginning to write to file..."
+      #Output
+      var output = """
+      Title: $1
+      Employer: $2
+      Location: $3
+      Salary Info and Job Type: $4
+      URL : $5
+      Description:
+      $6
+      """ % [jobName, employer, location, salaryInfoAndJobType, URL, fullDesc]
+      writeFile(outdir & jobName.replace("/") & ".txt", output)
+      echo "Wrote job to file!"
+      cache.writeLine(URL)
+      echo "Wrote listing to cache!"
 
    else:
-      echo URL & " was in cache, discarding"
+      echo URL & " was in cache or hit a trigger word, discarding"
 
  session.close()
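
For reference, the new parsing path above replaces manual '&' splitting with Nim's std/uri (parseUri, decodeQuery) plus sequtils.toSeq, then indexes the resulting key/value pairs by position. A minimal standalone sketch of that idea follows; the URL and its parameter names (title, cmp, loc, jk) are made up for illustration and are not Indeed's real ones, only the positional order matters here.

import std/[uri, sequtils]

# Hypothetical listing URL; the watcher assumes the query pairs always
# arrive in the order: job title, employer, location, listing ID.
let link = "https://example.com/viewjob?title=Programmer&cmp=Acme&loc=Remote&jk=abc123"

let queries = link.parseUri.query.decodeQuery().toSeq()
echo queries[0].value   # "Programmer" (what the diff binds to jobName)
echo queries[3].value   # "abc123"     (URLID, used as the cache key)

Indexing by position keeps the code short, but it depends on the feed always emitting the query parameters in the same order.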