summaryrefslogtreecommitdiffstats
path: root/src/indeedwatcher.nim
diff options
context:
space:
mode:
Diffstat (limited to 'src/indeedwatcher.nim')
-rw-r--r--src/indeedwatcher.nim69
1 files changed, 33 insertions, 36 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index bc52138..8c4a71e 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -6,6 +6,7 @@ import os
import strutils
import parsetoml
import sequtils
+import uri
if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
createDir(getConfigDir() & "/indeedwatcher/")
@@ -53,7 +54,8 @@ echo "connecting"
#TODO make the port configurable, some users may have something running here
let driver = newWebDriver("http://localhost:9515")
var session: Session
-var counter = 0
+var feedcounter = 0
+var urlcounter = 0
#Behavior when CTRL+C
proc terminate() {.noconv.} =
@@ -70,7 +72,10 @@ setControlCHook(terminate)
for feed in feeds:
session = driver.createSession()
- sleep 3000
+ if feedcounter > 3:
+ echo "resting for 20 seconds ..."
+ sleep 20000
+ feedcounter = 0
#Getting the listing URLs from the feeds
var rssFeedReply: RSS
for attempt in countup(0,3):
@@ -87,20 +92,24 @@ for feed in feeds:
for entry in rssFeedReply.items:
#Sleep so indeed.com doesn't freak out
- if counter > 7:
+ if urlcounter > 7:
echo "resting for 10 seconds ..."
sleep 10000
- counter = 0
+ urlcounter = 0
- #Don't even bother visiting it if its in the cache
+ #Don't even bother visiting it if it's in the cache or hits a trigger word
var URL = entry.link
- let URLID = entry.link.split('&')[3]
+ let queries = URL.parseUri.query.decodeQuery().toSeq()
+ let jobName = queries[0].value
+ let employer = queries[1].value
+ let location = queries[2].value
+ let URLID = queries[3].value
#This isn't cache.readFile().contains(URLID)
#because Nim has no way to both open a file in append mode
#and also open it for reading. Therefore, this blunder, which
#creates a new copy of the file in memory, is used instead.
- if not readFile(cachefileloc).contains(URLID):
+ if not readFile(cachefileloc).contains(URLID) and not any(titleblacklist, proc (input: string): bool = jobName.contains(input)): # visit only when NOT cached AND NOT blacklisted
for attempt in countup(0,3):
try:
echo "Telling chromium to navigate to " & URL
@@ -112,21 +121,12 @@ for feed in feeds:
else:
raise
break
- counter = counter + 1
+ urlcounter = urlcounter + 1
#HTML Parser
echo "Beginning to parse..."
- let jobTitle = session.findElement(".jobsearch-JobInfoHeader-title").get().getText()
let fullDesc = session.findElement("#jobDescriptionText").get().getText()
- var employer: string
- try:
- #This takes the location from the URL, removes all the junk around it, and replaced the URL pluses with actual spaces
- #perhaps, a URL parsing library could have been used for this.
- employer = entry.link.split('&')[1][2..^1].replace("+"," ")
- except UnpackDefect:
- employer = "None Listed"
-
var salaryInfoAndJobType: string
try:
salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext()
@@ -134,26 +134,23 @@ for feed in feeds:
salaryInfoAndJobType = "None Listed"
echo "Finishing the parse..."
- #Filtering
- if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)):
- echo "Beginning to write to file..."
- #Output
- var output = """
- Title: $1
- Company: $2
- Salary Info and Job Type: $3
- URL : $4
- Description:
- $5
- """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
- writeFile(outdir & jobTitle.replace("/") & ".txt", output)
- echo "Wrote job to file!"
- cache.writeLine(URL)
- echo "Wrote listing to cache!"
- else:
- echo "Trigger was hit, discarding " & URL
+ echo "Beginning to write to file..."
+ #Output
+ var output = """
+ Title: $1
+ Employer: $2
+ Location: $3
+ Salary Info and Job Type: $4
+ URL : $5
+ Description:
+ $6
+ """ % [jobName, employer, location, salaryInfoAndJobType, URL, fullDesc]
+ writeFile(outdir & jobName.replace("/") & ".txt", output)
+ echo "Wrote job to file!"
+ cache.writeLine(URL)
+ echo "Wrote listing to cache!"
else:
- echo URL & " was in cache, discarding"
+ echo URL & " was in cache or hit a trigger word, discarding"
session.close()