author    msglm <msglm@techchud.xyz>  2023-02-14 23:31:33 -0600
committer msglm <msglm@techchud.xyz>  2023-02-14 23:31:33 -0600
commit    c0f5799ae651b0ee3651e20302446475bcac17e0 (patch)
tree      9bdacbb69561ea3d18473cff4473e072f2c3b7ea /src
parent    9e541a3e96cdd2103825b6ce7079a2c58d086fc3 (diff)
download  indeedwatcher-1.0.5.tar.gz, indeedwatcher-1.0.5.tar.bz2, indeedwatcher-1.0.5.zip
release 1.0.5 (tag: v1.0.5)
Trigger words now discard listings much earlier, using the job name taken from the URL, which improves stability and speed. Deprecated the old way of parsing URLs in favour of Nim's uri library for readability's sake. Longer waiting times for both RSS feeds and opening URLs. The cache is no longer overwritten on start-up.
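As a rough illustration of the new early filter, here is a minimal sketch, assuming an Indeed-style link whose first query parameter is the job title (the example URL, the parameter order and the blacklist values below are placeholders, not taken from this commit):

import uri
import sequtils
import strutils

# Placeholder link shaped like the ones the RSS feed hands to the watcher.
let link = "https://example.invalid/rc/clk?q=Junior+Developer&cmp=Acme&l=Remote&jk=abc123"

# decodeQuery yields (key, value) pairs; toSeq keeps them in URL order.
let queries = link.parseUri.query.decodeQuery().toSeq()
let jobName = queries[0].value

# Hypothetical blacklist; the real one is read from config.toml.
let titleblacklist = @["senior", "clearance"]

if any(titleblacklist, proc (w: string): bool = jobName.toLowerAscii.contains(w)):
  echo "Trigger word hit, discarding " & link
else:
  echo "Would open " & link & " in the browser session"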
Diffstat (limited to 'src')
-rw-r--r--  src/indeedwatcher.nim | 69
1 file changed, 33 insertions(+), 36 deletions(-)
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index bc52138..8c4a71e 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -6,6 +6,7 @@ import os
import strutils
import parsetoml
import sequtils
+import uri
if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
createDir(getConfigDir() & "/indeedwatcher/")
@@ -53,7 +54,8 @@ echo "connecting"
#TODO make the port configurable, some users may have something running here
let driver = newWebDriver("http://localhost:9515")
var session: Session
-var counter = 0
+var feedcounter = 0
+var urlcounter = 0
#Behavior when CTRL+C
proc terminate() {.noconv.} =
@@ -70,7 +72,10 @@ setControlCHook(terminate)
for feed in feeds:
session = driver.createSession()
- sleep 3000
+ if feedcounter > 3:
+ echo "resting for 20 seconds ..."
+ sleep 20000
+ feedcounter = 0
#Getting the listing URLs from the feeds
var rssFeedReply: RSS
for attempt in countup(0,3):
@@ -87,20 +92,24 @@ for feed in feeds:
for entry in rssFeedReply.items:
#Sleep so indeed.com doesn't freak out
- if counter > 7:
+ if urlcounter > 7:
echo "resting for 10 seconds ..."
sleep 10000
- counter = 0
+ urlcounter = 0
- #Don't even bother visiting it if its in the cache
+ #Don't even bother visiting it if its in the cache or hits a trigger word
var URL = entry.link
- let URLID = entry.link.split('&')[3]
+ let queries = URL.parseUri.query.decodeQuery().toSeq()
+ let jobName = queries[0].value
+ let employer = queries[1].value
+ let location = queries[2].value
+ let URLID = queries[3].value
#This isn't cache.readFile().contains(URLID)
#because nim has no way to both open a file in append mode
#and also open it as reading. Therefore, this blunder, which
#creates a new file in memory, is used instead.
- if not readFile(cachefileloc).contains(URLID):
+ if not readFile(cachefileloc).contains(URLID) or not any(titleblacklist, proc (input: string): bool = jobName.contains(input)):
for attempt in countup(0,3):
try:
echo "Telling chromium to navigate to " & URL
@@ -112,21 +121,12 @@ for feed in feeds:
else:
raise
break
- counter = counter + 1
+ urlcounter = urlcounter + 1
#HTML Parser
echo "Beginning to parse..."
- let jobTitle = session.findElement(".jobsearch-JobInfoHeader-title").get().getText()
let fullDesc = session.findElement("#jobDescriptionText").get().getText()
- var employer: string
- try:
- #This takes the location from the URL, removes all the junk around it, and replaced the URL pluses with actual spaces
- #perhaps, a URL parsing library could have been used for this.
- employer = entry.link.split('&')[1][2..^1].replace("+"," ")
- except UnpackDefect:
- employer = "None Listed"
-
var salaryInfoAndJobType: string
try:
salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext()
@@ -134,26 +134,23 @@ for feed in feeds:
salaryInfoAndJobType = "None Listed"
echo "Finishing the parse..."
- #Filtering
- if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)):
- echo "Beginning to write to file..."
- #Output
- var output = """
- Title: $1
- Company: $2
- Salary Info and Job Type: $3
- URL : $4
- Description:
- $5
- """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
- writeFile(outdir & jobTitle.replace("/") & ".txt", output)
- echo "Wrote job to file!"
- cache.writeLine(URL)
- echo "Wrote listing to cache!"
- else:
- echo "Trigger was hit, discarding " & URL
+ echo "Beginning to write to file..."
+ #Output
+ var output = """
+ Title: $1
+ Employer: $2
+ Location: $3
+ Salary Info and Job Type: $4
+ URL : $5
+ Description:
+ $6
+ """ % [jobName, employer, location, salaryInfoAndJobType, URL, fullDesc]
+ writeFile(outdir & jobName.replace("/") & ".txt", output)
+ echo "Wrote job to file!"
+ cache.writeLine(URL)
+ echo "Wrote listing to cache!"
else:
- echo URL & " was in cache, discarding"
+ echo URL & " was in cache or hit a trigger word, discarding"
session.close()
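A side note on the cache handling mentioned in the commit message: the hunk above keeps the comment explaining that the file cannot be opened for appending and reading at once, so the whole cache is re-read per listing. Below is a rough sketch, not the commit's code, of one way to avoid that by loading the cache into a set once at start-up and appending afterwards; the file path and the remember helper are made up for illustration:

import os
import sets
import strutils

# Assumed location, mirroring how the program builds paths from getConfigDir().
let cachefileloc = getConfigDir() & "/indeedwatcher/cache"

# Load existing entries once so start-up never loses or re-fetches them.
var seen = initHashSet[string]()
if fileExists(cachefileloc):
  for line in lines(cachefileloc):
    seen.incl(line.strip())

# fmAppend preserves whatever is already on disk instead of truncating it.
let cache = open(cachefileloc, fmAppend)

# Hypothetical helper: record a listing once it has been written out.
proc remember(url: string) =
  if url notin seen:
    cache.writeLine(url)
    seen.incl(url)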