summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author msglm <msglm@techchud.xyz> 2023-02-17 04:19:44 -0600
committer msglm <msglm@techchud.xyz> 2023-02-17 04:19:44 -0600
commit 7c32a833c399742c1dd52537a68799a7d1585742 (patch)
tree c4b56b225b8ed571fe12da483371b87fe6141877
parent e3d871381ffd4414f895ee2590bbb44e2fa04dde (diff)
downloadindeedwatcher-7c32a833c399742c1dd52537a68799a7d1585742.tar.gz
indeedwatcher-7c32a833c399742c1dd52537a68799a7d1585742.tar.bz2
indeedwatcher-7c32a833c399742c1dd52537a68799a7d1585742.zip
added more ways to blacklist undesired content
-rw-r--r-- src/indeedwatcher.nim 45
1 files changed, 30 insertions, 15 deletions
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index 0fc8af4..a8d6bfc 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -9,6 +9,14 @@ import sequtils
import uri
import json
+type
+ indeedJobDesc = object ## One job posting, built from a feed entry's link and that link's query parameters.
+ URL: string ## The entry's link; the address navigated to and the line written to the cache.
+ jobName: string ## First query parameter of the link; also used for the output file name.
+ employer: string ## Second query parameter of the link.
+ location: string ## Third query parameter of the link.
+ URLID: string ## Fourth query parameter of the link; searched for in the cache file to skip known postings.
+
if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
createDir(getConfigDir() & "/indeedwatcher/")
let defaultConfig = """
@@ -26,6 +34,8 @@ if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):
#Phrases that, if they appear, will cause the job to be instantly thrown out
[blacklist]
title= [\"Senior\", \"Sr.\"]
+ employer= [\"NSA\"]
+ location= [\"Ohio\"]
"""
writeFile(getConfigDir() & "/indeedwatcher/config.toml", defaultConfig)
@@ -48,6 +58,8 @@ let cache = open(cachefileloc, fmAppend)
##Filtering section of config
let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr())
+let employerblacklist = config["blacklist"]["employer"].getElems().mapIt(it.getStr())
+let locationblacklist = config["blacklist"]["location"].getElems().mapIt(it.getStr())
##Does the user desire headlessness?
var args: JsonNode
@@ -80,6 +92,14 @@ proc terminate() {.noconv.} =
quit()
setControlCHook(terminate)
+proc postValid(posting: indeedJobDesc): bool =
+  ## Reports whether `posting` hits any blacklist entry.
+  ##
+  ## Despite the name, `true` means the posting matched a blacklisted
+  ## phrase and should be discarded; the caller negates the result
+  ## before deciding to visit the posting.
+  ##
+  ## One match in the title, the employer or the location is enough.
+  ## Joining the checks with `and` would only flag postings that match
+  ## all three lists at once, letting title-only hits through.
+  titleblacklist.anyIt(posting.jobName.contains(it)) or
+    employerblacklist.anyIt(posting.employer.contains(it)) or
+    locationblacklist.anyIt(posting.location.contains(it))
+
for feed in feeds:
#let args = %*{"desiredCapabilities":{"browserName":"chromium"}}
session = driver.createSession(args)
@@ -110,22 +130,19 @@ for feed in feeds:
urlcounter = 0
#Don't even bother visiting it if its in the cache or hits a trigger word
- var URL = entry.link
- let queries = URL.parseUri.query.decodeQuery().toSeq()
- let jobName = queries[0].value
- let employer = queries[1].value
- let location = queries[2].value
- let URLID = queries[3].value
+ let queries = entry.link.parseUri.query.decodeQuery().toSeq()
+ var posting: indeedJobDesc
+ posting = indeedJobDesc(URL: entry.link, jobName: queries[0].value, employer: queries[1].value, location: queries[2].value, URLID: queries[3].value)
#This isn't cache.readFile().contains(URLID)
#because nim has no way to both open a file in append mode
#and also open it as reading. Therefore, this blunder, which
#creates a new file in memory, is used instead.
- if not readFile(cachefileloc).contains(URLID) and not any(titleblacklist, proc (input: string): bool = jobName.contains(input)):
+ if not readFile(cachefileloc).contains(posting.URLID) and not postValid(posting):
for attempt in countup(0,3):
try:
- echo "Telling chromium to navigate to " & URL
- session.navigate(URL)
+ echo "Telling chromium to navigate to " & posting.URL
+ session.navigate(posting.URL)
except:
if attempt < 3:
echo "Recieved an error: trying again..."
@@ -156,17 +173,15 @@ for feed in feeds:
URL : $5
Description:
$6
- """ % [jobName, employer, location, salaryInfoAndJobType, URL, fullDesc]
- writeFile(outdir & jobName.replace("/") & ".txt", output)
+ """ % [posting.jobName, posting.employer, posting.location, salaryInfoAndJobType, posting.URL, fullDesc]
+ writeFile(outdir & posting.jobName.replace("/") & ".txt", output)
echo "Wrote job to file!"
- cache.writeLine(URL)
+ cache.writeLine(posting.URL)
echo "Wrote listing to cache!"
else:
- echo URL & " was in cache or hit a trigger word, discarding"
+ echo posting.URL & " was in cache or hit a trigger word, discarding"
session.close()
cache.close()
-#session.close()
terminate(chromedriver)
-