summary refs log tree commit diff stats
diff options
context:
space:
mode:
author msglm <msglm@techchud.xyz> 2023-01-11 20:49:27 -0600
committer msglm <msglm@techchud.xyz> 2023-01-11 20:49:27 -0600
commit 40fc1eb64fb8da26544bd2e2b4855813ffb2f244 (patch)
tree 38247b31b502881ad026de586081d5f16c13cd19
parent 07e77fb6dc8a5ab9472db86f269c3a844199add1 (diff)
download indeedwatcher-40fc1eb64fb8da26544bd2e2b4855813ffb2f244.tar.gz
indeedwatcher-40fc1eb64fb8da26544bd2e2b4855813ffb2f244.tar.bz2
indeedwatcher-40fc1eb64fb8da26544bd2e2b4855813ffb2f244.zip
cache system actually works now
-rw-r--r-- spec/flowchart.gv 3
-rw-r--r-- spec/flowchart.png bin 134627 -> 120196 bytes
-rw-r--r-- src/indeedwatcher.nim 148
3 files changed, 80 insertions, 71 deletions
diff --git a/spec/flowchart.gv b/spec/flowchart.gv
index 6d181ec..1a41936 100644
--- a/spec/flowchart.gv
+++ b/spec/flowchart.gv
@@ -16,8 +16,7 @@ config -> "Job Value Scorer" [label="Priorities"]
config -> "Cache System" [label="Cache location"]
"Job Value Scorer" -> "Cache System" [label="URL of currently parsing job post"]
"Cache System" -> "Job Value Scorer" [label="True or False to if URL is in database"]
-"Job Value Scorer" -> "latexdsl" [label="Formatted job data"]
-"latexdsl" -> "Saver" [label="PDF file"]
+"Job Value Scorer" -> "Saver" [label="Formatted job data"]
config -> "Saver" [label="save location"]
"Saver" -> "Filesystem" [label="PDF file"]
diff --git a/spec/flowchart.png b/spec/flowchart.png
index 8c6bc0d..e74f0fe 100644
--- a/spec/flowchart.png
+++ b/spec/flowchart.png
Binary files differ
diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim
index 1807e93..2e5ff87 100644
--- a/src/indeedwatcher.nim
+++ b/src/indeedwatcher.nim
@@ -4,83 +4,93 @@ import osproc
import options
import os
import strutils
+import parsetoml
+import sequtils
-#Feednim
-var test = getRSS("https://rss.indeed.com/rss?q=Linux&l=Arkansas&explvl=mid_level")
-var URL = test.items[5].link
+
+
+#TODO make this create folders and files for this automatically upon first start up
+let config = parsetoml.parseFile(getConfigDir() & "/indeedwatcher/config.toml")
+let feeds = config["feeds"].getElems().mapIt(it.getStr())
+let outdir = config["outdir"].getStr()
+let chromedriverloc = config["chromedriver"].getStr()
+let cachefileloc = getCacheDir() & "/indeedwatcher/listings.cache"
+var cache = splitLines(readFile(cachefileloc))
#Webdriver
-#TODO put location of chromedriver into config
-let chromedriver = startProcess("/usr/bin/chromedriver")
+let chromedriver = startProcess(chromedriverloc, "", ["--headless", "--disable-gpu"])
sleep 5000
echo "connecting"
+#TODO make the port configurable, some users may have something running here
let driver = newWebDriver("http://localhost:9515")
let session = driver.createSession()
-session.navigate(URL)
-
-#HTML Parser
-var jobTimes: string
-var salaryGuide: string
-
-let jobTitle = session.findElement(".jobsearch-JobInfoHeader-title").get().getText()
-let employer = session.findElement(".jobsearch-CompanyReview--heading").get().getText()
-
-try:
- jobTimes = session.findElement(".jobsearch-JobDescriptionSection-sectionItem").get().getText()
-except UnpackDefect:
- jobTimes = ""
+var counter = 0
+
+proc terminate() {.noconv.} =
+ echo "\nAcknowledged termination attempt..."
+ echo "Writing Cache..."
+ writeFile(cachefileloc, cache.join("\n"))
+ echo "Closing Session..."
+ session.close()
+ echo "Killing Chromedriver..."
+ terminate(chromedriver)
+ echo "Dying!"
+ quit()
+setControlCHook(terminate)
+
+for feed in feeds:
+ echo "now reading " & feed
+
+ #Feednim
+ var rssFeedReply = getRSS(feed)
+
+ for entry in rssFeedReply.items:
+ echo entry.link
+ #Logging
+
+ if counter > 7:
+ echo "resting for 7 seconds ..."
+ sleep 7000
+ counter = 0
+ var URL = entry.link
+ let URLID = entry.link.split('&')[4]
+ echo any(cache, proc (input: string): bool = input.contains(URLID))
+ if not any(cache, proc (input: string): bool = input.contains(URLID)):
+ session.navigate(URL)
+ counter = counter + 1
+
+ #HTML Parser
+ var salaryInfoAndJobType: string
+ var employer: string
+ let jobTitle = session.findElement(".jobsearch-JobInfoHeader-title").get().getText()
+
+ try:
+ employer = session.findElement(".jobsearch-InlineCompanyRating-companyHeader").get().getText()
+ except UnpackDefect:
+ salaryInfoAndJobType = "None Listed"
+
+ try:
+ salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext()
+ except UnpackDefect:
+ salaryInfoAndJobType = "None Listed"
+
+ let fullDesc = session.findElement("#jobDescriptionText").get().getText()
+
+ #Job Value Scorer
+
+ #Output
+ var output = """
+ Title: $1
+ Company: $2
+ Salary Info and Job Type: $3
+ URL : $4
+ Description:
+ $5
+ """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc]
+ writeFile(outdir & jobTitle.replace("/") & ".txt", output)
+ cache.add(URL)
-try:
- salaryGuide = session.findelement("#salaryGuide").get().gettext()
-except UnpackDefect:
- salaryGuide = "None Listed"
-
-if salaryGuide.contains("Not provided by employer"):
- salaryGuide = "None Listed"
-
-let fullDesc = session.findElement("#jobDescriptionText").get().getText()
session.close()
terminate(chromedriver)
+writeFile(cachefileloc, cache.join("\n"))
-#Job Value Scorer
-
-#Parsing Salary
-
-#Output
-var output = """
-\documentclass{article}
-\usepackage[margin=0.7in]{geometry}
-\usepackage{pdfpages}
-\usepackage{hyperref}
-\hypersetup{
- colorlinks=true,
- linkcolor=black,
- filecolor=magenta,
- urlcolor=blue,
- }
-
-
-\begin{document}
-
-\begin{center}
- \Huge{$1}
-\end{center}
-\hrulefill
-
- \large{$2}
- \hspace{3cm}
- \large{$3}
- \hspace{3cm}
- \large{\href{$4}{URL}}
- \hspace{3cm}
- \large{$5}
-
-\hrulefill
-
-$6
-
-\end{document}
-""" % [jobTitle, employer, salaryGuide, URL.replace("&","\\&"), jobTimes, fullDesc.replace("#", "\\#").replace("&", "\\&").replace("\"", "\\\"").replace("'","\\'")]
-
-writeFile("/tmp/test.tex", output)
-discard execCmd( "latexmk -pdf -pvc /tmp/test.tex")