# Post-patch state of src/indeedwatcher.nim, reconstructed from the flattened
# diff of commit 40fc1eb ("cache system actually works now", msglm,
# 2023-01-11).
#
# Reads every RSS feed listed in the config, opens each listing that is not
# yet recorded in the cache through chromedriver, scrapes title, company,
# salary/job type and description, and writes one text file per listing into
# the configured output directory.
#
# NOTE(review): the patch hunk starts at line 4 of the original file; the
# imports that provide getRSS and newWebDriver sit above it and are not shown
# in the patch -- restore them above this line.
import osproc
import options
import os
import strutils
import parsetoml
import sequtils

const
  noneListed = "None Listed" # placeholder for optional fields a page lacks
  idFieldIndex = 4           # '&'-separated part of a link used as cache key
  listingTemplate = """
Title: $1
Company: $2
Salary Info and Job Type: $3
URL : $4
Description:
$5
"""

#TODO create the config folder and a template config.toml on first start up
let config = parsetoml.parseFile(
  getConfigDir() / "indeedwatcher" / "config.toml")
let feeds = config["feeds"].getElems().mapIt(it.getStr())
let outdir = config["outdir"].getStr()
let chromedriverloc = config["chromedriver"].getStr()
let cachefileloc = getCacheDir() / "indeedwatcher" / "listings.cache"

proc loadCache(path: string): seq[string] =
  ## Returns the non-empty lines of the cache file at `path`, or an empty seq
  ## when the file does not exist yet (first start up).
  if fileExists(path):
    result = readFile(path).splitLines().filterIt(it.len > 0)

proc saveCache(path: string; entries: seq[string]) =
  ## Writes `entries` one per line to `path`, creating the parent folder
  ## first so the very first run can persist its cache.
  createDir(parentDir(path))
  writeFile(path, entries.join("\n"))

func listingId(link: string): string =
  ## Cache key of a listing: the fifth '&'-separated part of its link.
  ## Falls back to the whole link when that part is missing or empty, so an
  ## unexpected link shape neither raises IndexDefect nor produces an empty
  ## key (an empty key would match every cache line via `contains`).
  let parts = link.split('&')
  if parts.len > idFieldIndex and parts[idFieldIndex].len > 0:
    parts[idFieldIndex]
  else:
    link

var cache = loadCache(cachefileloc)

#Webdriver
let chromedriver = startProcess(chromedriverloc, "",
                                ["--headless", "--disable-gpu"])
sleep 5000 # give chromedriver time to start before connecting
echo "connecting"
#TODO make the port configurable, some users may have something running here
let driver = newWebDriver("http://localhost:9515")
let session = driver.createSession()
var counter = 0 # pages opened since the last rest

proc textOf(selector: string): Option[string] =
  ## Text of the element matching `selector` on the page the session is
  ## currently showing, or none when no such element is found.
  let found = session.findElement(selector)
  if found.isSome:
    result = some(found.get().getText())
  else:
    result = none(string)

proc shutdown() =
  ## Shared clean-up for normal exit, errors and Ctrl-C. The cache is written
  ## first so it is kept even if closing the session fails.
  echo "Writing Cache..."
  saveCache(cachefileloc, cache)
  echo "Closing Session..."
  session.close()
  echo "Killing Chromedriver..."
  terminate(chromedriver)

proc terminate() {.noconv.} =
  ## Ctrl-C hook: clean up, then leave the program.
  echo "\nAcknowledged termination attempt..."
  shutdown()
  echo "Dying!"
  quit()
setControlCHook(terminate)

try:
  for feed in feeds:
    echo "now reading " & feed

    #Feednim
    let rssFeedReply = getRSS(feed)

    for entry in rssFeedReply.items:
      echo entry.link

      # Pause after a burst of page loads.
      if counter > 7:
        echo "resting for 7 seconds ..."
        sleep 7000
        counter = 0

      let url = entry.link
      let urlId = listingId(url)
      let seen = cache.anyIt(it.contains(urlId))
      echo seen
      if seen:
        continue

      session.navigate(url)
      inc counter

      #HTML Parser
      # Title and description are required; without them there is nothing
      # useful to write, so the listing is skipped (and left uncached so a
      # later run can retry it) instead of aborting the whole run.
      let jobTitle = textOf(".jobsearch-JobInfoHeader-title")
      let fullDesc = textOf("#jobDescriptionText")
      if jobTitle.isNone or fullDesc.isNone:
        echo "skipping listing, title or description not found"
        continue

      # Optional fields fall back to a placeholder.
      let employer =
        textOf(".jobsearch-InlineCompanyRating-companyHeader").get(noneListed)
      let salaryInfoAndJobType =
        textOf("#salaryInfoAndJobType").get(noneListed)

      #Job Value Scorer

      #Output
      let output = listingTemplate % [jobTitle.get(), employer,
                                      salaryInfoAndJobType, url,
                                      fullDesc.get()]
      # Slashes are dropped from the title so it stays a single file name;
      # `/` joins correctly whether or not outdir ends in a separator.
      writeFile(outdir / (jobTitle.get().replace("/") & ".txt"), output)
      cache.add(url)
finally:
  # Runs on normal completion and on errors alike, so chromedriver is not
  # left running and the cache collected so far is not lost.
  shutdown()