## Reads the RSS feeds listed in the config file, opens every listing whose ID
## is not yet in the cache through a chromedriver session, and writes one text
## file per listing into the configured output directory.

import rss
import webdriver
import osproc
import options
import os
import strutils
import parsetoml
import sequtils

const
  #TODO make the port configurable, some users may have something running here
  webdriverUrl = "http://localhost:9515"
  # Placeholder written when an optional element is absent from the page.
  missingField = "None Listed"
  # Listings fetched between two rest pauses (the loop rests once the counter
  # exceeds this value).
  restThreshold = 7
  # NOTE(review): the line breaks of this template were reconstructed (one
  # field per line, description on its own lines) - confirm the intended layout.
  listingTemplate = """
Title: $1
Company: $2
Salary Info and Job Type: $3
URL : $4
Description:
$5
"""

func extractListingId(link: string): string =
  ## Returns the fourth `&`-separated segment of `link`, which is the value the
  ## cache lookup matches on. Falls back to the whole link when there are fewer
  ## than four segments instead of raising `IndexDefect`.
  let parts = link.split('&')
  if parts.len > 3: parts[3] else: link

let config = parsetoml.parseFile(getConfigDir() / "indeedwatcher" / "config.toml")
let feeds = config["feeds"].getElems().mapIt(it.getStr())
let outdir = config["outdir"].getStr()
let chromedriverloc = config["chromedriver"].getStr()
let cachefileloc = getCacheDir() / "indeedwatcher" / "listings.cache"

# First start-up: make sure the cache directory exists and treat a missing
# cache file as an empty cache instead of failing in readFile.
#TODO also create the config folder and a default config file on first start up
createDir(cachefileloc.parentDir)
var cache =
  if fileExists(cachefileloc):
    # Blank lines carry no listing; dropping them keeps the file tidy when it
    # is written back.
    readFile(cachefileloc).splitLines().filterIt(it.len > 0)
  else:
    newSeq[string]()

#Webdriver
let chromedriver = startProcess(chromedriverloc, "", ["--headless"])
# Fixed delay before connecting to the freshly started chromedriver process.
sleep 5000
echo "connecting"
let driver = newWebDriver(webdriverUrl)
let session = driver.createSession()
var counter = 0

proc terminate() {.noconv.} =
  ## Ctrl-C hook: writes the cache, closes the webdriver session, kills
  ## chromedriver and exits the program.
  echo "\nAcknowledged termination attempt..."
  echo "Writing Cache..."
  writeFile(cachefileloc, cache.join("\n"))
  echo "Closing Session..."
  session.close()
  echo "Killing Chromedriver..."
  terminate(chromedriver)
  echo "Dying!"
  quit()

setControlCHook(terminate)

proc elementTextOr(selector, fallback: string): string =
  ## Text of the first element matching `selector` on the page currently loaded
  ## in `session`, or `fallback` when no element matches. Checks the `Option`
  ## instead of catching `UnpackDefect`, which cannot be caught when the
  ## program is built with `--panics:on`.
  let found = session.findElement(selector)
  if found.isSome: found.get().getText() else: fallback

try:
  for feed in feeds:
    #Getting the listing URLs from the feeds
    echo "now reading " & feed
    sleep 1000
    let rssFeedReply = getRSS(feed)

    for entry in rssFeedReply.items:
      #Sleep so indeed.com doesn't freak out
      if counter > restThreshold:
        echo "resting for 7 seconds ..."
        sleep 7000
        counter = 0

      #Don't even bother visiting it if its in the cache
      let listingUrl = entry.link
      let urlId = extractListingId(listingUrl)
      let alreadySeen = cache.anyIt(it.contains(urlId))
      echo listingUrl
      echo urlId
      echo alreadySeen
      if alreadySeen:
        continue

      session.navigate(listingUrl)
      inc counter

      #HTML Parser
      let titleElement = session.findElement(".jobsearch-JobInfoHeader-title")
      let descElement = session.findElement("#jobDescriptionText")
      if titleElement.isNone or descElement.isNone:
        # Without a title there is no file name to write to. Skip the listing
        # and leave it out of the cache so a later run tries it again.
        echo "skipping " & listingUrl & ": job title or description not found"
        continue
      let jobTitle = titleElement.get().getText()
      let fullDesc = descElement.get().getText()
      let employer = elementTextOr(
        ".jobsearch-InlineCompanyRating-companyHeader", missingField)
      let salaryInfoAndJobType = elementTextOr(
        "#salaryInfoAndJobType", missingField)

      #Job Value Scorer

      #Output
      let output = listingTemplate % [
        jobTitle, employer, salaryInfoAndJobType, listingUrl, fullDesc]
      # '/' is stripped from the title so it cannot act as a path separator;
      # os.`/` joins correctly whether or not outdir ends with a separator.
      writeFile(outdir / (jobTitle.replace("/", "") & ".txt"), output)
      cache.add(listingUrl)
finally:
  # Runs on normal completion and when an exception escapes the loop, so the
  # cache is saved and chromedriver is not left running.
  writeFile(cachefileloc, cache.join("\n"))
  try:
    session.close()
  finally:
    terminate(chromedriver)