diff options
Diffstat (limited to 'src/indeedgetter.nim')
-rw-r--r-- | src/indeedgetter.nim | 104 |
1 files changed, 104 insertions, 0 deletions
import webdriver
import osproc
import options
import os
import strutils
import parsetoml
import json
import std/segfaults

type
  indeedJobDesc = object
    ## Data collected for a single job posting.
    URL: string      ## Posting URL, taken from the first command-line argument.
    jobName: string  ## Job title scraped from the posting page.
    employer: string ## Employer name scraped from the posting page.
    location: string ## Declared but never assigned in this script.
    URLID: string    ## Declared but never assigned in this script.

# Validate the command line before anything is launched: reading paramStr(1)
# without an argument raises IndexDefect, and doing that after chromedriver
# has been spawned would leave the child process running.
if paramCount() < 1:
  quit("Usage: indeedgetter <job-posting-URL>", QuitFailure)

# Reading the config file.
# getConfigDir() already ends with a directory separator; `/` joins the
# components without producing a doubled separator.
let configPath = getConfigDir() / "indeedwatcher" / "config.toml"
let config = parsetoml.parseFile(configPath)

let driverURL = "http://localhost:" & config["port"].getInt().intToStr()
let chromedriverloc = config["chromedriver"].getStr()

# Does the user desire headlessness? Only the Chrome argument list differs
# between the two modes; the surrounding capabilities document is shared.
let chromeArgs =
  if config["headless"].getBool():
    %*["headless", "lang=en_US", "window-size=1920,1080", "start-maximized",
       "user-agent=\"Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/109.0\""]
  else:
    %*["start-maximized"]

let args = %*{
  "capabilities": {
    "alwaysMatch": {
      "goog:chromeOptions": {"args": chromeArgs}
    }
  }
}

# Webdriver
let chromedriver = startProcess(chromedriverloc)
# NOTE(review): fixed delay, presumably to let chromedriver start listening
# before the first request is sent - confirm whether polling would be better.
sleep 5000
echo "connecting"
let driver = newWebDriver(driverURL)
var session =
  try:
    driver.createSession(args)
  except:
    # Deliberately broad: the exception types raised by the webdriver library
    # are not visible here. Do not orphan chromedriver if no session could be
    # created; the original error is re-raised unchanged.
    terminate(chromedriver)
    raise

# Behavior when CTRL+C
proc terminate() {.noconv.} =
  ## Ctrl+C hook: closes the browser session, terminates the chromedriver
  ## child process and exits the program.
  echo "\nAcknowledged termination attempt..."
  echo "Closing Session..."
  session.close()
  echo "Killing Chromedriver..."
  terminate(chromedriver)
  echo "Dying!"
  quit()
setControlCHook(terminate)

var posting: indeedJobDesc
posting.URL = os.paramStr(1)

# Navigate to the posting, allowing up to four attempts (0..lastAttempt).
const lastAttempt = 3
for attempt in 0 .. lastAttempt:
  try:
    echo "Telling chromium to navigate to " & posting.URL
    session.navigate(posting.URL)
  except:
    # Deliberately broad, as above: any navigation failure is retried.
    if attempt < lastAttempt:
      echo "Recieved an error: trying again..."
      continue
    # Out of retries: shut everything down before propagating the error so
    # neither the browser session nor chromedriver is left behind.
    try:
      session.close()
    finally:
      terminate(chromedriver)
    raise
  break

# HTML Parser
echo "Beginning to parse..."
# Scrape the posting that the session has navigated to, write it to
# "./<employer> - <job name>/url.txt", then shut down the browser session and
# the chromedriver process.

template textOr(found: untyped; fallback: string): string =
  ## Text of the element held in the Option `found`, or `fallback` when the
  ## lookup produced no element. Absence is tested explicitly instead of
  ## catching UnpackDefect: Defects signal programmer errors and cannot be
  ## caught at all when compiling with --panics:on.
  let hit = found
  if hit.isSome: hit.get().getText()
  else: fallback

const
  employerXPath = "/html/body/div[1]/div[2]/div/div[4]/div/div/div[1]/div[1]/div[2]/div[1]/div[2]/div/div/div/div[1]"
  # $1 title, $2 employer, $3 salary/job type, $4 URL, $5 full description.
  postingTemplate = """
Title: $1
Employer: $2
Salary Info and Job Type: $3
URL : $4
Description:
$5
"""

try:
  # The description and the title are mandatory; without them there is
  # nothing meaningful to write, so fail with a clear message.
  let descEl = session.findElement("#jobDescriptionText")
  let titleEl = session.findElement(".jobsearch-JobInfoHeader-title")
  if descEl.isNone or titleEl.isNone:
    raise newException(ValueError,
      "Job title or description not found on page: " & posting.URL)

  let fullDesc = descEl.get().getText()
  posting.jobName = titleEl.get().getText()

  # Employer and salary information are optional on a posting.
  posting.employer = textOr(
    session.findElement(employerXPath, strategy = XPathSelector),
    "None Listed")
  let salaryInfoAndJobType = textOr(
    session.findElement("#salaryInfoAndJobType"), "None Listed")
  echo "Finishing the parse..."

  echo "Beginning to write to file..."
  # Output
  let output = postingTemplate % [posting.jobName, posting.employer,
                                  salaryInfoAndJobType, posting.URL, fullDesc]
  # Slashes are stripped so neither name introduces extra path components.
  let foldername = "./" & posting.employer.replace("/") & " - " &
                   posting.jobName.replace("/")
  # createDir does nothing when the directory already exists.
  createDir(foldername)
  writeFile(foldername & "/url.txt", output)
  echo "Wrote job to file!"
finally:
  # Always release the browser session and the chromedriver process, even
  # when scraping or writing failed part-way through.
  try:
    session.close()
  finally:
    terminate(chromedriver)