1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
import webdriver
import osproc
import options
import os
import strutils
import parsetoml
import json
import std/segfaults
type
  indeedJobDesc = object
    ## One scraped job posting. Every field is a plain string and starts
    ## out empty.
    URL, jobName, employer: string  # filled in by the scraping code below
    location, URLID: string  # declared but not assigned in the visible code
# Read the user's configuration from <config dir>/indeedwatcher/config.toml.
# getConfigDir() already ends with a path separator, so the path is joined
# with `/` instead of string concatenation to avoid a doubled separator.
let config = parsetoml.parseFile(
  getConfigDir() / "indeedwatcher" / "config.toml")
# chromedriver is expected to listen on localhost at the configured port.
let driverURL = "http://localhost:" & $config["port"].getInt()
# Path of the chromedriver executable to launch.
let chromedriverloc = config["chromedriver"].getStr()

# Session capabilities sent to chromedriver. With `headless = true` in the
# config the browser runs headless with a fixed window size, locale and
# user agent; otherwise it just starts maximized.
let args =
  if config["headless"].getBool():
    %*{
      "capabilities": {
        "alwaysMatch": {
          "goog:chromeOptions": {
            "args": [
              "headless",
              "lang=en_US",
              "window-size=1920,1080",
              "start-maximized",
              "user-agent=\"Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/109.0\""
            ]
          }
        }
      }
    }
  else:
    %*{
      "capabilities": {
        "alwaysMatch": {
          "goog:chromeOptions": {
            "args": ["start-maximized"]
          }
        }
      }
    }
# Webdriver
# Launch chromedriver, wait five seconds for it to come up, then open a
# browser session through it.
let chromedriver = startProcess(chromedriverloc)
sleep 5000
echo "connecting"
let driver = newWebDriver(driverURL)
var session =
  try:
    driver.createSession(args)
  except CatchableError:
    # The session could not be created: do not leave the chromedriver
    # process we just spawned running in the background.
    chromedriver.terminate()
    raise
# NOTE(review): these counters are not read anywhere in the visible code.
var feedcounter = 0
var urlcounter = 0
# Behavior when CTRL+C
proc terminate() {.noconv.} =
  ## Ctrl+C handler: closes the browser session, kills chromedriver and
  ## exits the program.
  echo "\nAcknowledged termination attempt..."
  echo "Closing Session..."
  try:
    session.close()
  except CatchableError as e:
    # A failed close must not prevent chromedriver from being killed below.
    echo "Could not close session: " & e.msg
  echo "Killing Chromedriver..."
  # Resolves to osproc's terminate(Process), not to this handler.
  terminate(chromedriver)
  echo "Dying!"
  quit()

setControlCHook(terminate)
# Scrape the posting whose URL is the first command-line argument and save
# it to "./<employer> - <job title>/url.txt". The browser session and the
# chromedriver process are shut down whether or not the scrape succeeds.

# Report layout; $1..$5 are title, employer, salary info and job type, URL
# and description, in that order.
const outputTemplate = """
Title: $1
Employer: $2
Salary Info and Job Type: $3
URL : $4
Description:
$5
"""

# Absolute XPath at which the employer name is looked up on the page.
const employerXPath = "/html/body/div[1]/div[2]/div/div[4]/div/div/div[1]/div[1]/div[2]/div[1]/div[2]/div/div/div/div[1]"

proc textOr[T](found: Option[T]; fallback: string): string =
  ## Text of the element held in `found`, or `fallback` when the lookup
  ## matched nothing. Replaces catching `UnpackDefect` as control flow.
  if found.isSome: found.get().getText() else: fallback

proc requiredText(selector: string): string =
  ## Text of the element matching `selector` (looked up with findElement's
  ## default strategy). Raises `ValueError` when the page has no match.
  let found = session.findElement(selector)
  if found.isNone:
    raise newException(ValueError, "No element matches " & selector)
  found.get().getText()

var posting: indeedJobDesc
try:
  if paramCount() < 1:
    raise newException(ValueError,
      "Expected the posting URL as the first argument")
  posting.URL = os.paramStr(1)

  # Navigation gets up to four attempts; the last failure is re-raised.
  for attempt in 0 .. 3:
    try:
      echo "Telling chromium to navigate to " & posting.URL
      session.navigate(posting.URL)
      break
    except CatchableError:
      if attempt == 3:
        raise
      echo "Recieved an error: trying again..."

  # HTML Parser
  echo "Beginning to parse..."
  let fullDesc = requiredText("#jobDescriptionText")
  posting.jobName = requiredText(".jobsearch-JobInfoHeader-title")
  # Employer and salary are optional on a posting.
  posting.employer = session.findElement(employerXPath,
      strategy = XPathSelector).textOr("None Listed")
  let salaryInfoAndJobType =
    session.findElement("#salaryInfoAndJobType").textOr("None Listed")
  echo "Finishing the parse..."

  # Output
  echo "Beginning to write to file..."
  let output = outputTemplate % [posting.jobName, posting.employer,
      salaryInfoAndJobType, posting.URL, fullDesc]
  # Slashes are stripped so the names cannot introduce extra path levels.
  let foldername = "./" & posting.employer.replace("/") & " - " &
      posting.jobName.replace("/")
  # createDir does not fail when the directory already exists.
  createDir(foldername)
  writeFile(foldername & "/url.txt", output)
  echo "Wrote job to file!"
finally:
  # Kill chromedriver even if closing the session itself fails.
  try:
    session.close()
  finally:
    chromedriver.terminate()
|