summaryrefslogtreecommitdiffstats
path: root/ddg-imager-taker
blob: c68817fbd1c1393080fc6fdc0a5512aea62ef433 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/python3
import argparse
import urllib.request
from os.path import basename

# Command-line interface: where to save, how many thumbnails to visit per
# page, and one or more result-page URLs to scrape.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--output', '-o',
    dest='outputDir',
    default=".",
    help="location to save to",
)
parser.add_argument(
    '--amount', '-a',
    dest='amount',
    type=int,
    default=100,
    help="the amount of images you'd like to download",
)
parser.add_argument(
    'URLs',
    nargs='+',
    help='Site to query',
)

# Parse before the Selenium imports below so that --help and usage errors
# are reported without starting a browser.
args = parser.parse_args()

from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# Image links collected while scraping; getSource() appends to this list and
# the download loop at the end of the script consumes it.
SourceURLs = []

# Start Chrome without a visible window.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(options=chrome_options)

# Let element lookups wait up to one second for the element to appear.
driver.implicitly_wait(1)

def getSource():
    """Click the current thumbnail and record the link it reveals.

    Takes no arguments; it reads module-level state set by the scraping
    loop: ``AllPortraitThumbnails`` (the list of thumbnail elements) and
    ``element`` (the index of the thumbnail to open). After clicking, the
    anchor at a fixed XPath is looked up and its ``href`` is appended to
    ``SourceURLs`` (presumably the URL of the original image -- confirm
    against the page layout).

    Any exception raised by the index lookup, the click or the element
    search propagates to the caller, which treats it as "try again later".
    """
    print("Opening...")
    AllPortraitThumbnails[element].click()
    link = driver.find_element(By.XPATH, "/html/body/div[2]/div[3]/div/div[2]/div/div[1]/div[1]/div/div[2]/div/div/a")
    href = link.get_attribute('href')
    # get_attribute() yields None when the attribute is absent; storing that
    # would later break the download step, which expects a URL string.
    if href:
        SourceURLs.append(href)
    else:
        print("No source link found, skipping.")


# XPath matching every thumbnail tile on a results page.
THUMBNAIL_XPATH = "/html/body/div[2]/div[3]/div/div[1]/div[2]/div/./div"

# Visit each requested page and open up to ``args.amount`` thumbnails on it,
# letting getSource() record the link behind each one.
#
# NOTE: ``AllPortraitThumbnails`` and ``element`` must stay module-level
# names -- getSource() reads them as globals.
try:
    for url in args.URLs:
        driver.get(url)
        sleep(3)  # give the page time to render its first batch of results
        AllPortraitThumbnails = driver.find_elements(By.XPATH, THUMBNAIL_XPATH)

        for element in range(args.amount):
            print("Obtaining element number " + str(element))
            # Pause on every 100th thumbnail (but not the very first one).
            if element % 100 == 0 and element != 0:
                sleep(5)
            try:
                getSource()
            except Exception as err:
                # Typical causes: the index is past the thumbnails found so
                # far, or an element went stale. Re-read the thumbnail list,
                # wait, and carry on with the next index.
                print("Could not open element " + str(element) + ": " + str(err))
                AllPortraitThumbnails = driver.find_elements(By.XPATH, THUMBNAIL_XPATH)
                sleep(5)
except KeyboardInterrupt:
    # Ctrl-C used to be swallowed by a bare ``except`` in the inner loop;
    # now it stops the run and shuts the browser down cleanly.
    print("KeyboardInterrupt Detected!")
    driver.quit()
    raise SystemExit(1) from None


print("Starting Download...")
# Scraping is finished; the browser is not needed for the downloads.
driver.quit()


# Fetch every collected link into the output directory, naming each file
# after the last path component of its URL. A failed download is reported
# and skipped so that one bad link does not abort the rest.
for sourceURL in SourceURLs:
    # An entry without a usable URL cannot be downloaded.
    if not sourceURL:
        continue
    name = basename(sourceURL)
    if not name:
        # URL ends in "/", so there is no file name to save under.
        print("Skipping " + sourceURL + " (no file name)")
        continue
    try:
        print("Downloading " + name)
        urllib.request.urlretrieve(sourceURL, args.outputDir + "/" + name)
    except KeyboardInterrupt:
        print("KeyboardInterrupt Detected!")
        # The driver was already shut down above, and the site-provided
        # quit() helper is intended for interactive use, so exit directly.
        raise SystemExit(1) from None
    except Exception as err:
        print("Failed to download " + name + ": " + str(err))

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License version 3 as published by
#    the Free Software Foundation.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <https://www.gnu.org/licenses/>.