From 09db67368639ac0b3e510dbf0380d73bd40e2fcd Mon Sep 17 00:00:00 2001 From: msglm Date: Wed, 11 Jan 2023 20:46:08 -0600 Subject: rename --- README.md | 2 +- ddg-image-grabber | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ ddg-imager-taker | 91 ------------------------------------------------------- 3 files changed, 92 insertions(+), 92 deletions(-) create mode 100755 ddg-image-grabber delete mode 100755 ddg-imager-taker diff --git a/README.md b/README.md index 5d13711..4575e4d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

ddg-image-taker

+

ddg-image-grabber

Scrapes images off duckduckgo image search urls

Example

ddg-imager-taker -a 150 -o /tmp "https://duckduckgo.com/?t=ffab&q=vintage+world+leader+photos+&iax=images&ia=images&iaf=color%3AMonochrome" diff --git a/ddg-image-grabber b/ddg-image-grabber new file mode 100755 index 0000000..c68817f --- /dev/null +++ b/ddg-image-grabber @@ -0,0 +1,91 @@ +#!/usr/bin/python3 +import argparse +import urllib.request +from os.path import basename + +parser = argparse.ArgumentParser(add_help=True) +parser.add_argument('--output','-o', + default=".", + dest='outputDir', + action="store", + help="location to save to", + type=str + ) +parser.add_argument('--amount','-a', + default=100, + dest='amount', + action="store", + help="the amount of images you'd like to download", + type=int + ) +parser.add_argument('URLs', + nargs='+', + type=str, + help='Site to query' + ) + + + +args = parser.parse_args() + +from time import sleep +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +chrome_options = Options() +chrome_options.add_argument("--headless") +driver = webdriver.Chrome(options=chrome_options) +SourceURLs=[] +driver.implicitly_wait(1) + +def getSource(): + print("Opening...") + AllPortraitThumbnails[element].click() + SourceURLs.append(driver.find_element(By.XPATH, "/html/body/div[2]/div[3]/div/div[2]/div/div[1]/div[1]/div/div[2]/div/div/a").get_attribute('href')) + + +for url in args.URLs: + driver.get(url) + sleep(3) + AllPortraitThumbnails = driver.find_elements(By.XPATH, "/html/body/div[2]/div[3]/div/div[1]/div[2]/div/./div") + + for element in range(args.amount): + print("Obtaining element number " + str(element)) + try: + if element % 100 == 0 and element != 0: + sleep(5) + getSource() + else: + getSource() + except: + AllPortraitThumbnails = driver.find_elements(By.XPATH, "/html/body/div[2]/div[3]/div/div[1]/div[2]/div/./div") + sleep(5) + + +print("Starting Download...") +driver.quit() + + +for sourceURL in SourceURLs: + name = basename(sourceURL) + 
try: + print("Downloading " + name) + urllib.request.urlretrieve(sourceURL, args.outputDir + "/" + name) + except KeyboardInterrupt: + print("KeyboardInterrupt Detected!") + driver.quit() + quit(1) + except: + pass + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License version 3 as published by +# the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. diff --git a/ddg-imager-taker b/ddg-imager-taker deleted file mode 100755 index c68817f..0000000 --- a/ddg-imager-taker +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/python3 -import argparse -import urllib.request -from os.path import basename - -parser = argparse.ArgumentParser(add_help=True) -parser.add_argument('--output','-o', - default=".", - dest='outputDir', - action="store", - help="location to save to", - type=str - ) -parser.add_argument('--amount','-a', - default=100, - dest='amount', - action="store", - help="the amount of images you'd like to download", - type=int - ) -parser.add_argument('URLs', - nargs='+', - type=str, - help='Site to query' - ) - - - -args = parser.parse_args() - -from time import sleep -from selenium import webdriver -from selenium.webdriver.chrome.options import Options -from selenium.webdriver.common.by import By -chrome_options = Options() -chrome_options.add_argument("--headless") -driver = webdriver.Chrome(options=chrome_options) -SourceURLs=[] -driver.implicitly_wait(1) - -def getSource(): - print("Opening...") - AllPortraitThumbnails[element].click() - SourceURLs.append(driver.find_element(By.XPATH, 
"/html/body/div[2]/div[3]/div/div[2]/div/div[1]/div[1]/div/div[2]/div/div/a").get_attribute('href')) - - -for url in args.URLs: - driver.get(url) - sleep(3) - AllPortraitThumbnails = driver.find_elements(By.XPATH, "/html/body/div[2]/div[3]/div/div[1]/div[2]/div/./div") - - for element in range(args.amount): - print("Obtaining element number " + str(element)) - try: - if element % 100 == 0 and element != 0: - sleep(5) - getSource() - else: - getSource() - except: - AllPortraitThumbnails = driver.find_elements(By.XPATH, "/html/body/div[2]/div[3]/div/div[1]/div[2]/div/./div") - sleep(5) - - -print("Starting Download...") -driver.quit() - - -for sourceURL in SourceURLs: - name = basename(sourceURL) - try: - print("Downloading " + name) - urllib.request.urlretrieve(sourceURL, args.outputDir + "/" + name) - except KeyboardInterrupt: - print("KeyboardInterrupt Detected!") - driver.quit() - quit(1) - except: - pass - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License version 3 as published by -# the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -- cgit v1.2.3