#!/usr/bin/python3
"""Download videos ("funnies") from ifunny.co tag pages.

Two scraping modes are supported:

* JS mode (the default) drives a Selenium webdriver, scrolls the tag page to
  load more videos and downloads every ``<video data-src=...>`` it finds.
* HTML mode (``--no-js``) fetches the page once with ``requests`` and parses
  it with BeautifulSoup.

Files are saved to ``~/Videos/unsorted`` as ``<tag>-<md5 of the URL>.mp4``.
"""
import argparse
import hashlib
import os
import sys
import time
import urllib.request

try:
    import requests
except ImportError:
    # Only HTML mode needs requests; the default JS mode must keep working
    # on machines where it is not installed.
    requests = None

# Retry policy shared by page loads and downloads.
MAX_TRIES = 100
BACKOFF_STEP = 1.5  # seconds added to the pause after every failed attempt


def build_parser():
    """Return the command-line parser for the scraper."""
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument(
        '--source',
        default=False,
        action='store_true',
        dest='source',
        help='Links to the source code of the software'
    )
    parser.add_argument(
        '--debug',
        default=False,
        action='store_true',
        dest='debug',
        help='Enables debug output'
    )
    # Kept for backward compatibility: JS mode is already the default, so
    # passing this flag changes nothing.
    parser.add_argument(
        '-js', '-JS', "--Javascript", '--javascript',
        default=True,
        action='store_true',
        dest='usingJavascript',
        help='Enables the user of a webdriver to scrape funnies'
    )
    # Without this switch usingJavascript could never become False and the
    # HTML mode was unreachable.
    parser.add_argument(
        '--no-js', '--no-javascript',
        action='store_false',
        dest='usingJavascript',
        help='Scrapes the static HTML instead of driving a webdriver'
    )
    parser.add_argument(
        '--amount', '-a',
        default=0,
        dest='amount',
        action="store",
        help="the amount of funnies you'd like to download per tag",
        type=int
    )
    parser.add_argument(
        'tags',
        nargs='+',
        type=str,
        help='Provides tags to be check for funny downloading'
    )
    return parser


def video_filename(tag, url):
    """Return the file name for *url* found under *tag*.

    The name is ``<tag>-<md5 hex digest of the URL>.mp4``, so downloading the
    same URL twice yields the same file name.
    """
    return tag + "-" + hashlib.md5(url.encode('utf-8')).hexdigest() + ".mp4"


def scroll_count(amount):
    """Return how many times the page is scrolled to load *amount* videos.

    One scroll per ten requested videos, plus one.
    """
    return amount // 10 + 1


def download_dir():
    """Return the directory downloads are written to (``~/Videos/unsorted``)."""
    return os.path.join(os.path.expanduser('~'), 'Videos', 'unsorted')


def retry(action, *args, max_tries=MAX_TRIES, sleep=time.sleep):
    """Call ``action(*args)`` until it succeeds or *max_tries* is exhausted.

    After the n-th failure (counting from zero) the function pauses for
    ``n * BACKOFF_STEP`` seconds before trying again.

    Args:
        action: Callable to run.
        *args: Positional arguments passed to *action*.
        max_tries: Maximum number of attempts.
        sleep: Callable used to pause; replaceable for testing.

    Returns:
        True if a call succeeded, False if every attempt raised.
    """
    for tries in range(max_tries):
        try:
            action(*args)
        except Exception as err:  # Ctrl-C / SystemExit must still propagate
            if tries < max_tries - 1:
                delay = tries * BACKOFF_STEP
                print("Rate Limited! Sleeping for " + str(delay) + " seconds!")
                sleep(delay)
                continue
            print("Giving up after " + str(max_tries) + " attempts: " + str(err))
            return False
        return True
    return False


def _launch_firefox(webdriver):
    """Start a headless Firefox using a geckodriver already on the PATH."""
    from selenium.webdriver.firefox.options import Options
    options = Options()
    options.add_argument("--headless")
    return webdriver.Firefox(options=options)


def _launch_chrome(webdriver):
    """Start Chrome using a chromedriver already on the PATH."""
    from selenium.webdriver.chrome.options import Options
    options = Options()
    # TODO: "--headless" completely breaks the script here; a screenshot
    # shows nothing but a white screen. Likely got discovered.
    return webdriver.Chrome(options=options)


def _install_firefox(webdriver):
    """Start a headless Firefox with a freshly installed geckodriver."""
    from selenium.webdriver.firefox.options import Options
    options = Options()
    options.add_argument("--headless")
    # NOTE(review): GeckoDriverManager is never imported in this file, so
    # this currently always fails with NameError and falls through.
    # Presumably it should come from the webdriver_manager package - confirm.
    return webdriver.Firefox(
        executable_path=GeckoDriverManager().install(), options=options
    )


def _install_chromium(webdriver):
    """Start a headless Chromium with a freshly installed chromedriver."""
    from selenium.webdriver.chrome.options import Options
    options = Options()
    options.add_argument("--headless")
    # NOTE(review): ChromeDriverManager / ChromeType are never imported in
    # this file either - same caveat as in _install_firefox.
    return webdriver.Chrome(
        ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(),
        options=options,
    )


def _start_driver(debug):
    """Return the first webdriver that starts; exit with status 1 if none does.

    Tried in order: Firefox, Chrome, Firefox with a driver install, Chromium
    with a driver install.
    """
    # Imported outside the attempts on purpose: a missing selenium package
    # should surface as ImportError, not as "Could not find webdriver!".
    from selenium import webdriver

    # (launcher, debug message on success, debug message on failure)
    attempts = (
        (_launch_firefox, "Firefox Works!",
         "Firefox didn't work! Trying Chrome!"),
        (_launch_chrome, "Chrome Works!",
         "Chrome Failed! Going to attempt an install of the firefox webdriver"),
        (_install_firefox, "Install successful! using Firefox!",
         "Install Failed! Trying Chrome webdriver install!"),
        (_install_chromium, None, None),
    )
    if debug:
        print("testing if firefox works...")
    for launch, on_success, on_failure in attempts:
        try:
            driver = launch(webdriver)
        except Exception:
            if debug and on_failure:
                print(on_failure)
            continue
        if debug and on_success:
            print(on_success)
        return driver

    print("Could not find webdriver!")
    print("You'll have to manually install a webdriver to your path")
    print("If you are using GNU/Linux, it is likely that you can install from your standard repos. Debian labels their chromium driver chromium-driver. If you wish to use an ungoogled version of chromium (as to reduce possiblity of spying), you can find a link to that here: https://github.com/Eloston/ungoogled-chromium#downloads. On Debian (or debian likes such as Ubuntu or Devuan), you may then run apt install ungoogled-chromium-driver and this will no longer fail.")
    sys.exit(1)  # a missing webdriver is a failure, not a clean exit


def _scrape_html(args, dest):
    """Download videos for every tag by parsing the static tag page."""
    if requests is None:
        print("The requests package is required for HTML mode!")
        sys.exit(1)
    from bs4 import BeautifulSoup
    print("using html mode...")
    for tag in args.tags:
        response = requests.get(
            'https://ifunny.co/tags/' + tag + '?filter=video', timeout=30
        )
        webpage = BeautifulSoup(response.content, 'html.parser')
        videos = webpage.find_all('video')
        if args.amount > 0:
            videos = videos[:args.amount]
        for video in videos:
            url = video.get('data-src')
            if not url:
                # A <video> without data-src has nothing to download.
                continue
            name = video_filename(tag, url)
            print("saving " + url + " as " + name)
            retry(urllib.request.urlretrieve, url, os.path.join(dest, name))


def _scrape_tag_with_driver(driver, by, tag, args, dest):
    """Load one tag page in *driver*, scroll it and download its videos."""
    if args.debug:
        print("Downloading Tag: " + tag)
    loaded = retry(driver.get, 'https://ifunny.co/tags/' + tag + '?filter=video')
    if loaded and args.debug:
        print("Got Webpage!")

    if args.amount > 0:
        if args.debug:
            print("starting to scroll...")
        wanted = scroll_count(args.amount)
        for done in range(1, wanted + 1):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(3)  # pause between scrolls before the next one
            if args.debug:
                print("Need to scroll " + str(wanted - done) + " more times...")

    videos = driver.find_elements(by.TAG_NAME, "video")
    if args.debug:
        print("Original Videos List: ")
        print(videos)
    if args.amount > 0:
        videos = videos[:args.amount]
        if args.debug:
            print("Videos list truncated! Its now: " + str(len(videos)) + " units long")

    for video in videos:
        print("Now running for " + str(video))
        url = video.get_attribute("data-src")
        if not isinstance(url, str):
            if args.debug:
                print("URL is NOT a string, it is a " + str(type(url)))
                # str() is required: url is not a string on this path.
                print("URL was " + str(url))
            continue
        if args.debug:
            print("URL read as: " + url)
        name = video_filename(tag, url)
        if args.debug:
            print("name read as: " + name)
        print("saving " + url + " as " + name)
        retry(urllib.request.urlretrieve, url, os.path.join(dest, name))


def _scrape_with_webdriver(args, dest):
    """Download videos for every tag using a Selenium webdriver."""
    from selenium.webdriver.common.by import By
    print("using JS mode...")
    driver = _start_driver(args.debug)
    try:
        for tag in args.tags:
            _scrape_tag_with_driver(driver, By, tag, args, dest)
    finally:
        # Always close the browser, even when scraping raised.
        driver.quit()


def main(argv=None):
    """Parse *argv* (default: ``sys.argv[1:]``) and run the chosen scraper."""
    args = build_parser().parse_args(argv)
    if args.source:
        print("https://git.snootgame.xyz/PrincipalSpears/comedyGenerator")

    dest = download_dir()
    os.makedirs(dest, exist_ok=True)

    if args.usingJavascript:
        _scrape_with_webdriver(args, dest)
    else:
        _scrape_html(args, dest)


if __name__ == "__main__":
    main()

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License version 3 as published by
# the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.