#!/usr/bin/python3
"""Download videos for one or more ifunny.co tags.

Two scraping modes are available:

* HTML mode (default): fetch the tag page with ``requests`` and parse the
  ``<video>`` elements with BeautifulSoup.
* JS mode (``-js``): drive a headless Firefox or Chrome through Selenium,
  scrolling the page so more videos are loaded before collecting them.

Videos are saved as ``<number>.mp4`` in ``~/Videos/unsorted``.
"""
# Reconstructed from the post-image of commit
# 90ed90eceb1b92f9d75c935dcf4f70c60289f49c ("now works with javascript",
# msglm, 2023-01-11) of the ``stealIfunny`` script.

import argparse
import os
import sys
import time
import urllib.parse
import urllib.request

# Videos loaded per scroll, as assumed by the original scroll arithmetic
# (amount / 10 + 1 scrolls).
VIDEOS_PER_SCROLL = 10
# Seconds to wait after each scroll so the page can load more entries.
SCROLL_PAUSE_SECONDS = 3

WEBDRIVER_HELP = (
    "If you are using GNU/Linux, it is likely that you can install from your "
    "standard repos. Debian labels their chromium driver chromium-driver. If "
    "you wish to use an ungoogled version of chromium (as to reduce possiblity "
    "of spying), you can find a link to that here: "
    "https://github.com/Eloston/ungoogled-chromium#downloads. On Debian (or "
    "debian likes such as Ubuntu or Devuan), you may then run apt install "
    "ungoogled-chromium-driver and this will no longer fail."
)


def build_parser():
    """Return the command-line parser (same options as the original script)."""
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument(
        '-js', '-JS', "--Javascript", '--javascript',
        default=False,
        action='store_true',
        dest='usingJavascript',
        help='Enables the user of a webdriver to scrape funnies',
    )
    parser.add_argument(
        '--amount', '-a',
        default=0,
        dest='amount',
        action="store",
        help="the amount of funnies you'd like to download per tag",
        type=int,
    )
    parser.add_argument(
        'tags',
        nargs='+',
        type=str,
        help='Provides tags to be check for funny downloading',
    )
    return parser


def tag_url(tag):
    """Return the video-filtered listing URL for *tag*.

    The tag is percent-encoded so tags containing spaces or other reserved
    characters still produce a valid URL.
    """
    return 'https://ifunny.co/tags/' + urllib.parse.quote(tag, safe='') + '?filter=video'


def limit_videos(items, amount):
    """Return at most *amount* leading items; a non-positive amount means no limit."""
    return items[:amount] if amount > 0 else items


def scroll_count(amount):
    """Return how many times JS mode scrolls the page for *amount* videos.

    No scrolling happens when no positive amount was requested.
    """
    if amount <= 0:
        return 0
    return amount // VIDEOS_PER_SCROLL + 1


def output_directory():
    """Return the download directory, ``~/Videos/unsorted``.

    Uses the resolved home directory instead of ``'/home/' + $USER`` so the
    script also works when ``USER`` is unset or the home is not under /home.
    """
    return os.path.join(os.path.expanduser('~'), 'Videos', 'unsorted')


def save_videos(urls, directory):
    """Download each URL in *urls* into *directory* as ``<number>.mp4``.

    Numbering continues after the number of entries already in the
    directory. Numbers whose file already exists are skipped so an existing
    download is never overwritten.
    """
    os.makedirs(directory, exist_ok=True)
    index = len(os.listdir(directory))
    for url in urls:
        index += 1
        target = os.path.join(directory, str(index) + '.mp4')
        while os.path.exists(target):
            index += 1
            target = os.path.join(directory, str(index) + '.mp4')
        print("saving " + url + " as " + target)
        urllib.request.urlretrieve(url, target)


def scrape_html(tags, amount, directory):
    """Download videos for every tag by parsing the static HTML page."""
    # Imported lazily, like selenium below, so each mode only needs its own
    # third-party packages installed.
    import requests
    from bs4 import BeautifulSoup

    print("using html mode...")
    for tag in tags:
        response = requests.get(tag_url(tag))
        page = BeautifulSoup(response.content, 'html.parser')
        sources = [video.get('data-src') for video in page.find_all('video')]
        # Skip <video> elements that carry no data-src attribute.
        urls = [source for source in sources if source]
        save_videos(limit_videos(urls, amount), directory)


def start_driver():
    """Return a headless Firefox or Chrome webdriver, or None if neither starts.

    The original script had two further fallbacks built on
    GeckoDriverManager / ChromeDriverManager, but those names were never
    imported, so the attempts could only fail; they are omitted here.
    """
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options as ChromeOptions
    from selenium.webdriver.firefox.options import Options as FirefoxOptions

    candidates = (
        (webdriver.Firefox, FirefoxOptions),
        (webdriver.Chrome, ChromeOptions),
    )
    for factory, options_class in candidates:
        options = options_class()
        options.add_argument("--headless")
        try:
            return factory(options=options)
        except Exception:
            # Best-effort probe: any failure to start this browser just
            # means the next candidate is tried.
            continue
    return None


def scrape_js(tags, amount, directory):
    """Download videos for every tag using a headless browser.

    Returns the process exit status: 0 on success, 1 when no webdriver
    could be started.
    """
    from selenium.webdriver.common.by import By

    print("using JS mode...")
    driver = start_driver()
    if driver is None:
        print("Could not find webdriver!")
        print("You'll have to manually install a webdriver to your path")
        print(WEBDRIVER_HELP)
        return 1

    try:
        for tag in tags:
            driver.get(tag_url(tag))
            for _ in range(scroll_count(amount)):
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(SCROLL_PAUSE_SECONDS)
            elements = driver.find_elements(By.TAG_NAME, "video")
            sources = [element.get_attribute("data-src") for element in elements]
            # Skip <video> elements that carry no data-src attribute.
            urls = [source for source in sources if source]
            save_videos(limit_videos(urls, amount), directory)
    finally:
        # Always shut the browser down, even if a download fails.
        driver.quit()
    return 0


def main(argv=None):
    """Parse *argv* (default: ``sys.argv[1:]``) and run the selected mode."""
    args = build_parser().parse_args(argv)
    directory = output_directory()
    if args.usingJavascript:
        return scrape_js(args.tags, args.amount, directory)
    scrape_html(args.tags, args.amount, directory)
    return 0


if __name__ == "__main__":
    sys.exit(main())