summaryrefslogtreecommitdiffstats
path: root/comedyGenerator
blob: 1ba068e6a0e2cc547cb9ba651faeff516637e7be (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/python3
import requests
import urllib.request
import urllib3.exceptions
import sys
import os
import argparse
import time

# Command-line interface: a scrape-mode switch, an optional per-tag download
# cap, and one or more tags to scrape.
parser = argparse.ArgumentParser(add_help=True)

# Opt-in flag: when given, args.usingJavascript is True (default False).
parser.add_argument(
    '-js', '-JS', "--Javascript", '--javascript',
    dest='usingJavascript',
    action='store_true',
    default=False,
    help='Enables the user of a webdriver to scrape funnies',
)

# Integer cap on downloads per tag; the default of 0 is treated by the rest
# of the script as "no cap".
parser.add_argument(
    '--amount', '-a',
    dest='amount',
    type=int,
    action="store",
    default=0,
    help="the amount of funnies you'd like to download per tag",
)

# One or more positional tag names (at least one is required).
parser.add_argument(
    'tags',
    type=str,
    nargs='+',
    help='Provides tags to be check for funny downloading',
)

# Parsed once at start-up; the remainder of the script reads this namespace.
args = parser.parse_args()

def tag_url(tag):
    """Return the iFunny listing URL that shows only videos for *tag*."""
    return 'https://ifunny.co/tags/' + tag + '?filter=video'


def download_dir():
    """Return the directory that downloaded videos are written to.

    Raises:
        KeyError: if the USER environment variable is not set.
    """
    return '/home/' + os.environ['USER'] + '/Videos/unsorted/'


def limit_videos(videos, amount):
    """Return the first *amount* items of *videos*, or all of them.

    Args:
        videos: a sliceable sequence of video elements.
        amount: maximum number of items to keep; 0 or less means "no cap".
    """
    if amount > 0:
        return videos[:amount]
    return videos


def scroll_count(amount):
    """Return how many times to scroll the page so *amount* videos can load.

    One scroll is issued per ten requested videos, plus one extra.
    NOTE(review): presumably each scroll loads about ten videos - confirm.
    """
    return amount // 10 + 1


def save_videos(sources, directory):
    """Download every URL in *sources* into *directory* as ``<n>.mp4``.

    Numbering continues from the number of entries already in *directory*.
    A number whose file already exists is skipped so nothing is overwritten.

    Args:
        sources: iterable of video URLs; falsy entries (an element that had
            no ``data-src`` attribute) are reported and skipped.
        directory: target directory; created when it does not exist yet.

    Returns:
        The number of videos that were downloaded.
    """
    os.makedirs(directory, exist_ok=True)
    epoch = len(os.listdir(directory))
    saved = 0
    for source in sources:
        if not source:
            print("skipping a video without a data-src attribute")
            continue
        epoch += 1
        # Entry count and highest used number can drift apart once files are
        # removed; advance until the name is free instead of overwriting.
        while os.path.exists(os.path.join(directory, str(epoch) + '.mp4')):
            epoch += 1
        print("saving " + source + " as " + str(epoch) + '.mp4')
        urllib.request.urlretrieve(source, os.path.join(directory, str(epoch) + '.mp4'))
        saved += 1
    return saved


def start_webdriver():
    """Return a headless Selenium driver, trying Firefox first, then Chrome.

    Four start-up attempts are made in order. When all of them fail, install
    instructions are printed and the process exits with status 1.
    """
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options as ChromeOptions
    from selenium.webdriver.firefox.options import Options as FirefoxOptions

    def headless(options_class):
        # Every attempt gets a fresh options object with --headless set.
        options = options_class()
        options.add_argument("--headless")
        return options

    attempts = (
        lambda: webdriver.Firefox(options=headless(FirefoxOptions)),
        lambda: webdriver.Chrome(options=headless(ChromeOptions)),
        # NOTE(review): GeckoDriverManager, ChromeDriverManager and ChromeType
        # are not imported anywhere in this file (presumably they come from
        # the webdriver_manager package - confirm and import them), so the two
        # attempts below currently raise NameError and are skipped.
        lambda: webdriver.Firefox(executable_path=GeckoDriverManager().install(), options=headless(FirefoxOptions)),
        lambda: webdriver.Chrome(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(), options=headless(ChromeOptions)),
    )
    for attempt in attempts:
        try:
            return attempt()
        except Exception:
            # Any start-up failure just means "try the next candidate".
            continue

    print("Could not find webdriver!")
    print("You'll have to manually install a webdriver to your path")
    print("If you are using GNU/Linux, it is likely that you can install from your standard repos. Debian labels their chromium driver chromium-driver. If you wish to use an ungoogled version of chromium (as to reduce possiblity of spying), you can find a link to that here: https://github.com/Eloston/ungoogled-chromium#downloads. On Debian (or debian likes such as Ubuntu or Devuan), you may then run apt install ungoogled-chromium-driver and this will no longer fail.")
    # Exit non-zero so callers can tell that nothing was downloaded.
    sys.exit(1)


def load_tag_page(driver, tag, attempts=100):
    """Navigate *driver* to the listing page for *tag*, retrying on failure.

    After the n-th failed attempt (counting from 0) the function sleeps
    ``n * 12`` seconds before trying again.

    Returns:
        True once the page loaded, False when every attempt failed.
    """
    for tries in range(attempts):
        try:
            driver.get(tag_url(tag))
        except Exception:
            if tries < attempts - 1:
                print("Rate Limited! Sleeping for " + str(tries*12) + " seconds!")
                time.sleep(tries*12)
                continue
            return False
        return True
    return False


def scrape_with_html(tags, amount):
    """Download videos for each tag by parsing the page's static HTML."""
    from bs4 import BeautifulSoup
    print("using html mode...")
    for tag in tags:
        response = requests.get(tag_url(tag))
        webpage = BeautifulSoup(response.content, 'html.parser')
        videos = limit_videos(webpage.find_all('video'), amount)
        # .get() yields None for a missing attribute; save_videos skips those.
        save_videos([video.get('data-src') for video in videos], download_dir())


def scrape_with_javascript(tags, amount):
    """Download videos for each tag using a headless browser."""
    from selenium.webdriver.common.by import By
    print("using JS mode...")
    driver = start_webdriver()
    try:
        for tag in tags:
            if not load_tag_page(driver, tag):
                # Do not scrape whatever page the browser is still showing.
                print("Could not load the page for tag " + tag + "; skipping it")
                continue
            if amount > 0:
                # Scroll to the bottom repeatedly so more videos get loaded.
                for _ in range(scroll_count(amount)):
                    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                    time.sleep(3)
            # find_elements(By, value) is available in Selenium 3 and 4;
            # find_elements_by_tag_name was removed in Selenium 4.3.
            videos = limit_videos(driver.find_elements(By.TAG_NAME, "video"), amount)
            save_videos([video.get_attribute("data-src") for video in videos], download_dir())
    finally:
        # Always shut the browser down, even when a download fails.
        driver.quit()


def main(options):
    """Run the scraper in the mode selected by the parsed command line.

    Args:
        options: parsed arguments providing ``usingJavascript`` (bool),
            ``amount`` (int) and ``tags`` (list of str).
    """
    if options.usingJavascript:
        scrape_with_javascript(options.tags, options.amount)
    else:
        scrape_with_html(options.tags, options.amount)


if __name__ == "__main__":
    main(args)