1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
#!/usr/bin/python3
import requests
import urllib.request
import urllib3.exceptions
import sys
import os
import argparse
import time
import hashlib
# Command-line interface.
#
# Each entry pairs the option strings (or the positional name) with the
# keyword arguments handed to ``add_argument``.  Options are registered in
# the order listed, which is also the order they appear in ``--help``.
_CLI_SPEC = (
    # Switch between plain-HTML scraping (default) and webdriver scraping.
    (
        ('-js', '-JS', "--Javascript", '--javascript'),
        {
            'dest': 'usingJavascript',
            'action': 'store_true',
            'default': False,
            'help': 'Enables the user of a webdriver to scrape funnies',
        },
    ),
    # Per-tag download cap; the rest of the script treats values <= 0 as
    # "no cap".
    (
        ('--amount', '-a'),
        {
            'dest': 'amount',
            'action': "store",
            'type': int,
            'default': 0,
            'help': "the amount of funnies you'd like to download per tag",
        },
    ),
    # One or more tags to look up.
    (
        ('tags',),
        {
            'nargs': '+',
            'type': str,
            'help': 'Provides tags to be check for funny downloading',
        },
    ),
)

parser = argparse.ArgumentParser(add_help=True)
for _names, _settings in _CLI_SPEC:
    parser.add_argument(*_names, **_settings)

# Parsed once at start-up; the scraping code below reads ``args`` directly.
args = parser.parse_args()
# ---------------------------------------------------------------------------
# Scraping and downloading.
#
# Two modes are supported, selected by ``args.usingJavascript``:
#   * HTML mode (default): fetch the tag page with ``requests`` and read the
#     <video> tags with BeautifulSoup.
#   * JS mode: drive a headless browser through selenium, scroll to load more
#     entries, then read the <video> elements from the live page.
# ---------------------------------------------------------------------------

# Retry policy shared by page loads and downloads: up to MAX_TRIES attempts,
# pausing ``attempt_index * BACKOFF_STEP`` seconds after each failure.
MAX_TRIES = 100
BACKOFF_STEP = 1.5

# JS mode scrolls ``amount // VIDEOS_PER_SCROLL + 1`` times, pausing
# SCROLL_PAUSE seconds after each scroll.
# NOTE(review): 10 presumably matches how many videos the site loads per
# scroll -- confirm against the live page.
VIDEOS_PER_SCROLL = 10
SCROLL_PAUSE = 3


def _tag_url(tag):
    """Return the video-filtered listing URL for ``tag``."""
    return 'https://ifunny.co/tags/' + tag + '?filter=video'


def _output_dir():
    """Return the download directory, creating it if it does not exist.

    Raises:
        KeyError: if the ``USER`` environment variable is not set.
        OSError: if the directory cannot be created.
    """
    path = '/home/' + os.environ['USER'] + '/Videos/unsorted/'
    # Without this, every download fails on a fresh machine and the failure
    # is indistinguishable from rate limiting.
    os.makedirs(path, exist_ok=True)
    return path


def _file_name(tag, digest_source):
    """Build ``<tag>-<md5 hex digest>.mp4`` from the bytes in ``digest_source``."""
    return tag + "-" + hashlib.md5(digest_source).hexdigest() + ".mp4"


def _retry(action, *action_args):
    """Call ``action(*action_args)`` until it succeeds or attempts run out.

    Any ``Exception`` is treated as rate limiting, as the script has always
    done, but KeyboardInterrupt and SystemExit are no longer swallowed, so
    Ctrl-C works while a request is in flight.

    Returns:
        True if a call succeeded, False if all MAX_TRIES attempts failed.
    """
    for tries in range(MAX_TRIES):
        try:
            action(*action_args)
        except Exception:
            # No point sleeping after the final attempt.
            if tries < MAX_TRIES - 1:
                print("Rate Limited! Sleeping for " + str(tries * BACKOFF_STEP) + " seconds!")
                time.sleep(tries * BACKOFF_STEP)
        else:
            return True
    return False


def _save_video(url, name, out_dir):
    """Download ``url`` into ``out_dir`` as ``name``, retrying on failure."""
    print("saving " + url + " as " + name)
    if not _retry(urllib.request.urlretrieve, url, out_dir + name):
        print("Giving up on " + url + " after " + str(MAX_TRIES) + " failed attempts")


def _scrape_html(tags, amount, out_dir):
    """Download videos for each tag by parsing the static HTML of its page.

    Args:
        tags: iterable of tag names.
        amount: maximum number of videos per tag; values <= 0 mean no limit.
        out_dir: directory path (with trailing separator) to save into.
    """
    # Imported lazily so JS mode does not require bs4 to be installed.
    from bs4 import BeautifulSoup
    print("using html mode...")
    for tag in tags:
        response = requests.get(_tag_url(tag))
        page = BeautifulSoup(response.content, 'html.parser')
        videos = page.find_all('video')
        if amount > 0:
            videos = videos[:amount]
        for video in videos:
            url = video.get('data-src')
            if not url:
                # A <video> without a source URL cannot be downloaded.
                continue
            # The digest is taken over the encoded tag itself (bs4's
            # ``Tag.encode``), not the URL, so that file names stay the same
            # as those produced by earlier runs of HTML mode.
            _save_video(url, _file_name(tag, video.encode('utf-8')), out_dir)


def _start_webdriver():
    """Return a headless selenium driver, or None if none could be started.

    Attempts are made in the original order: Firefox, Chrome, then the two
    driver-manager based fallbacks.
    """
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options as ChromeOptions
    from selenium.webdriver.firefox.options import Options as FirefoxOptions

    def headless(options_cls):
        options = options_cls()
        options.add_argument("--headless")
        return options

    attempts = (
        lambda: webdriver.Firefox(options=headless(FirefoxOptions)),
        lambda: webdriver.Chrome(options=headless(ChromeOptions)),
        # NOTE(review): GeckoDriverManager, ChromeDriverManager and ChromeType
        # are not imported anywhere in this file, so the two attempts below
        # currently always fail with NameError and are skipped.  They are kept
        # to record the intended fallback; importing them (they look like the
        # ``webdriver_manager`` package -- confirm) would add a dependency.
        lambda: webdriver.Firefox(
            executable_path=GeckoDriverManager().install(),
            options=headless(FirefoxOptions),
        ),
        lambda: webdriver.Chrome(
            ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(),
            options=headless(ChromeOptions),
        ),
    )
    for attempt in attempts:
        try:
            return attempt()
        except Exception:
            # Best effort by design: fall through to the next candidate.
            continue
    return None


def _scrape_js(driver, tags, amount, out_dir):
    """Download videos for each tag using a live browser session.

    Args:
        driver: a started selenium webdriver; it is always quit on return,
            including when an exception escapes.
        tags: iterable of tag names.
        amount: maximum number of videos per tag; values <= 0 mean no limit
            and no extra scrolling.
        out_dir: directory path (with trailing separator) to save into.
    """
    from selenium.webdriver.common.by import By
    try:
        for tag in tags:
            if not _retry(driver.get, _tag_url(tag)):
                # Do not scrape whatever page is still loaded from a previous
                # tag and save it under this tag's name.
                print("Could not load the page for tag " + tag + ", skipping it")
                continue
            if amount > 0:
                # Scroll to the bottom repeatedly so more entries get loaded.
                for _ in range(amount // VIDEOS_PER_SCROLL + 1):
                    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                    time.sleep(SCROLL_PAUSE)
            # ``find_elements(By..., ...)`` works on both Selenium 3 and 4;
            # the ``find_elements_by_*`` helpers were removed in Selenium 4.
            videos = driver.find_elements(By.TAG_NAME, "video")
            if amount > 0:
                videos = videos[:amount]
            for video in videos:
                url = video.get_attribute("data-src")
                if not url:
                    # ``get_attribute`` returns None when the attribute is
                    # absent; there is nothing to download in that case.
                    continue
                _save_video(url, _file_name(tag, url.encode('utf-8')), out_dir)
    finally:
        driver.quit()


def _run():
    """Dispatch to HTML or JS mode according to the parsed arguments."""
    if not args.usingJavascript:
        _scrape_html(args.tags, args.amount, _output_dir())
        return

    print("using JS mode...")
    driver = _start_webdriver()
    if driver is None:
        print("Could not find webdriver!")
        print("You'll have to manually install a webdriver to your path")
        print(
            "If you are using GNU/Linux, it is likely that you can install from your standard repos. "
            "Debian labels their chromium driver chromium-driver. "
            "If you wish to use an ungoogled version of chromium (as to reduce possiblity of spying), "
            "you can find a link to that here: https://github.com/Eloston/ungoogled-chromium#downloads. "
            "On Debian (or debian likes such as Ubuntu or Devuan), you may then run "
            "apt install ungoogled-chromium-driver and this will no longer fail."
        )
        # Exit non-zero so callers can tell the run failed.
        sys.exit(1)
    try:
        out_dir = _output_dir()
    except Exception:
        # Do not leave a browser process behind if the target directory
        # cannot be prepared.
        driver.quit()
        raise
    _scrape_js(driver, args.tags, args.amount, out_dir)


_run()
|