summary refs log tree commit diff stats
diff options
context:
space:
mode:
author msglm <msglm@techchud.xyz> 2023-01-11 20:51:08 -0600
committer msglm <msglm@techchud.xyz> 2023-01-11 20:51:08 -0600
commit 85f166eeecf2d8ce60b606f39354c59542825b87 (patch)
tree 3fb92961e875ef162abcdb8403f0f6b83ad3bddd
parent defaceb8c3b53d2aefbd7679390e915b20953c19 (diff)
download comedyGenerator-85f166eeecf2d8ce60b606f39354c59542825b87.tar.gz
comedyGenerator-85f166eeecf2d8ce60b606f39354c59542825b87.tar.bz2
comedyGenerator-85f166eeecf2d8ce60b606f39354c59542825b87.zip
No more browser required, uses pure python and nothing more. Much faster and tells you percent completed when downloading the video metadata.
-rwxr-xr-x comedyGenerator 249
1 files changed, 112 insertions, 137 deletions
diff --git a/comedyGenerator b/comedyGenerator
index f281ed2..5b28284 100755
--- a/comedyGenerator
+++ b/comedyGenerator
@@ -6,118 +6,95 @@ import os
import argparse
import time
import hashlib
+import json
parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('--source',
- default=False,
- action='store_true',
- dest='source',
- help='Links to the source code of the software'
- )
+ default=False,
+ action='store_true',
+ dest='source',
+ help='Links to the source code of the software'
+ )
parser.add_argument('--debug',
- default=False,
- action='store_true',
- dest='debug',
- help='Enables debug output'
- )
-
-parser.add_argument('-js','-JS',"--Javascript",'--javascript',
- default=True,
- action='store_true',
- dest='usingJavascript',
- help='Enables the user of a webdriver to scrape funnies'
- )
+ default=False,
+ action='store_true',
+ dest='debug',
+ help='Enables debug output'
+ )
parser.add_argument('--amount','-a',
- default=0,
- dest='amount',
- action="store",
- help="the amount of funnies you'd like to download per tag",
- type=int
- )
+ default=0,
+ dest='amount',
+ action="store",
+ help="the amount of funnies you'd like to download per tag",
+ type=int
+ )
parser.add_argument('tags',
- nargs='+',
- type=str,
- help='Provides tags to be check for funny downloading'
- )
+ nargs='+',
+ type=str,
+ help='Provides tags to be check for funny downloading'
+ )
args = parser.parse_args()
if args.source:
print("https://git.snootgame.xyz/PrincipalSpears/comedyGenerator")
-
-if not args.usingJavascript:
- from bs4 import BeautifulSoup
- print("using html mode...")
- for tags in args.tags:
- URL = requests.get('https://ifunny.co/tags/' + tags + '?filter=video')
- webpage = BeautifulSoup(URL.content, 'html.parser')
- videos = webpage.findAll('video')
- if args.amount > 0:
- videos = video[:arg.amount]
- for video in videos:
- name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4"
- print("saving " + video['data-src'] + " as " + name)
- urllib.request.urlretrieve(video['data-src'], '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name)
-elif args.usingJavascript:
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- print("using JS mode...")
-
- try:
- if args.debug:
- print("testing if firefox works...")
- from selenium.webdriver.firefox.options import Options
- firefox_options = Options()
- firefox_options.add_argument("--headless")
- driver = webdriver.Firefox(options=firefox_options)
- if args.debug:
- print("Firefox Works!")
-
- except:
- if args.debug:
- print("Firefox didn't work! Trying Chrome!")
- try:
- from selenium.webdriver.chrome.options import Options
- chrome_options = Options()
- #chrome_options.add_argument("--headless") #TODO completely breaks script and screenshot shows a white screen and nothing but. likely got discovered.
- driver = webdriver.Chrome(options=chrome_options)
- if args.debug:
- print("Chrome Works!")
- except:
- if args.debug:
- print("Chrome Failed! Going to attempt an install of the firefox webdriver")
- try:
- from selenium.webdriver.firefox.options import Options
- firefox_options = Options()
- firefox_options.add_argument("--headless")
- driver = webdriver.Firefox(executable_path=GeckoDriverManager().install(), options=firefox_options)
- if args.debug:
- print("Install successful! using Firefox!")
- except:
- if args.debug:
- print("Install Failed! Trying Chrome webdriver install!")
- try:
- from selenium.webdriver.chrome.options import Options
- chrome_options = Options()
- chrome_options.add_argument("--headless")
- driver = webdriver.Chrome(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(), options=chrome_options)
- except:
- print("Could not find webdriver!")
- print("You'll have to manually install a webdriver to your path")
- print("If you are using GNU/Linux, it is likely that you can install from your standard repos. Debian labels their chromium driver chromium-driver. If you wish to use an ungoogled version of chromium (as to reduce possiblity of spying), you can find a link to that here: https://github.com/Eloston/ungoogled-chromium#downloads. On Debian (or debian likes such as Ubuntu or Devuan), you may then run apt install ungoogled-chromium-driver and this will no longer fail.")
- sys.exit()
- for tags in args.tags:
+ sys.exit(0)
+
+headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0",
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept-Language": "en-US,en;q=0.5",
+ "DNT": "1",
+ "Connection": "keep-alive",
+ "Upgrade-Insecure-Requests": "1",
+ "Sec-Fetch-Dest": "document",
+ "Sec-Fetch-Mode": "navigate",
+ "Sec-Fetch-Site": "cross-site",
+ "Cache-Control": "max-age=0"
+ }
+
+videos = []
+for tags in args.tags:
if args.debug:
print("Downloading Tag: " + tags)
+ headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0",
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept-Language": "en-US,en;q=0.5",
+ "DNT": "1",
+ "Connection": "keep-alive",
+ "Upgrade-Insecure-Requests": "1",
+ "Sec-Fetch-Dest": "document",
+ "Sec-Fetch-Mode": "navigate",
+ "Sec-Fetch-Site": "cross-site",
+ "Cache-Control": "max-age=0"
+ }
+
+ master = requests.get('https://ifunny.co/', headers=headers)
+ combineHeader = (dict(master.headers)|headers)
+ requestHeader = {
+ "User-Agent":combineHeader['User-Agent'],
+ "Content-Type":combineHeader['Content-Type'],
+ "x-requested-with": "fetch",
+ "x-csrf-token": combineHeader['Set-Cookie'].split(';')[0].split('=')[1],
+ "set-cookies":combineHeader['Set-Cookie'],
+ "access-control-allow-headers":combineHeader['access-control-allow-headers']
+ }
+ requestCookies = {
+ "CID" : combineHeader['Set-Cookie'].split(';')[3].split('=')[2],
+ "sound" : "off",
+ "viewMode" : "list",
+ "x-csrf-token": combineHeader['Set-Cookie'].split(';')[0].split('=')[1]
+ }
for tries in range(100):
try:
- driver.get('https://ifunny.co/tags/' + tags + '?filter=video')
+ tagPage = requests.get("https://ifunny.co/api/v1/feeds?filter=video&tag=" + tags, headers=requestHeader, cookies=requestCookies)
if args.debug:
print("Got Webpage!")
except:
@@ -127,65 +104,63 @@ elif args.usingJavascript:
continue
break
- if args.amount > 0:
- if args.debug:
- print("starting to scroll...")
- isTimesScrolled = 0
- oughtTimeScrolled = (args.amount/10) + 1
- while isTimesScrolled < int(oughtTimeScrolled):
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
- isTimesScrolled = isTimesScrolled + 1
- time.sleep(3)
- if args.debug:
- print("Need to scroll " + str(oughtTimeScrolled-isTimesScrolled) + " more times...")
- videos = driver.find_elements(By.TAG_NAME,"video")
- if args.debug:
- print("Original Videos List: ")
- print(videos)
- if args.amount > 0:
+ JSONDump = tagPage.json()
+ while len(videos) < args.amount:
+ print("Currently have " + str(len(videos)) + " videos out of " + str(args.amount) + " (" + str((len(videos)/args.amount)*100) + "%)")
+ for item in range(len(JSONDump['items'])):
+ videos.append(JSONDump['items'][item]['url'])
+ for tries in range(100):
+ try:
+ tagPage = requests.get("https://ifunny.co/api/v1/feeds?filter=video&tag=" + tags + "&next=" + JSONDump['pagination']['next'], headers=requestHeader, cookies=requestCookies)
+ JSONDump = tagPage.json()
+ if args.debug:
+ print("Got New Tag Page!")
+ break
+ except:
+ if tries < 100 - 1:
+ print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!")
+ time.sleep(tries*1.5)
+ continue
+ if len(videos) > args.amount:
videos = videos[:args.amount]
if args.debug:
print("Videos list truncated! Its now: " + str(len(videos)) + " units long")
for video in videos:
print("Now running for " + str(video))
- URL = video.get_attribute("data-src")
-
- if isinstance(URL, str):
+
+ if isinstance(video, str):
if args.debug:
- print("URL read as: " + URL)
- name = tags + "-" + hashlib.md5(URL.encode('utf-8')).hexdigest() + ".mp4"
+ print("URL read as: " + video)
+ name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4"
+ path = '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name
if args.debug:
print("name read as: " + name)
- print("saving " + video.get_attribute("data-src") + " as " + name)
- for tries in range(100):
- try:
- urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name)
- except:
+ print("saving " + video + " as " + name)
+ if os.path.exists(path):
+ print(name + " already exists!")
+ else:
+ for tries in range(100):
+ try:
+ urllib.request.urlretrieve(video, path)
+ except:
if tries < 100 - 1:
print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!")
time.sleep(tries*1.5)
- continue
- break
- #urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name)
+ continue
+ break
+ #urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name)
elif args.debug:
- print("URL is NOT a string, it is a " + str(type(URL)))
-
-
+ print("URL is NOT a string, it is a " + str(type(video)))
- driver.quit()
-else:
- print("Neither Javascript or HTML was given!")
- sys.exit(1)
-
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License version 3 as published by
-# the Free Software Foundation.
+#This program is free software: you can redistribute it and/or modify
+#it under the terms of the GNU Affero General Public License version 3 as published by
+#the Free Software Foundation.
#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#GNU Affero General Public License for more details.
#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#You should have received a copy of the GNU Affero General Public License
+#along with this program. If not, see <https://www.gnu.org/licenses/>.