From 5489d351439d32b5a432190b98bb97a8e8fde123 Mon Sep 17 00:00:00 2001 From: msglm Date: Wed, 11 Jan 2023 20:51:08 -0600 Subject: logging and config capabilties introduced. removed source url --- comedyGenerator | 82 +++++++++++++++++++++++++++++++++++++++++++------------- config | 8 ++++++ geckodriver.log | 0 requirements.txt | 4 --- 4 files changed, 71 insertions(+), 23 deletions(-) create mode 100644 config delete mode 100644 geckodriver.log delete mode 100644 requirements.txt diff --git a/comedyGenerator b/comedyGenerator index cdb109a..e2acc2f 100755 --- a/comedyGenerator +++ b/comedyGenerator @@ -1,32 +1,53 @@ #!/usr/bin/python3 import requests import urllib.request -import sys import os import argparse import time import hashlib +import configparser from multiprocessing import Pool +import xdg +import configparser +import os - +config = configparser.ConfigParser() +configdir = str(xdg.XDG_CONFIG_HOME) +config.read(configdir + "/comedyGenerator") parser = argparse.ArgumentParser(add_help=True) -parser.add_argument('--debug', +parser.add_argument('--verbose', '-v', default=False, action='store_true', - dest='debug', - help='Enables debug output' + dest='verbose', + help='Enables verbose output' ) parser.add_argument('--amount','-a', - default=0, + default=10, dest='amount', action="store", help="the amount of funnies you'd like to download per tag", type=int ) +parser.add_argument('--output','-o', + default=config['DEFAULT']['output'], + dest='output', + action="store", + help="the output directory of the funnies", + type=str + ) + +parser.add_argument('--log','-l', + default=config['DEFAULT']['log'], + dest='log', + action="store", + help="the output directory of the log file for your funnies", + type=str + ) + parser.add_argument('--jobs','-j', default=1, dest='jobs', @@ -43,6 +64,16 @@ parser.add_argument('tags', args = parser.parse_args() +#string comparision so I don't need two vars; bloat kills bloat +logging = False +if not args.log == "False": + logging = True + logFile = open(args.log, "a") + tmpLog = [] + if args.verbose: + print("We're going to be logging!") + print("Logging Path: " + args.log) + headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", @@ -71,8 +102,16 @@ for tags in args.tags: "Sec-Fetch-Site": "cross-site", "Cache-Control": "max-age=0" } - - master = requests.get('https://ifunny.co/', headers=headers) + + for tries in range(100): + try: + master = requests.get('https://ifunny.co/', headers=headers) + except: + if tries < 100 - 1: + print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!") + time.sleep(tries*1.5) + continue + break combineHeader = (dict(master.headers)|headers) requestHeader = { "User-Agent":combineHeader['User-Agent'], @@ -91,7 +130,7 @@ for tags in args.tags: for tries in range(100): try: tagPage = requests.get("https://ifunny.co/api/v1/feeds?filter=video&tag=" + tags, headers=requestHeader, cookies=requestCookies) - if args.debug: + if args.verbose: print("Got Webpage!") except: if tries < 100 - 1: @@ -109,7 +148,7 @@ for tags in args.tags: try: tagPage = requests.get("https://ifunny.co/api/v1/feeds?filter=video&tag=" + tags + "&next=" + JSONDump['pagination']['next'], headers=requestHeader, cookies=requestCookies) JSONDump = tagPage.json() - if args.debug: + if args.verbose: print("Got New Tag Page!") break except: @@ -119,41 +158,46 @@ for tags in args.tags: continue if len(videos) > args.amount: videos = videos[:args.amount] - if args.debug: + if args.verbose: print("Videos list truncated! Its now: " + str(len(videos)) + " units long") def download(video): - if args.debug: + if args.verbose: print("Now running for " + str(video)) name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4" - path = '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name - if args.debug: + outputPath = args.output + name + if args.verbose: print("name read as: " + name) - if os.path.exists(path): + if os.path.exists(outputPath): print(name + " already exists!") else: print("saving " + video + " as " + name) for tries in range(100): try: - urllib.request.urlretrieve(video, path) + urllib.request.urlretrieve(video, outputPath) except: if tries < 100 - 1: print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!") time.sleep(tries*1.5) continue break - if args.jobs > 1: pool = Pool(args.jobs) for video in videos: pool.apply_async(download, (video,)) pool.close() pool.join() + if logging: + for video in videos: + logName = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4" + logPath = args.output + logName + if args.verbose: + print("Writing " + logPath + " to log file") + logFile.write(logPath + "\n") + logFile.close() else: for video in videos: download(video) - - #This program is free software: you can redistribute it and/or modify #it under the terms of the GNU Affero General Public License version 3 as published by #the Free Software Foundation. diff --git a/config b/config new file mode 100644 index 0000000..a41902d --- /dev/null +++ b/config @@ -0,0 +1,8 @@ +#Move this to ~/.config/comedyGenerator +[DEFAULT] + +#Dictates where videos will be outputted to +output=./ + +#Location of the log file, if you care about that. False will disable the log file +log=False diff --git a/geckodriver.log b/geckodriver.log deleted file mode 100644 index e69de29..0000000 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 6f555f5..0000000 --- a/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -beautifulsoup4 -requests -selenium -urllib3 -- cgit v1.2.3