From ce1dc58968255edcda797948687b5c489ef9843e Mon Sep 17 00:00:00 2001 From: msglm Date: Wed, 11 Jan 2023 20:51:08 -0600 Subject: program is now capable of multiprocessing --- comedyGenerator | 66 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/comedyGenerator b/comedyGenerator index 58bff1e..9a7ca5e 100755 --- a/comedyGenerator +++ b/comedyGenerator @@ -6,7 +6,7 @@ import os import argparse import time import hashlib -import json +from multiprocessing import Pool @@ -34,6 +34,14 @@ parser.add_argument('--amount','-a', type=int ) +parser.add_argument('--jobs','-j', + default=1, + dest='jobs', + action="store", + help="how many jobs you'd like to use while downloading", + type=int + ) + parser.add_argument('tags', nargs='+', type=str, @@ -104,7 +112,7 @@ for tags in args.tags: break JSONDump = tagPage.json() - while len(videos) < args.amount: + while len(videos) < args.amount and len(JSONDump['items']) > 0: print("Currently have " + str(len(videos)) + " videos out of " + str(args.amount) + " (" + str((len(videos)/args.amount)*100) + "%)") for item in range(len(JSONDump['items'])): videos.append(JSONDump['items'][item]['url']) @@ -124,34 +132,38 @@ for tags in args.tags: videos = videos[:args.amount] if args.debug: print("Videos list truncated! Its now: " + str(len(videos)) + " units long") - for video in videos: + + def download(video): if args.debug: print("Now running for " + str(video)) + name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4" + path = '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name + if args.debug: + print("name read as: " + name) + if os.path.exists(path): + print(name + " already exists!") + else: + print("saving " + video + " as " + name) + for tries in range(100): + try: + urllib.request.urlretrieve(video, path) + except: + if tries < 100 - 1: + print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!") + time.sleep(tries*1.5) + continue + break + + if args.jobs > 1: + pool = Pool(args.jobs) + for video in videos: + pool.apply_async(download, (video,)) + pool.close() + pool.join() + else: + for video in videos: + download(video) - if isinstance(video, str): - if args.debug: - print("URL read as: " + video) - name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4" - path = '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name - if args.debug: - print("name read as: " + name) - if os.path.exists(path): - print(name + " already exists!") - else: - print("saving " + video + " as " + name) - for tries in range(100): - try: - urllib.request.urlretrieve(video, path) - except: - if tries < 100 - 1: - print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!") - time.sleep(tries*1.5) - continue - break - #urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name) - - elif args.debug: - print("URL is NOT a string, it is a " + str(type(video))) #This program is free software: you can redistribute it and/or modify #it under the terms of the GNU Affero General Public License version 3 as published by -- cgit v1.2.3