-rwxr-xr-x  comedyGenerator | 66
1 file changed, 39 insertions(+), 27 deletions(-)
diff --git a/comedyGenerator b/comedyGenerator
index 58bff1e..9a7ca5e 100755
--- a/comedyGenerator
+++ b/comedyGenerator
@@ -6,7 +6,7 @@ import os
 import argparse
 import time
 import hashlib
-import json
+from multiprocessing import Pool
@@ -34,6 +34,14 @@ parser.add_argument('--amount','-a',
         type=int
         )
+parser.add_argument('--jobs','-j',
+        default=1,
+        dest='jobs',
+        action="store",
+        help="how many jobs you'd like to use while downloading",
+        type=int
+        )
+
 parser.add_argument('tags',
         nargs='+',
         type=str,
@@ -104,7 +112,7 @@ for tags in args.tags:
             break
         JSONDump = tagPage.json()
-        while len(videos) < args.amount:
+        while len(videos) < args.amount and len(JSONDump['items']) > 0:
             print("Currently have " + str(len(videos)) + " videos out of " + str(args.amount) + " (" + str((len(videos)/args.amount)*100) + "%)")
             for item in range(len(JSONDump['items'])):
                 videos.append(JSONDump['items'][item]['url'])
@@ -124,34 +132,38 @@ for tags in args.tags:
             videos = videos[:args.amount]
             if args.debug:
                 print("Videos list truncated! Its now: " + str(len(videos)) + " units long")
-        for video in videos:
+
+        def download(video):
             if args.debug:
                 print("Now running for " + str(video))
+            name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4"
+            path = '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name
+            if args.debug:
+                print("name read as: " + name)
+            if os.path.exists(path):
+                print(name + " already exists!")
+            else:
+                print("saving " + video + " as " + name)
+                for tries in range(100):
+                    try:
+                        urllib.request.urlretrieve(video, path)
+                    except:
+                        if tries < 100 - 1:
+                            print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!")
+                            time.sleep(tries*1.5)
+                        continue
+                    break
+
+        if args.jobs > 1:
+            pool = Pool(args.jobs)
+            for video in videos:
+                pool.apply_async(download, (video,))
+            pool.close()
+            pool.join()
+        else:
+            for video in videos:
+                download(video)
-            if isinstance(video, str):
-                if args.debug:
-                    print("URL read as: " + video)
-                name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4"
-                path = '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name
-                if args.debug:
-                    print("name read as: " + name)
-                if os.path.exists(path):
-                    print(name + " already exists!")
-                else:
-                    print("saving " + video + " as " + name)
-                    for tries in range(100):
-                        try:
-                            urllib.request.urlretrieve(video, path)
-                        except:
-                            if tries < 100 - 1:
-                                print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!")
-                                time.sleep(tries*1.5)
-                            continue
-                        break
-                #urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name)
-
-            elif args.debug:
-                print("URL is NOT a string, it is a " + str(type(video)))
 #This program is free software: you can redistribute it and/or modify
 #it under the terms of the GNU Affero General Public License version 3 as published by
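
Note: the snippet below is a minimal standalone sketch of the parallel-download pattern this commit introduces (a --jobs count fed into multiprocessing.Pool with apply_async, falling back to a plain loop when jobs is 1). It is not the script itself; fetch(), urls, jobs and the example URLs are hypothetical stand-ins for download(), the scraped video list, and the new --jobs/-j argument.

from multiprocessing import Pool
import urllib.request

def fetch(url):
    # Module-level worker with a __main__ guard below, so the example also
    # works where multiprocessing uses spawn instead of fork.
    urllib.request.urlretrieve(url, url.rsplit('/', 1)[-1])
    return url

if __name__ == '__main__':
    # Placeholder inputs; in the script these come from the scraped tag pages
    # and args.jobs.
    urls = ['https://example.com/a.mp4', 'https://example.com/b.mp4']
    jobs = 2
    if jobs > 1:
        with Pool(jobs) as pool:
            results = [pool.apply_async(fetch, (u,)) for u in urls]
            for r in results:
                r.get()  # get() re-raises worker errors; a bare apply_async hides them
    else:
        for u in urls:
            fetch(u)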
