summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: msglm <msglm@techchud.xyz> 2023-01-11 20:51:08 -0600
committer: msglm <msglm@techchud.xyz> 2023-01-11 20:51:08 -0600
commit: ce1dc58968255edcda797948687b5c489ef9843e (patch)
tree: 82d3f748fa13b44c44e708d1ea8ea85b5fd83fa7
parent: 126910409154457cbdbab1948bad6153b897a522 (diff)
download: comedyGenerator-ce1dc58968255edcda797948687b5c489ef9843e.tar.gz
comedyGenerator-ce1dc58968255edcda797948687b5c489ef9843e.tar.bz2
comedyGenerator-ce1dc58968255edcda797948687b5c489ef9843e.zip
program is now capable of multiprocessing
-rwxr-xr-x comedyGenerator 66
1 files changed, 39 insertions, 27 deletions
diff --git a/comedyGenerator b/comedyGenerator
index 58bff1e..9a7ca5e 100755
--- a/comedyGenerator
+++ b/comedyGenerator
@@ -6,7 +6,7 @@ import os
import argparse
import time
import hashlib
-import json
+from multiprocessing import Pool
@@ -34,6 +34,14 @@ parser.add_argument('--amount','-a',
type=int
)
+parser.add_argument('--jobs','-j',
+ default=1,
+ dest='jobs',
+ action="store",
+ help="how many jobs you'd like to use while downloading",
+ type=int
+ )
+
parser.add_argument('tags',
nargs='+',
type=str,
@@ -104,7 +112,7 @@ for tags in args.tags:
break
JSONDump = tagPage.json()
- while len(videos) < args.amount:
+ while len(videos) < args.amount and len(JSONDump['items']) > 0:
print("Currently have " + str(len(videos)) + " videos out of " + str(args.amount) + " (" + str((len(videos)/args.amount)*100) + "%)")
for item in range(len(JSONDump['items'])):
videos.append(JSONDump['items'][item]['url'])
@@ -124,34 +132,38 @@ for tags in args.tags:
videos = videos[:args.amount]
if args.debug:
print("Videos list truncated! Its now: " + str(len(videos)) + " units long")
- for video in videos:
+
+ def download(video):
if args.debug:
print("Now running for " + str(video))
+ name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4"
+ path = '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name
+ if args.debug:
+ print("name read as: " + name)
+ if os.path.exists(path):
+ print(name + " already exists!")
+ else:
+ print("saving " + video + " as " + name)
+ for tries in range(100):
+ try:
+ urllib.request.urlretrieve(video, path)
+ except:
+ if tries < 100 - 1:
+ print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!")
+ time.sleep(tries*1.5)
+ continue
+ break
+
+ if args.jobs > 1:
+ pool = Pool(args.jobs)
+ for video in videos:
+ pool.apply_async(download, (video,))
+ pool.close()
+ pool.join()
+ else:
+ for video in videos:
+ download(video)
- if isinstance(video, str):
- if args.debug:
- print("URL read as: " + video)
- name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4"
- path = '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name
- if args.debug:
- print("name read as: " + name)
- if os.path.exists(path):
- print(name + " already exists!")
- else:
- print("saving " + video + " as " + name)
- for tries in range(100):
- try:
- urllib.request.urlretrieve(video, path)
- except:
- if tries < 100 - 1:
- print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!")
- time.sleep(tries*1.5)
- continue
- break
- #urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name)
-
- elif args.debug:
- print("URL is NOT a string, it is a " + str(type(video)))
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU Affero General Public License version 3 as published by