now works with javascript

author: msglm <msglm@techchud.xyz> 2023-01-11 20:51:06 -0600
committer: msglm <msglm@techchud.xyz> 2023-01-11 20:51:06 -0600
commit: 90ed90eceb1b92f9d75c935dcf4f70c60289f49c (patch)
tree: 69f01669547ecc3b8e7698a720870130f2979824
parent: 2db2c6857c1d8d6537b0c8862bedcf14c2ef4621 (diff)
download: comedyGenerator-90ed90eceb1b92f9d75c935dcf4f70c60289f49c.tar.gz
comedyGenerator-90ed90eceb1b92f9d75c935dcf4f70c60289f49c.tar.bz2
comedyGenerator-90ed90eceb1b92f9d75c935dcf4f70c60289f49c.zip
2 files changed, 205 insertions, 13 deletions
diff --git a/comedyGenerator b/comedyGenerator
new file mode 100755
index 0000000..0b21710
--- /dev/null
+++ b/comedyGenerator
@@ -0,0 +1,105 @@
+#!/usr/bin/python3
+import requests
+import urllib.request
+import sys
+import os
+import argparse
+import time
+
+parser = argparse.ArgumentParser(add_help=True)
+
+parser.add_argument('-js','-JS',"--Javascript",'--javascript',
+                    default=False,
+                    action='store_true',
+                    dest='usingJavascript',
+                    help='Enables the user of a webdriver to scrape funnies'
+                    )
+
+parser.add_argument('--amount','-a',
+                    default=0,
+                    dest='amount',
+                    action="store",
+                    help="the amount of funnies you'd like to download per tag",
+                    type=int
+                    )
+
+parser.add_argument('tags',
+                    nargs='+',
+                    type=str,
+                    help='Provides tags to be check for funny downloading'
+                    )
+
+args = parser.parse_args()
+
+if not args.usingJavascript:
+    from bs4 import BeautifulSoup
+    print("using html mode...")
+    for tags in args.tags:
+        URL = requests.get('https://ifunny.co/tags/' + tags + '?filter=video')
+        webpage = BeautifulSoup(URL.content, 'html.parser')
+        videos = webpage.findAll('video')
+        epoch = len([item for item in os.listdir('/home/'+ os.environ['USER'] + '/Videos/unsorted/')])
+        if args.amount > 0:
+            videos = video[:arg.amount]
+        for video in videos:
+            epoch = epoch + 1
+            print(video['data-src'])
+            urllib.request.urlretrieve(video['data-src'], '/home/' + os.environ['USER'] + '/Videos/unsorted/' + str(epoch) + '.mp4') 
+elif args.usingJavascript:
+    from selenium import webdriver
+    print("using JS mode...")
+    
+    try:
+        from selenium.webdriver.firefox.options import Options
+        firefox_options = Options()
+        firefox_options.add_argument("--headless")
+        driver = webdriver.Firefox(options=firefox_options)
+
+    except:
+            try:
+                from selenium.webdriver.chrome.options import Options
+                chrome_options = Options()
+                chrome_options.add_argument("--headless")
+                driver = webdriver.Chrome(options=chrome_options)
+            except:
+                    try:
+                        from selenium.webdriver.firefox.options import Options
+                        firefox_options = Options()
+                        firefox_options.add_argument("--headless")
+                        driver = webdriver.Firefox(executable_path=GeckoDriverManager().install(), options=firefox_options)
+                    except:
+                            try:
+                                from selenium.webdriver.chrome.options import Options
+                                chrome_options = Options()
+                                chrome_options.add_argument("--headless")
+                                driver = webdriver.Chrome(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(), options=chrome_options)
+                            except:
+                                    print("Could not find webdriver!")
+                                    print("You'll have to manually install a webdriver to your path")
+                                    print("If you are using GNU/Linux, it is likely that you can install from your standard repos. Debian labels their chromium driver chromium-driver. If you wish to use an ungoogled version of chromium (as to reduce possiblity of spying), you can find a link to that here: https://github.com/Eloston/ungoogled-chromium#downloads. On Debian (or debian likes such as Ubuntu or Devuan), you may then run apt install ungoogled-chromium-driver and this will no longer fail.")
+                                    sys.exit()
+    for tags in args.tags:
+        driver.get('https://ifunny.co/tags/' + tags + '?filter=video')
+        if args.amount > 0:
+            isTimesScrolled = 0
+            oughtTimeScrolled = (args.amount/10) + 1
+            while isTimesScrolled < int(oughtTimeScrolled):
+                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+                isTimesScrolled = isTimesScrolled + 1
+                time.sleep(3)
+        videos = driver.find_elements_by_tag_name("video")
+        if args.amount > 0:
+            videos = videos[:args.amount]
+        epoch = len([item for item in os.listdir('/home/'+ os.environ['USER'] + '/Videos/unsorted/')])
+        for video in videos:
+            print("saving " + video.get_attribute("data-src") + " as " + '/home/' + os.environ['USER'] + '/Videos/unsorted/' + str(epoch) + '.mp4')
+            urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + str(epoch) + '.mp4') 
+        driver.quit()
+
+
+else:
+    print("Neither Javascript or HTML was given!")
+    sys.exit(1)
+    
+
+
diff --git a/stealIfunny b/stealIfunny
index aa57f3d..0b21710 100755
--- a/stealIfunny
+++ b/stealIfunny
@@ -1,18 +1,105 @@
 #!/usr/bin/python3
-from bs4 import BeautifulSoup
 import requests
 import urllib.request
 import sys
 import os
-if len(sys.argv) == 1:
-    print("List tags at the end you'd like to download")
-for tags in sys.argv:
-   URL = requests.get('https://ifunny.co/tags/' + tags + '?filter=video')
-   webpage = BeautifulSoup(URL.content, 'html.parser')
-
-   epoch = len([item for item in os.listdir('/home/joybuke/Media/Videos/unsorted/')])
-   videos = webpage.findAll('video')
-   for video in videos:
-       epoch = epoch + 1
-       print(video['data-src'])
-       urllib.request.urlretrieve(video['data-src'], '/home/joybuke/Media/Videos/unsorted/' + str(epoch) + '.mp4') 
+import argparse
+import time
+
+parser = argparse.ArgumentParser(add_help=True)
+
+parser.add_argument('-js','-JS',"--Javascript",'--javascript',
+                    default=False,
+                    action='store_true',
+                    dest='usingJavascript',
+                    help='Enables the user of a webdriver to scrape funnies'
+                    )
+
+parser.add_argument('--amount','-a',
+                    default=0,
+                    dest='amount',
+                    action="store",
+                    help="the amount of funnies you'd like to download per tag",
+                    type=int
+                    )
+
+parser.add_argument('tags',
+                    nargs='+',
+                    type=str,
+                    help='Provides tags to be check for funny downloading'
+                    )
+
+args = parser.parse_args()
+
+if not args.usingJavascript:
+    from bs4 import BeautifulSoup
+    print("using html mode...")
+    for tags in args.tags:
+        URL = requests.get('https://ifunny.co/tags/' + tags + '?filter=video')
+        webpage = BeautifulSoup(URL.content, 'html.parser')
+        videos = webpage.findAll('video')
+        epoch = len([item for item in os.listdir('/home/'+ os.environ['USER'] + '/Videos/unsorted/')])
+        if args.amount > 0:
+            videos = video[:arg.amount]
+        for video in videos:
+            epoch = epoch + 1
+            print(video['data-src'])
+            urllib.request.urlretrieve(video['data-src'], '/home/' + os.environ['USER'] + '/Videos/unsorted/' + str(epoch) + '.mp4') 
+elif args.usingJavascript:
+    from selenium import webdriver
+    print("using JS mode...")
+    
+    try:
+        from selenium.webdriver.firefox.options import Options
+        firefox_options = Options()
+        firefox_options.add_argument("--headless")
+        driver = webdriver.Firefox(options=firefox_options)
+
+    except:
+            try:
+                from selenium.webdriver.chrome.options import Options
+                chrome_options = Options()
+                chrome_options.add_argument("--headless")
+                driver = webdriver.Chrome(options=chrome_options)
+            except:
+                    try:
+                        from selenium.webdriver.firefox.options import Options
+                        firefox_options = Options()
+                        firefox_options.add_argument("--headless")
+                        driver = webdriver.Firefox(executable_path=GeckoDriverManager().install(), options=firefox_options)
+                    except:
+                            try:
+                                from selenium.webdriver.chrome.options import Options
+                                chrome_options = Options()
+                                chrome_options.add_argument("--headless")
+                                driver = webdriver.Chrome(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(), options=chrome_options)
+                            except:
+                                    print("Could not find webdriver!")
+                                    print("You'll have to manually install a webdriver to your path")
+                                    print("If you are using GNU/Linux, it is likely that you can install from your standard repos. Debian labels their chromium driver chromium-driver. If you wish to use an ungoogled version of chromium (as to reduce possiblity of spying), you can find a link to that here: https://github.com/Eloston/ungoogled-chromium#downloads. On Debian (or debian likes such as Ubuntu or Devuan), you may then run apt install ungoogled-chromium-driver and this will no longer fail.")
+                                    sys.exit()
+    for tags in args.tags:
+        driver.get('https://ifunny.co/tags/' + tags + '?filter=video')
+        if args.amount > 0:
+            isTimesScrolled = 0
+            oughtTimeScrolled = (args.amount/10) + 1
+            while isTimesScrolled < int(oughtTimeScrolled):
+                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+                isTimesScrolled = isTimesScrolled + 1
+                time.sleep(3)
+        videos = driver.find_elements_by_tag_name("video")
+        if args.amount > 0:
+            videos = videos[:args.amount]
+        epoch = len([item for item in os.listdir('/home/'+ os.environ['USER'] + '/Videos/unsorted/')])
+        for video in videos:
+            print("saving " + video.get_attribute("data-src") + " as " + '/home/' + os.environ['USER'] + '/Videos/unsorted/' + str(epoch) + '.mp4')
+            urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + str(epoch) + '.mp4') 
+        driver.quit()
+
+
+else:
+    print("Neither Javascript or HTML was given!")
+    sys.exit(1)
+    
+
+
author	msglm <msglm@techchud.xyz>	2023-01-11 20:51:06 -0600
committer	msglm <msglm@techchud.xyz>	2023-01-11 20:51:06 -0600
commit	90ed90eceb1b92f9d75c935dcf4f70c60289f49c (patch)
tree	69f01669547ecc3b8e7698a720870130f2979824
parent	2db2c6857c1d8d6537b0c8862bedcf14c2ef4621 (diff)
download	comedyGenerator-90ed90eceb1b92f9d75c935dcf4f70c60289f49c.tar.gz comedyGenerator-90ed90eceb1b92f9d75c935dcf4f70c60289f49c.tar.bz2 comedyGenerator-90ed90eceb1b92f9d75c935dcf4f70c60289f49c.zip