python rewrite

author: msglm <msglm@techchud.xyz> 2024-04-06 00:59:51 -0500
committer: msglm <msglm@techchud.xyz> 2024-04-06 00:59:51 -0500
commit: de8ed62c6e86d1a03f69fc6846a1657b1c90c753 (patch)
tree: 4f8cb34eeb44148639cf43ae07e64d9c64802fe3
parent: 6d1e2f05f1c81f56951a2e890115b8e8bfb37e80 (diff)
download: stashley-de8ed62c6e86d1a03f69fc6846a1657b1c90c753.tar.gz
stashley-de8ed62c6e86d1a03f69fc6846a1657b1c90c753.tar.bz2
stashley-de8ed62c6e86d1a03f69fc6846a1657b1c90c753.zip
7 files changed, 104 insertions, 56 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a41d03e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+manifest.scm
diff --git a/setup.py b/setup.py
index 97abf83..d2c629e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-from setuptools import setup
+from setuptools import setup, find_packages
 
 setup(
    name='stashley',
@@ -6,6 +6,11 @@ setup(
    description='a program used for archiving personalities using TOML files',
    author='msglm',
    author_email='msglm@techchud.xyz',
-   packages=['src'],  #same as name
-   install_requires=['yt-dlp', 'gallery-dl'], #external packages as dependencies
+   url="https://git.techchud.xyz/stashley/",
+   packages=find_packages(),  # every package directory that has an __init__.py
+   install_requires=['yt-dlp', 'gallery-dl', 'xdg', 'tomli'],  # tomli is imported by stashley/stashley.py
+   entry_points='''
+        [console_scripts]
+        stashley=stashley.stashley:args_parser
+    '''
    )
diff --git a/src/__init__.py b/src/__init__.py
deleted file mode 100644
index 500b322..0000000
--- a/src/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import tomllib
-import argparse
-
-parser = argparse.ArgumentParser(
-                    prog='Stashley',
-                    description='a program used for archiving personalities using TOML files'
-                    )
-parser.add_argument('filename', type=string, nargs=1, help="Toml file to process")
-
-args = parser.parse_args()
-
-input = tomllib.loads(args.filename)
-
-
diff --git a/stashley b/stashley
deleted file mode 100755
index 0bc2497..0000000
--- a/stashley
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/sh
-tomlq -r '.|keys_unsorted[]' "$1"  | while IFS= read -r site; 
-do 
-    if [ "$site" != "name" ] && [ "$site" != "IPFS" ]
-    then
-        tomlq -r ".$site.urls[]" "$1" | while IFS= read -r url;
-    do
-        case "$site" in
-            youtube)
-                yt-dlp --write-info-json --write-description --write-thumbnail --write-annotations --all-subs --ignore-error -f 'bestvideo[height>=720][fps>=60]+bestaudio/bestvideo+bestaudio/best' -o 'YouTube/%(uploader|author)s/%(title)s.%(ext)s' "$url"
-                ;;
-            tiktok)
-                yt-dlp --write-info-json --write-description --write-thumbnail --write-annotations --all-subs --ignore-error -f 'bestvideo[height>=720][fps>=60]+bestaudio/bestvideo+bestaudio/best' -o 'TikTok/%(uploader|author)s/%(title)s.%(ext)s' "$url"
-                ;;
-            linktree)
-                fixedurlname=$(echo "$url"|sed -e "s/\//∕/g")
-                wget -r -l 3 -e "robots=off" --warc-file="$(date +%s) - $fixedurlname" "$url"
-                ;;
-            website)
-                fixedurlname=$(echo "$url"|sed -e "s/\//∕/g")
-                wget -e "robots=off" --mirror --warc-file="$(date +%s) - $fixedurlname" "$url"
-                ;;
-            misskey)
-                gallery-dl --write-metadata "misskey:$url"
-                ;;
-            *)
-                gallery-dl --write-metadata "$url"
-                ;;
-        esac
-    done
-    fi
-done
-
-fdupes -dIr .
-
-if [ "$(tomlq -r '.IPFS' "$1")" = "true" ]
-then
-ipfs-archive-manager personality-archive "$(tomlq -r '.name' "$1")" .
-fi
diff --git a/stashley/__init__.py b/stashley/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/stashley/__init__.py
diff --git a/stashley/__pycache__/__init__.cpython-310.pyc b/stashley/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..b658025
--- /dev/null
+++ b/stashley/__pycache__/__init__.cpython-310.pyc
diff --git a/stashley/stashley.py b/stashley/stashley.py
new file mode 100644
index 0000000..e678e79
--- /dev/null
+++ b/stashley/stashley.py
@@ -0,0 +1,95 @@
+
+import tomli
+
+import argparse
+import yt_dlp
+import gallery_dl
+import os
+from datetime import datetime
+
+gallery_dl.config.set(('extractor',), "base-directory", './')  # base-directory for all gallery-dl extractors
+
+parser = argparse.ArgumentParser(
+    prog='Stashley',
+    description='a program used for archiving personalities using TOML files')
+parser.add_argument('filename', type=str, nargs='?', help="Toml file to process")
+args = parser.parse_args()
+if args.filename is None:  # nargs='?' allows omission; print usage instead of an open(None) TypeError
+    parser.error("a TOML file to process is required")
+with open(args.filename, 'rb') as toml_file:  # tomli.load wants a binary handle; close it once parsed
+    input = tomli.load(toml_file)
+
+def legalfy(url: str) -> str:
+    """Return *url* with every non-alphanumeric character removed."""
+    return ''.join(filter(str.isalnum, url))
+
+def figureUse(lore: dict, keyname: str) -> str:
+    """Choose the download backend for one site table.
+
+    An explicit ``use`` entry in *lore* (compared exactly, case-sensitive)
+    wins; without one the lower-cased *keyname* decides.
+
+    Returns ``"ytdl"``, ``"website"`` or, for anything else, ``"gallerydl"``.
+    """
+    ytdl_aliases = ("yt-dlp", "ytdlp", "youtube-dl", "yt-dl")
+    website_aliases = ("website", "site")
+
+    if "use" in lore:
+        requested = lore["use"]
+        if requested in ytdl_aliases:
+            return "ytdl"
+        if requested in website_aliases:
+            return "website"
+        return "gallerydl"
+
+    site = keyname.lower()
+    if site == "youtube":
+        return "ytdl"
+    if site in website_aliases:
+        return "website"
+    return "gallerydl"
+
+def videohost(sitename: str, urls: list[str]):
+    """Download *urls* with yt-dlp using the fixed options below; *sitename* is not used."""
+    ydl_options = {
+        'allsubtitles': True,
+        'extract_flat': 'discard_in_playlist',
+        'format': 'bestvideo[height>=720][fps>=60]+bestaudio/bestvideo+bestaudio/best',
+        'fragment_retries': 10,
+        'ignoreerrors': True,
+        'outtmpl': {'default': 'YouTube/%(uploader|author)s/%(title)s.%(ext)s'},
+        'postprocessors': [{'key': 'FFmpegConcat', 'only_multi_video': True, 'when': 'playlist'}],
+        'retries': 10,
+        'writeannotations': True,
+        'writedescription': True,
+        'writeinfojson': True,
+        'writesubtitles': True,
+        'writethumbnail': True,
+    }
+    with yt_dlp.YoutubeDL(ydl_options) as downloader:
+        downloader.download(urls)
+
+def gallery(urls: list[str]):  # run one gallery-dl DownloadJob per URL, in list order
+    for target in urls:
+        gallery_dl.job.DownloadJob(target).run()
+
+def archivesite(urls: list[str]):  # save each URL as a WARC under websites/ by running wget
+    import subprocess  # local import; an argv list means no shell ever parses the URL text
+    os.makedirs('websites', exist_ok=True)
+    for url in urls:  # WARC name: "<Unix time> - <alphanumeric url>"; timestamp() honours the local UTC offset
+        subprocess.run(['wget', '-E', '-H', '-k', '-p', '--delete-after', f'--warc-file=websites/{int(datetime.now().timestamp())} - {legalfy(url)}', url])
+
+def args_parser():
+    """Entry point named by setup.py's console_scripts: archive every table under ``sites``."""
+    dispatch = {"ytdl": videohost,
+                "gallerydl": lambda _name, urls: gallery(urls),
+                "website": lambda _name, urls: archivesite(urls)}
+    for key, lore in input["sites"].items():
+        print("Downloading for " + key)
+        program = figureUse(lore, key)
+        print("Opting to use: " + program)
+        dispatch[program](key, lore["urls"])
+
+if __name__ == "__main__":  # running the file directly still performs the archive job
+    args_parser()
+
author	msglm <msglm@techchud.xyz>	2024-04-06 00:59:51 -0500
committer	msglm <msglm@techchud.xyz>	2024-04-06 00:59:51 -0500
commit	de8ed62c6e86d1a03f69fc6846a1657b1c90c753 (patch)
tree	4f8cb34eeb44148639cf43ae07e64d9c64802fe3
parent	6d1e2f05f1c81f56951a2e890115b8e8bfb37e80 (diff)
download	stashley-de8ed62c6e86d1a03f69fc6846a1657b1c90c753.tar.gz stashley-de8ed62c6e86d1a03f69fc6846a1657b1c90c753.tar.bz2 stashley-de8ed62c6e86d1a03f69fc6846a1657b1c90c753.zip