From de8ed62c6e86d1a03f69fc6846a1657b1c90c753 Mon Sep 17 00:00:00 2001
From: msglm
Date: Sat, 6 Apr 2024 00:59:51 -0500
Subject: python rewrite

---
 .gitignore                                    |   1 +
 setup.py                                      |  11 ++-
 src/__init__.py                               |  14 ----
 stashley                                      |  39 -----------
 stashley/__init__.py                          |   0
 stashley/__pycache__/__init__.cpython-310.pyc | Bin 0 -> 2364 bytes
 stashley/stashley.py                          | 119 ++++++++++++++++++++++++++
 7 files changed, 128 insertions(+), 56 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 src/__init__.py
 delete mode 100755 stashley
 create mode 100644 stashley/__init__.py
 create mode 100644 stashley/__pycache__/__init__.cpython-310.pyc
 create mode 100644 stashley/stashley.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a41d03e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+manifest.scm
diff --git a/setup.py b/setup.py
index 97abf83..d2c629e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-from setuptools import setup
+from setuptools import setup, find_packages
 
 setup(
     name='stashley',
@@ -6,6 +6,11 @@ setup(
     description='a program used for archiving personalities using TOML files',
     author='msglm',
     author_email='msglm@techchud.xyz',
-    packages=['src'], #same as name
-    install_requires=['yt-dlp', 'gallery-dl'], #external packages as dependencies
+    url="https://git.techchud.xyz/stashley/",
+    packages=find_packages(), #same as name
+    install_requires=['yt-dlp', 'gallery-dl', 'xdg', 'tomli'], #external packages as dependencies
+    entry_points='''
+    [console_scripts]
+    stashley=stashley.stashley:args_parser
+    '''
 )
diff --git a/src/__init__.py b/src/__init__.py
deleted file mode 100644
index 500b322..0000000
--- a/src/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import tomllib
-import argparse
-
-parser = argparse.ArgumentParser(
-        prog='Stashley',
-        description='a program used for archiving personalities using TOML files'
-        )
-parser.add_argument('filename', type=string, nargs=1, help="Toml file to process")
-
-args = parser.parse_args()
-
-input = tomllib.loads(args.filename)
-
-
diff --git a/stashley b/stashley
deleted file mode 100755
index 0bc2497..0000000
--- a/stashley
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/sh
-tomlq -r '.|keys_unsorted[]' "$1" | while IFS= read -r site;
-do
-    if [ "$site" != "name" ] && [ "$site" != "IPFS" ]
-    then
-        tomlq -r ".$site.urls[]" "$1" | while IFS= read -r url;
-        do
-            case "$site" in
-                youtube)
-                    yt-dlp --write-info-json --write-description --write-thumbnail --write-annotations --all-subs --ignore-error -f 'bestvideo[height>=720][fps>=60]+bestaudio/bestvideo+bestaudio/best' -o 'YouTube/%(uploader|author)s/%(title)s.%(ext)s' "$url"
-                    ;;
-                tiktok)
-                    yt-dlp --write-info-json --write-description --write-thumbnail --write-annotations --all-subs --ignore-error -f 'bestvideo[height>=720][fps>=60]+bestaudio/bestvideo+bestaudio/best' -o 'TikTok/%(uploader|author)s/%(title)s.%(ext)s' "$url"
-                    ;;
-                linktree)
-                    fixedurlname=$(echo "$url"|sed -e "s/\//∕/g")
-                    wget -r -l 3 -e "robots=off" --warc-file="$(date +%s) - $fixedurlname" "$url"
-                    ;;
-                website)
-                    fixedurlname=$(echo "$url"|sed -e "s/\//∕/g")
-                    wget -e "robots=off" --mirror --warc-file="$(date +%s) - $fixedurlname" "$url"
-                    ;;
-                misskey)
-                    gallery-dl --write-metadata "misskey:$url"
-                    ;;
-                *)
-                    gallery-dl --write-metadata "$url"
-                    ;;
-            esac
-        done
-    fi
-done
-
-fdupes -dIr .
-
-if [ "$(tomlq -r '.IPFS' "$1")" = "true" ]
-then
-ipfs-archive-manager personality-archive "$(tomlq -r '.name' "$1")" .
-fi
diff --git a/stashley/__init__.py b/stashley/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/stashley/__pycache__/__init__.cpython-310.pyc b/stashley/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..b658025
Binary files /dev/null and b/stashley/__pycache__/__init__.cpython-310.pyc differ
diff --git a/stashley/stashley.py b/stashley/stashley.py
new file mode 100644
index 0000000..e678e79
--- /dev/null
+++ b/stashley/stashley.py
@@ -0,0 +1,119 @@
+"""Archive a personality's online presence as described by a TOML file."""
+
+import argparse
+import os
+import subprocess
+from datetime import datetime
+
+import gallery_dl
+import tomli
+import yt_dlp
+
+# Make gallery-dl write its downloads relative to the current directory.
+gallery_dl.config.set(('extractor',), "base-directory", './')
+
+
+def legalfy(url: str) -> str:
+    """Return *url* with every non-alphanumeric character removed."""
+    return ''.join(letter for letter in url if letter.isalnum())
+
+
+def figureUse(lore: dict, keyname: str) -> str:
+    """Pick the download backend for one site table.
+
+    An explicit "use" key in *lore* wins; otherwise the backend is guessed
+    from the (case-insensitive) table name. Anything unrecognised falls
+    back to gallery-dl.
+
+    Returns one of "ytdl", "website" or "gallerydl".
+    """
+    if "use" in lore:
+        match lore["use"]:
+            case "yt-dlp" | "ytdlp" | "youtube-dl" | "yt-dl":
+                return "ytdl"
+            case "website" | "site":
+                return "website"
+        return "gallerydl"
+
+    match keyname.lower():
+        case "youtube":
+            return "ytdl"
+        case "website" | "site":
+            return "website"
+    return "gallerydl"
+
+
+def videohost(sitename: str, urls: list[str]):
+    """Download every URL in *urls* with yt-dlp, including metadata."""
+    # NOTE(review): sitename is currently unused and the output directory is
+    # always "YouTube/", even for other video hosts - confirm this is intended.
+    opts = {'allsubtitles': True,
+            'extract_flat': 'discard_in_playlist',
+            'format': 'bestvideo[height>=720][fps>=60]+bestaudio/bestvideo+bestaudio/best',
+            'fragment_retries': 10,
+            'ignoreerrors': True,
+            'outtmpl': {'default': 'YouTube/%(uploader|author)s/%(title)s.%(ext)s'},
+            'postprocessors': [{'key': 'FFmpegConcat',
+                                'only_multi_video': True,
+                                'when': 'playlist'}],
+            'retries': 10,
+            'writeannotations': True,
+            'writedescription': True,
+            'writeinfojson': True,
+            'writesubtitles': True,
+            'writethumbnail': True}
+
+    with yt_dlp.YoutubeDL(opts) as ydl:
+        ydl.download(urls)
+
+
+def gallery(urls: list[str]):
+    """Download every URL in *urls* with gallery-dl."""
+    for url in urls:
+        gallery_dl.job.DownloadJob(url).run()
+
+
+def archivesite(urls: list[str]):
+    """Fetch every URL in *urls* with wget, saving a WARC under websites/."""
+    os.makedirs('websites', exist_ok=True)
+    for url in urls:
+        # timestamp() interprets the naive local time correctly, unlike
+        # subtracting a naive 1970 epoch, which is off by the UTC offset.
+        unix_timestamp = int(datetime.now().timestamp())
+        warc_file = 'websites/' + str(unix_timestamp) + " - " + legalfy(url)
+        # Argument list instead of a shell string: URLs come from the TOML
+        # file and must not be interpreted by a shell.
+        subprocess.run(['wget', '-E', '-H', '-k', '-p', '--delete-after',
+                        '--warc-file=' + warc_file, url], check=False)
+
+
+def args_parser():
+    """Console-script entry point: parse arguments and archive every site."""
+    parser = argparse.ArgumentParser(
+        prog='Stashley',
+        description='a program used for archiving personalities using TOML files'
+    )
+    # The file is mandatory; an optional argument would only defer the failure.
+    parser.add_argument('filename', type=str, help="Toml file to process")
+    args = parser.parse_args()
+
+    with open(args.filename, 'rb') as toml_file:
+        config = tomli.load(toml_file)
+
+    for key, lore in config["sites"].items():
+        print("Downloading for " + key)
+
+        program = figureUse(lore, key)
+
+        print("Opting to use: " + program)
+        match program:
+            case "ytdl":
+                videohost(key, lore["urls"])
+            case "gallerydl":
+                gallery(lore["urls"])
+            case "website":
+                archivesite(lore["urls"])
+
+
+if __name__ == "__main__":
+    args_parser()
-- 
cgit v1.2.3