From bf5eca28c394a24b5f44c686be7ff3393c477e6d Mon Sep 17 00:00:00 2001 From: boyska Date: Sun, 21 Jun 2020 12:56:53 +0200 Subject: [PATCH 1/3] audiogen http: download factored in fsutils --- larigira/audiogen_http.py | 33 +++------------------------------ larigira/fsutils.py | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/larigira/audiogen_http.py b/larigira/audiogen_http.py index 3d35ccd..746d9ee 100644 --- a/larigira/audiogen_http.py +++ b/larigira/audiogen_http.py @@ -1,31 +1,4 @@ -import os -import logging -import posixpath -from tempfile import mkstemp -import urllib.request -from urllib.parse import urlparse - -log = logging.getLogger(__name__) - - -def put(url, destdir=None, copy=False): - if url.split(":")[0] not in ("http", "https"): - log.warning("Not a valid URL: %s", url) - return None - ext = url.split(".")[-1] - if ext.lower() not in ("mp3", "ogg", "oga", "wma", "m4a"): - log.warning('Invalid format (%s) for "%s"', ext, url) - return None - if not copy: - return url - fname = posixpath.basename(urlparse(url).path) - # sanitize - fname = "".join(c for c in fname if c.isalnum() or c in list("._-")).rstrip() - tmp = mkstemp(suffix="." 
def download_http(url, destdir=None, copy=False, prefix="httpdl"):
    """Validate an HTTP(S) audio URL and optionally download it.

    :param url: the URL to check; only ``http`` and ``https`` are accepted,
        and it must end in one of the known audio extensions
        (mp3, ogg, oga, wma, m4a; compared case-insensitively).
    :param destdir: directory for the temporary file, passed to ``mkstemp``
        as ``dir`` (``None`` lets ``mkstemp`` pick its default).
    :param copy: when false, the URL is only validated and returned as-is;
        when true, the resource is downloaded to a temporary file.
    :param prefix: first component of the temporary file name.
    :returns: ``None`` if the URL is rejected (a warning is logged); the
        unchanged ``url`` if ``copy`` is false; otherwise a ``file://`` URI
        pointing at the real path of the downloaded file.
    :raises: whatever ``urllib.request.urlretrieve`` raises on a failed
        download; the temporary file is removed before re-raising.
    """
    allowed_exts = ("mp3", "ogg", "oga", "wma", "m4a")
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse refuses some malformed URLs (e.g. unbalanced IPv6
        # brackets); treat them like any other invalid URL.
        parsed = None
    if parsed is None or parsed.scheme.lower() not in ("http", "https"):
        log.warning("Not a valid URL: %s", url)
        return None

    # Look at the *path* first, so that a query string or fragment
    # ("episode.mp3?token=abc", "episode.mp3#t=10") does not hide a valid
    # extension. Fall back to the historical "text after the last dot of the
    # whole URL" rule so every URL accepted before is still accepted.
    ext = posixpath.splitext(parsed.path)[1][1:]
    if ext.lower() not in allowed_exts:
        ext = url.split(".")[-1]
    if ext.lower() not in allowed_exts:
        log.warning('Invalid format (%s) for "%s"', ext, url)
        return None
    if not copy:
        return url

    fname = posixpath.basename(parsed.path)
    # sanitize: keep only characters that are harmless in a file name
    fname = "".join(c for c in fname if c.isalnum() or c in "._-").rstrip()
    fd, tmp_path = mkstemp(
        suffix="." + ext, prefix="%s-%s-" % (prefix, fname), dir=destdir
    )
    # mkstemp returns an open descriptor; urlretrieve reopens the path itself
    os.close(fd)
    log.info("downloading %s -> %s", url, tmp_path)
    try:
        urllib.request.urlretrieve(url, tmp_path)
    except Exception:
        # Do not leave an empty or partial temporary file behind.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
    return "file://%s" % os.path.realpath(tmp_path)
def get_duration(url):
    """Return the duration of the media at *url*, in whole seconds.

    Runs ``ffprobe -show_entries format=duration`` on *url* and parses the
    ``duration=<number>`` line of its output; the fractional part is
    truncated.

    :param url: passed verbatim to ffprobe as the ``-i`` argument.
    :returns: the duration as an ``int`` number of seconds.
    :raises ValueError: if ffprobe exits with a non-zero status, if its
        output has no ``duration=`` line, or if the reported value is not
        a number.
    """
    try:
        output = check_output(
            [
                "ffprobe",
                "-v",
                "error",
                "-show_entries",
                "format=duration",
                "-i",
                url,
            ]
        )
    except CalledProcessError as exc:
        raise ValueError("error probing `%s`" % url) from exc
    for line in output.split(b"\n"):
        if line.startswith(b"duration="):
            # float() first: the value is parsed as a decimal number
            return int(float(line.split(b"=")[1]))
    # An explicit error instead of a bare StopIteration, which would turn
    # into a RuntimeError if this were called from inside a generator.
    raise ValueError("no duration found probing `%s`" % url)