diff --git a/doc/source/audiogenerators.rst b/doc/source/audiogenerators.rst index 229a09e..e0b15f8 100644 --- a/doc/source/audiogenerators.rst +++ b/doc/source/audiogenerators.rst @@ -33,6 +33,48 @@ mostrecent It is similar to randomdir, but instead of picking randomly, picks the most recent file (according to the ctime). +podcast +------------ + +This is probably the most powerful generator that comes included with +``larigira``. To use this generator, you would need to have a valid podcast +URL. Beware, here the word *podcast* refers to its very specific meaning of +an xml-based format which resembles an RSS feed but has more media-specific +entities. See `this specification +`_ for +more technical details. + +So, if you have a valid podcast URL, larigira can look at it, extract audios, +and play the most recent one. Here are some typical use cases for this: + * You want to play reruns based on what you host on your radio's website. + * You want to play some audio from some other radio (or other kind of podcast + source) + +The podcast form has many many options, but I promise you that 90% of the cases +are easily solved using ONLY the first option: enter the URL of the podcast +and... it works! + +So, what are all the other options for? Well, to cover some other use cases. + +For example, let's say that at night you want to play a *random* show (not the +last one, which is the default) that happened on your radio. Then you can +change the "sort by" to be "random". Easy, right? + +Another typical use case is selecting an audio that has a duration which "fits" +with the schedule of your radio: not too long and not too short. You can do +that with the "min len" and "max len" fields. For example, setting a `min_len` +of `30min` and `max_len` of `1h15m` you can avoid picking flash news (too +short) and very long shows. + +You can do many other things with its options, but I leave those to your +imagination. 
class AudioForm(Form):
    """Form used to create or edit a ``podcast`` audio specification.

    The fields mirror the keys of the audiospec dictionary built by
    ``audio_receive``: the podcast URL, optional duration filters
    (``min_len`` / ``max_len``), the sort criterion, the sort direction
    and the index of the episode to pick.
    """

    nick = StringField(
        "Audio nick",
        # DataRequired is the canonical name of the legacy ``required`` alias.
        validators=[validators.DataRequired()],
        description="A simple name to recognize this audio",
    )
    url = URLField(
        "URL",
        validators=[validators.DataRequired()],
        description="URL of the podcast; it must be valid xml",
    )

    # TODO: group by filters/sort/select
    min_len = StringField(
        "Accetta solo audio lunghi almeno:",
        description="Leaving this empty will disable this filter",
    )
    max_len = StringField(
        "Accetta solo audio lunghi al massimo:",
        description="Leaving this empty will disable this filter",
    )
    sort_by = SelectField(
        "Sort episodes",
        choices=[
            ("none", "Don't sort"),
            ("random", "Random"),
            ("duration", "Duration"),
            ("date", "date"),
        ],
    )
    start = IntegerField(
        "Play from episode number",
        description="Episodes count from 0; 0 is a sane default",
    )
    reverse = BooleanField("Reverse sort (descending)")
    submit = SubmitField("Submit")

    def populate_from_audiospec(self, audiospec):
        """Fill the form fields from an existing audiospec dictionary.

        Keys missing from ``audiospec`` leave the matching field untouched;
        ``start`` always gets a value and defaults to 0.
        """
        for key in ("nick", "url", "sort_by", "reverse", "min_len", "max_len"):
            if key in audiospec:
                getattr(self, key).data = audiospec[key]
        # ``start`` may be absent *or* stored as None (the field has no
        # validators, so an unsubmitted value is saved as None); int(None)
        # would raise TypeError, so fall back to 0 in both cases.
        self.start.data = int(audiospec.get("start") or 0)


def audio_receive(form):
    """Build the ``podcast`` audiospec dictionary from a submitted form.

    Returns a dict with ``kind`` set to ``"podcast"`` plus the ``.data`` of
    each of the form fields listed below, in this order.
    """
    spec = {"kind": "podcast"}
    field_names = (
        "nick",
        "url",
        "sort_by",
        "reverse",
        "min_len",
        "max_len",
        "start",
    )
    spec.update({name: getattr(form, name).data for name in field_names})
    return spec
def delta_humanreadable(tdelta):
    """Format a ``timedelta`` as a short string such as ``"2d3h"`` or ``"5h"``.

    Returns an empty string when ``tdelta`` is None. Minutes and seconds are
    dropped; the days part is omitted when it is zero.
    """
    if tdelta is None:
        return ""
    # ``timedelta.seconds`` is already the sub-day remainder (0..86399), so
    # no explicit subtraction of the whole days is needed.
    hours = tdelta.seconds // 3600
    if tdelta.days:
        return "{}d{}h".format(tdelta.days, hours)
    return "{}h".format(hours)


def get_duration(url):
    """Return the duration of the media at ``url`` in whole seconds.

    The duration is obtained by running ``ffprobe`` and parsing the
    ``duration=`` line of its output.

    Raises:
        ValueError: if ``ffprobe`` cannot be executed, exits with an error,
            prints no ``duration=`` line, or prints a non-numeric value.
    """
    command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-i",
        url,
    ]
    try:
        output = check_output(command)
    except (CalledProcessError, OSError) as exc:
        # OSError covers a missing or non-executable ffprobe binary, so that
        # callers only ever have to handle ValueError.
        raise ValueError("error probing `%s`" % url) from exc
    for line in output.split(b"\n"):
        if line.startswith(b"duration="):
            value = line.split(b"=")[1]
            return int(float(value))
    # Raise explicitly: a bare next() would leak StopIteration, which turns
    # into RuntimeError (or silently ends iteration) inside generator code.
    raise ValueError("no duration found probing `%s`" % url)