add podcast audiogen

2020-06-21 12:57:59 +02:00 · 2020-06-21 12:57:59 +02:00 · 1fbe659fc1
commit 1fbe659fc1
parent bf5eca28c3
3 changed files with 250 additions and 0 deletions
--- a/larigira/audioform_podcast.py
+++ b/larigira/audioform_podcast.py
@ -0,0 +1,63 @@
 from flask_wtf import Form
 from wtforms import (BooleanField, IntegerField, SelectField, StringField,
                     SubmitField, validators)
 from wtforms.fields.html5 import URLField
 class AudioForm(Form):
    """Form for creating or editing a ``podcast`` audio specification."""

    # Human-friendly label for this audio entry (required).
    nick = StringField(
        "Audio nick",
        validators=[validators.required()],
        description="A simple name to recognize this audio",
    )
    # Address of the podcast feed (required).
    url = URLField(
        "URL",
        validators=[validators.required()],
        description="URL of the podcast; it must be valid xml",
    )
    # TODO: group by filters/sort/select
    # Lower bound on episode length; the (Italian) label reads
    # "Only accept audio at least this long:".
    min_len = StringField(
        "Accetta solo audio lunghi almeno:",
        description="Leaving this empty will disable this filter",
    )
    # Upper bound on episode length; the (Italian) label reads
    # "Only accept audio at most this long:".
    max_len = StringField(
        "Accetta solo audio lunghi al massimo:",
        description="Leaving this empty will disable this filter",
    )
    # Ordering applied to the episodes before one is picked.
    sort_by = SelectField(
        "Sort episodes",
        choices=[
            ("none", "Don't sort"),
            ("random", "Random"),
            ("duration", "Duration"),
            ("date", "date"),
        ],
    )
    # Index of the first episode to consider after sorting.
    start = IntegerField(
        "Play from episode number",
        description="Episodes count from 0; 0 is a sane default",
    )
    # When set, the sorted list is reversed.
    reverse = BooleanField("Reverse sort (descending)")
    submit = SubmitField("Submit")

    def populate_from_audiospec(self, audiospec):
        """Pre-fill the form fields from an existing audiospec mapping.

        Keys missing from ``audiospec`` leave the matching field untouched.
        ``start`` is always set and falls back to 0 when it is missing,
        ``None`` or empty.
        """
        for key in ("nick", "url", "sort_by", "reverse", "min_len", "max_len"):
            if key in audiospec:
                getattr(self, key).data = audiospec[key]
        # A stored ``start`` of None (presumably an IntegerField that was
        # never filled in) would make int() raise TypeError, so any falsy
        # value is treated as 0.
        self.start.data = int(audiospec.get("start") or 0)
 def audio_receive(form):
    """Turn a submitted podcast form into an audiospec dictionary.

    The result always carries ``"kind": "podcast"``; every other entry is the
    ``data`` of the form field with the same name.
    """
    field_names = (
        "nick",
        "url",
        "sort_by",
        "reverse",
        "min_len",
        "max_len",
        "start",
    )
    audiospec = {"kind": "podcast"}
    audiospec.update(
        (name, getattr(form, name).data) for name in field_names
    )
    return audiospec
--- a/larigira/audiogen_podcast.py
+++ b/larigira/audiogen_podcast.py
@ -0,0 +1,184 @@
 import datetime
 import logging
 import os
 import random
 import sys
 from subprocess import CalledProcessError, check_output
 import requests
 from larigira.fsutils import download_http
 from lxml import html
 from pytimeparse.timeparse import timeparse
 def delta_humanreadable(tdelta):
    """Render a ``timedelta`` compactly as ``<days>d<hours>h`` or ``<hours>h``.

    ``None`` gives an empty string. Minutes and seconds are dropped, and the
    day part is omitted when it is zero.
    """
    if tdelta is None:
        return ""
    # ``timedelta.seconds`` never includes whole days, so it can be used
    # directly for the hour component.
    whole_hours = tdelta.seconds // 3600
    if not tdelta.days:
        return "{}h".format(whole_hours)
    return "{}d{}h".format(tdelta.days, whole_hours)
 def get_duration(url):
    """Return the duration of the media at ``url`` in whole seconds.

    The value is obtained by running ``ffprobe`` on ``url`` and truncating
    the reported ``duration=`` entry to an integer.

    Raises:
        ValueError: if ``ffprobe`` cannot be started, exits with an error, or
            prints no usable ``duration=`` line.
    """
    command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-i",
        url,
    ]
    try:
        output = check_output(command)
    except (CalledProcessError, OSError) as exc:
        # OSError covers a missing or non-executable ffprobe binary, so every
        # probing failure reaches the caller as the same exception type.
        raise ValueError("error probing `%s`" % url) from exc
    for line in output.split(b"\n"):
        if line.startswith(b"duration="):
            return int(float(line.split(b"=")[1]))
    # An explicit error instead of a StopIteration leaking out of next(),
    # which would turn into RuntimeError inside a generator.
    raise ValueError("no duration reported probing `%s`" % url)
 class Audio:
    """One podcast episode: its URL plus optional duration and date."""

    def __init__(self, url, duration=None, date=None):
        """Store the episode data, probing the duration when not given.

        Args:
            url: location of the audio file.
            duration: length in seconds; when ``None`` it is obtained from
                ``get_duration`` (which runs ``ffprobe`` on the URL).
            date: publication date as a ``datetime``, or ``None``.
        """
        self.url = url
        if duration is None:
            duration = get_duration(url.encode("utf-8"))
        self.duration = duration
        self.date = date
        # Far-future sentinel: as constructed here, an Audio never expires.
        self.end_date = datetime.datetime(
            9999, 12, 31, tzinfo=datetime.timezone.utc
        )

    def __str__(self):
        return self.url

    def __repr__(self):
        return "<Audio {} ({} {})>".format(
            self.url, self.duration, delta_humanreadable(self.age)
        )

    @property
    def urls(self):
        """The episode URL wrapped in a single-element list."""
        return [self.url]

    @property
    def age(self):
        """Time elapsed since ``date`` as a ``timedelta``, or ``None``."""
        if self.date is None:
            return None
        published = self.date
        if published.utcoffset() is None:
            # Naive datetimes cannot be subtracted from an aware "now";
            # they are assumed to be expressed in UTC.
            published = published.replace(tzinfo=datetime.timezone.utc)
        return datetime.datetime.now(datetime.timezone.utc) - published

    @property
    def valid(self):
        """Whether ``end_date`` has not been reached yet."""
        return self.end_date >= datetime.datetime.now(datetime.timezone.utc)
 def get_tree(feed_url):
    """Load and parse a podcast feed from the web or from a local file.

    ``http:``/``https:`` URLs are downloaded with ``requests`` and parsed via
    ``lxml.html.fromstring``; anything else is treated as a local path and
    parsed via ``lxml.html.parse``.

    Raises:
        ValueError: if ``feed_url`` is a local path that does not exist.
    """
    if feed_url.startswith(("http:", "https:")):
        return html.fromstring(requests.get(feed_url).content)
    if not os.path.exists(feed_url):
        raise ValueError("file not found: {}".format(feed_url))
    # Close the handle once the document is parsed instead of leaking it.
    with open(feed_url) as feed_file:
        return html.parse(feed_file)
 def get_item_date(el):
    """Return the publication date of a feed item, or ``None``.

    The text of the item's ``pubdate`` child is tried against two formats
    (ISO-like ``2020-06-21T12:57:59+0200`` and RFC 822-like
    ``Sun, 21 Jun 2020 12:57:59 +0200``). ``None`` is returned when the
    element is missing or empty, or when neither format matches.
    """
    el_date = el.find("pubdate")
    if el_date is None or el_date.text is None:
        return None
    # Surrounding whitespace would make strptime reject a valid date.
    raw_date = el_date.text.strip()
    for time_format in ("%Y-%m-%dT%H:%M:%S%z", "%a, %d %b %Y %H:%M:%S %z"):
        try:
            return datetime.datetime.strptime(raw_date, time_format)
        except ValueError:
            # Only a format mismatch is expected here; anything else should
            # surface instead of being swallowed.
            continue
    return None
 def get_audio_from_item(item):
    """Build an ``Audio`` from a feed ``item`` element.

    The URL is read from the item's ``enclosure``. A ``duration`` written as
    colon-separated integers (``SS``, ``MM:SS``, ``HH:MM:SS``) is converted
    to seconds and handed to ``Audio``; when it is absent, empty, zero or not
    parseable, no duration is passed and ``Audio`` determines it on its own.

    Raises:
        ValueError: if the item has no ``enclosure`` element.
    """
    encl = item.find("enclosure")
    if encl is None:
        # Explicit error instead of an AttributeError on None below.
        raise ValueError("feed item has no enclosure")
    audio_args = {}
    duration_text = item.findtext("duration")
    if duration_text:
        try:
            # Rightmost part is seconds, then minutes, then hours.
            total_seconds = sum(
                int(part) * (60 ** position)
                for position, part in enumerate(
                    reversed(duration_text.split(":"))
                )
            )
        except ValueError:
            # e.g. "12.5" or "N/A": ignore the declared duration.
            total_seconds = 0
        if total_seconds:
            audio_args["duration"] = total_seconds
    return Audio(encl.get("url"), **audio_args)
 def get_urls(tree):
    """Yield an ``Audio`` for each ``item`` of the parsed feed ``tree``.

    Items without an ``enclosure`` carry no audio and are skipped, so a
    single text-only entry does not abort the whole feed. The publication
    date is filled in from the item when the ``Audio`` has none.
    """
    for item in tree.xpath("//item"):
        if item.find("enclosure") is None:
            logging.getLogger(__name__).debug(
                "skipping feed item without enclosure"
            )
            continue
        audio = get_audio_from_item(item)
        if audio.date is None:
            audio.date = get_item_date(item)
        yield audio
 def parse_duration(arg):
    """Convert a duration string into a number of seconds.

    A string made only of decimal digits is taken as seconds; anything else
    is handed to ``timeparse``.

    Raises:
        ValueError: if ``timeparse`` cannot interpret ``arg``.
    """
    if arg.isdecimal():
        return int(arg)
    parsed = timeparse(arg)
    if parsed is None:
        raise ValueError("%r is not a valid duration" % arg)
    return parsed
 def generate(spec):
    """Select episodes from a podcast feed and fetch them.

    Recognised ``spec`` keys:
        url: feed location (required).
        min_len / max_len: duration bounds, parsed by ``parse_duration``;
            empty or missing disables the bound.
        sort_by: ``"random"``, ``"date"`` or ``"duration"``; any other value
            keeps the feed order.
        reverse: reverse the list after sorting.
        start: index of the first episode to take (default 0).
        howmany: number of episodes to take (default 1).
        copy: forwarded to ``download_http`` (default ``True``).

    Returns:
        A list with the result of ``download_http`` for each selected
        episode (presumably local paths; see ``larigira.fsutils``).

    Raises:
        ValueError: if ``url`` is missing or a duration bound is invalid.
    """
    if "url" not in spec:
        raise ValueError("Malformed audiospec: missing 'url'")
    audios = list(get_urls(get_tree(spec["url"])))

    # filter: each bound is parsed once, not once per episode
    if spec.get("min_len"):
        min_len = parse_duration(spec["min_len"])
        audios = [a for a in audios if a.duration >= min_len]
    if spec.get("max_len"):
        max_len = parse_duration(spec["max_len"])
        audios = [a for a in audios if a.duration <= max_len]

    # sort
    sort_by = spec.get("sort_by", "none")
    if sort_by == "random":
        random.shuffle(audios)
    elif sort_by == "date":

        def by_age(audio):
            # Episodes without a date have no age; None cannot be compared
            # with a timedelta, so they are grouped after the dated ones.
            age = audio.age
            if age is None:
                return (True, datetime.timedelta(0))
            return (False, age)

        audios.sort(key=by_age)
    elif sort_by == "duration":
        audios.sort(key=lambda x: x.duration)
    if spec.get("reverse", False):
        audios.reverse()

    # slice: a stored None means "use the default", not int(None)
    start = spec.get("start") or 0
    howmany = spec.get("howmany")
    if howmany is None:
        howmany = 1
    audios = audios[int(start) :]
    audios = audios[: int(howmany)]

    # copy local
    return [
        download_http(a.url, copy=spec.get("copy", True), prefix="podcast")
        for a in audios
    ]
 # TODO: testing
 # TODO: lxml should maybe be optional?
 # TODO: ui
 if __name__ == "__main__":
    # Manual smoke test, not a real test suite: parse the feed given as the
    # first command-line argument and print every episode found in it.
    logging.basicConfig(level=logging.DEBUG)
    feed_tree = get_tree(sys.argv[1])
    for episode in get_urls(feed_tree):
        print(" -", repr(episode))
--- a/setup.py
+++ b/setup.py
@ -73,6 +73,7 @@ setup(
            "mpd = larigira.audiogen_mpdrandom:generate_by_artist",
            "static = larigira.audiogen_static:generate",
            "http = larigira.audiogen_http:generate",
            "podcast = larigira.audiogen_podcast:generate",
            "randomdir = larigira.audiogen_randomdir:generate",
            "mostrecent = larigira.audiogen_mostrecent:generate",
            "script = larigira.audiogen_script:generate",
@ -95,6 +96,7 @@ setup(
        "larigira.audioform_create": [
            "static = larigira.audioform_static:StaticAudioForm",
            "http = larigira.audioform_http:AudioForm",
            "podcast = larigira.audioform_podcast:AudioForm",
            "script = larigira.audioform_script:ScriptAudioForm",
            "randomdir = larigira.audioform_randomdir:Form",
            "mostrecent = larigira.audioform_mostrecent:AudioForm",
@ -102,6 +104,7 @@ setup(
        "larigira.audioform_receive": [
            "static = larigira.audioform_static:staticaudio_receive",
            "http = larigira.audioform_http:audio_receive",
            "podcast = larigira.audioform_podcast:audio_receive",
            "script = larigira.audioform_script:scriptaudio_receive",
            "randomdir = larigira.audioform_randomdir:receive",
            "mostrecent = larigira.audioform_mostrecent:audio_receive",