Browse Source

add podcast audiogen

boyska 3 years ago
parent
commit
1fbe659fc1
3 changed files with 250 additions and 0 deletions
  1. 63 0
      larigira/audioform_podcast.py
  2. 184 0
      larigira/audiogen_podcast.py
  3. 3 0
      setup.py

+ 63 - 0
larigira/audioform_podcast.py

@@ -0,0 +1,63 @@
+from flask_wtf import Form
+from wtforms import (BooleanField, IntegerField, SelectField, StringField,
+                     SubmitField, validators)
+from wtforms.fields.html5 import URLField
+
+
+class AudioForm(Form):
+    nick = StringField(
+        "Audio nick",
+        validators=[validators.required()],
+        description="A simple name to recognize this audio",
+    )
+    url = URLField(
+        "URL",
+        validators=[validators.required()],
+        description="URL of the podcast; it must be valid xml",
+    )
+
+    # TODO: group by filters/sort/select
+    min_len = StringField(
+        "Accetta solo audio lunghi almeno:",
+        description="Leaving this empty will disable this filter",
+    )
+    max_len = StringField(
+        "Accetta solo audio lunghi al massimo:",
+        description="Leaving this empty will disable this filter",
+    )
+    sort_by = SelectField(
+        "Sort episodes",
+        choices=[
+            ("none", "Don't sort"),
+            ("random", "Random"),
+            ("duration", "Duration"),
+            ("date", "date"),
+        ],
+    )
+    start = IntegerField(
+        "Play from episode number",
+        description="Episodes count from 0; 0 is a sane default",
+    )
+    reverse = BooleanField("Reverse sort (descending)")
+    submit = SubmitField("Submit")
+
+    def populate_from_audiospec(self, audiospec):
+        for key in ("nick", "url", "sort_by", "reverse", "min_len", "max_len"):
+            if key in audiospec:
+                getattr(self, key).data = audiospec[key]
+        self.start.data = int(audiospec.get("start", 0))
+
+
+def audio_receive(form):
+    d = {"kind": "podcast"}
+    for key in (
+        "nick",
+        "url",
+        "sort_by",
+        "reverse",
+        "min_len",
+        "max_len",
+        "start",
+    ):
+        d[key] = getattr(form, key).data
+    return d

+ 184 - 0
larigira/audiogen_podcast.py

@@ -0,0 +1,184 @@
+import datetime
+import logging
+import os
+import random
+import sys
+from subprocess import CalledProcessError, check_output
+
+import requests
+
+from larigira.fsutils import download_http
+from lxml import html
+from pytimeparse.timeparse import timeparse
+
+
+def delta_humanreadable(tdelta):
+    if tdelta is None:
+        return ""
+    days = tdelta.days
+    hours = (tdelta - datetime.timedelta(days=days)).seconds // 3600
+    if days:
+        return "{}d{}h".format(days, hours)
+    return "{}h".format(hours)
+
+
+def get_duration(url):
+    try:
+        lineout = check_output(
+            [
+                "ffprobe",
+                "-v",
+                "error",
+                "-show_entries",
+                "format=duration",
+                "-i",
+                url,
+            ]
+        ).split(b"\n")
+    except CalledProcessError as exc:
+        raise ValueError("error probing `%s`" % url) from exc
+    duration = next(l for l in lineout if l.startswith(b"duration="))
+    value = duration.split(b"=")[1]
+    return int(float(value))
+
+
+class Audio(object):
+    def __init__(self, url, duration=None, date=None):
+        self.url = url
+        if duration is None:
+            duration = get_duration(url.encode("utf-8"))
+        self.duration = duration
+        self.date = date
+        self.end_date = datetime.datetime(
+            9999, 12, 31, tzinfo=datetime.timezone.utc
+        )
+
+    def __str__(self):
+        return self.url
+
+    def __repr__(self):
+        return "<Audio {} ({} {})>".format(
+            self.url, self.duration, delta_humanreadable(self.age)
+        )
+
+    @property
+    def urls(self):
+        return [self.url]
+
+    @property
+    def age(self):
+        if self.date is None:
+            return None
+        now = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
+
+        return now - self.date
+
+    @property
+    def valid(self):
+        return self.end_date >= datetime.datetime.utcnow().replace(
+            tzinfo=datetime.timezone.utc
+        )
+
+
+def get_tree(feed_url):
+    if feed_url.startswith("http:") or feed_url.startswith("https:"):
+        tree = html.fromstring(requests.get(feed_url).content)
+    else:
+        if not os.path.exists(feed_url):
+            raise ValueError("file not found: {}".format(feed_url))
+        tree = html.parse(open(feed_url))
+    return tree
+
+
+def get_item_date(el):
+    el_date = el.find("pubdate")
+    if el_date is None:
+        return None
+    for time_format in ("%Y-%m-%dT%H:%M:%S%z", "%a, %d %b %Y %H:%M:%S %z"):
+        try:
+            return datetime.datetime.strptime(el_date.text, time_format)
+        except:
+            continue
+
+
+def get_audio_from_item(item):
+    encl = item.find("enclosure")
+    url = encl.get("url")
+    audio_args = {}
+    if item.find("duration") is not None:
+        duration_parts = item.findtext("duration").split(":")
+        total_seconds = 0
+        for i, num in enumerate(reversed(duration_parts)):
+            total_seconds += int(num) * (60 ** i)
+        if total_seconds:
+            audio_args["duration"] = total_seconds
+    return Audio(url, **audio_args)
+
+
+def get_urls(tree):
+    items = tree.xpath("//item")
+    for it in items:
+        # title = it.find("title").text
+        audio = get_audio_from_item(it)
+        if audio.date is None:
+            audio.date = get_item_date(it)
+        yield audio
+
+
+def parse_duration(arg):
+    if arg.isdecimal():
+        secs = int(arg)
+    else:
+        secs = timeparse(arg)
+        if secs is None:
+            raise ValueError("%r is not a valid duration" % arg)
+    return secs
+
+
+def generate(spec):
+    if "url" not in spec:
+        raise ValueError("Malformed audiospec: missing 'url'")
+    audios = list(get_urls(get_tree(spec["url"])))
+    if spec.get("min_len", False):
+        audios = [
+            a for a in audios if a.duration >= parse_duration(spec["min_len"])
+        ]
+    if spec.get("max_len", False):
+        audios = [
+            a for a in audios if a.duration <= parse_duration(spec["max_len"])
+        ]
+
+    # sort
+    sort_by = spec.get("sort_by", "none")
+    if sort_by == "random":
+        random.shuffle(audios)
+    elif sort_by == "date":
+        audios.sort(key=lambda x: x.age)
+    elif sort_by == "duration":
+        audios.sort(key=lambda x: x.duration)
+
+    if spec.get("reverse", False):
+        audios.reverse()
+
+    # slice
+    audios = audios[int(spec.get("start", 0)) :]
+    audios = audios[: int(spec.get("howmany", 1))]
+
+    # copy local
+    local_audios = [
+        download_http(a.url, copy=spec.get("copy", True), prefix="podcast")
+        for a in audios
+    ]
+    return local_audios
+
+
+# TODO: testing
+# TODO: lxml should maybe be optional?
+# TODO: ui
+
+
+if __name__ == "__main__":
+    # less than proper testing
+    logging.basicConfig(level=logging.DEBUG)
+    for u in get_urls(get_tree(sys.argv[1])):
+        print(" -", repr(u))

+ 3 - 0
setup.py

@@ -73,6 +73,7 @@ setup(
             "mpd = larigira.audiogen_mpdrandom:generate_by_artist",
             "static = larigira.audiogen_static:generate",
             "http = larigira.audiogen_http:generate",
+            "podcast = larigira.audiogen_podcast:generate",
             "randomdir = larigira.audiogen_randomdir:generate",
             "mostrecent = larigira.audiogen_mostrecent:generate",
             "script = larigira.audiogen_script:generate",
@@ -95,6 +96,7 @@ setup(
         "larigira.audioform_create": [
             "static = larigira.audioform_static:StaticAudioForm",
             "http = larigira.audioform_http:AudioForm",
+            "podcast = larigira.audioform_podcast:AudioForm",
             "script = larigira.audioform_script:ScriptAudioForm",
             "randomdir = larigira.audioform_randomdir:Form",
             "mostrecent = larigira.audioform_mostrecent:AudioForm",
@@ -102,6 +104,7 @@ setup(
         "larigira.audioform_receive": [
             "static = larigira.audioform_static:staticaudio_receive",
             "http = larigira.audioform_http:audio_receive",
+            "podcast = larigira.audioform_podcast:audio_receive",
             "script = larigira.audioform_script:scriptaudio_receive",
             "randomdir = larigira.audioform_randomdir:receive",
             "mostrecent = larigira.audioform_mostrecent:audio_receive",