3 years ago · 1fbe659fc1
--- a/larigira/audioform_podcast.py
+++ b/larigira/audioform_podcast.py
@@ -0,0 +1,63 @@
 
				+from flask_wtf import Form
			
 
				+from wtforms import (BooleanField, IntegerField, SelectField, StringField,
			
 
				+                     SubmitField, validators)
			
 
				+from wtforms.fields.html5 import URLField
			
 
				+
			
 
				+
			
 
				+class AudioForm(Form):
			
 
				+    nick = StringField(
			
 
				+        "Audio nick",
			
 
				+        validators=[validators.required()],
			
 
				+        description="A simple name to recognize this audio",
			
 
				+    )
			
 
				+    url = URLField(
			
 
				+        "URL",
			
 
				+        validators=[validators.required()],
			
 
				+        description="URL of the podcast; it must be valid xml",
			
 
				+    )
			
 
				+
			
 
				+    # TODO: group by filters/sort/select
			
 
				+    min_len = StringField(
			
 
				+        "Accetta solo audio lunghi almeno:",
			
 
				+        description="Leaving this empty will disable this filter",
			
 
				+    )
			
 
				+    max_len = StringField(
			
 
				+        "Accetta solo audio lunghi al massimo:",
			
 
				+        description="Leaving this empty will disable this filter",
			
 
				+    )
			
 
				+    sort_by = SelectField(
			
 
				+        "Sort episodes",
			
 
				+        choices=[
			
 
				+            ("none", "Don't sort"),
			
 
				+            ("random", "Random"),
			
 
				+            ("duration", "Duration"),
			
 
				+            ("date", "date"),
			
 
				+        ],
			
 
				+    )
			
 
				+    start = IntegerField(
			
 
				+        "Play from episode number",
			
 
				+        description="Episodes count from 0; 0 is a sane default",
			
 
				+    )
			
 
				+    reverse = BooleanField("Reverse sort (descending)")
			
 
				+    submit = SubmitField("Submit")
			
 
				+
			
 
				+    def populate_from_audiospec(self, audiospec):
			
 
				+        for key in ("nick", "url", "sort_by", "reverse", "min_len", "max_len"):
			
 
				+            if key in audiospec:
			
 
				+                getattr(self, key).data = audiospec[key]
			
 
				+        self.start.data = int(audiospec.get("start", 0))
			
 
				+
			
 
				+
			
 
				+def audio_receive(form):
			
 
				+    d = {"kind": "podcast"}
			
 
				+    for key in (
			
 
				+        "nick",
			
 
				+        "url",
			
 
				+        "sort_by",
			
 
				+        "reverse",
			
 
				+        "min_len",
			
 
				+        "max_len",
			
 
				+        "start",
			
 
				+    ):
			
 
				+        d[key] = getattr(form, key).data
			
 
				+    return d
			
--- a/larigira/audiogen_podcast.py
+++ b/larigira/audiogen_podcast.py
@@ -0,0 +1,184 @@
 
				+import datetime
			
 
				+import logging
			
 
				+import os
			
 
				+import random
			
 
				+import sys
			
 
				+from subprocess import CalledProcessError, check_output
			
 
				+
			
 
				+import requests
			
 
				+
			
 
				+from larigira.fsutils import download_http
			
 
				+from lxml import html
			
 
				+from pytimeparse.timeparse import timeparse
			
 
				+
			
 
				+
			
 
				+def delta_humanreadable(tdelta):
			
 
				+    if tdelta is None:
			
 
				+        return ""
			
 
				+    days = tdelta.days
			
 
				+    hours = (tdelta - datetime.timedelta(days=days)).seconds // 3600
			
 
				+    if days:
			
 
				+        return "{}d{}h".format(days, hours)
			
 
				+    return "{}h".format(hours)
			
 
				+
			
 
				+
			
 
				+def get_duration(url):
			
 
				+    try:
			
 
				+        lineout = check_output(
			
 
				+            [
			
 
				+                "ffprobe",
			
 
				+                "-v",
			
 
				+                "error",
			
 
				+                "-show_entries",
			
 
				+                "format=duration",
			
 
				+                "-i",
			
 
				+                url,
			
 
				+            ]
			
 
				+        ).split(b"\n")
			
 
				+    except CalledProcessError as exc:
			
 
				+        raise ValueError("error probing `%s`" % url) from exc
			
 
				+    duration = next(l for l in lineout if l.startswith(b"duration="))
			
 
				+    value = duration.split(b"=")[1]
			
 
				+    return int(float(value))
			
 
				+
			
 
				+
			
 
				+class Audio(object):
			
 
				+    def __init__(self, url, duration=None, date=None):
			
 
				+        self.url = url
			
 
				+        if duration is None:
			
 
				+            duration = get_duration(url.encode("utf-8"))
			
 
				+        self.duration = duration
			
 
				+        self.date = date
			
 
				+        self.end_date = datetime.datetime(
			
 
				+            9999, 12, 31, tzinfo=datetime.timezone.utc
			
 
				+        )
			
 
				+
			
 
				+    def __str__(self):
			
 
				+        return self.url
			
 
				+
			
 
				+    def __repr__(self):
			
 
				+        return "<Audio {} ({} {})>".format(
			
 
				+            self.url, self.duration, delta_humanreadable(self.age)
			
 
				+        )
			
 
				+
			
 
				+    @property
			
 
				+    def urls(self):
			
 
				+        return [self.url]
			
 
				+
			
 
				+    @property
			
 
				+    def age(self):
			
 
				+        if self.date is None:
			
 
				+            return None
			
 
				+        now = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
			
 
				+
			
 
				+        return now - self.date
			
 
				+
			
 
				+    @property
			
 
				+    def valid(self):
			
 
				+        return self.end_date >= datetime.datetime.utcnow().replace(
			
 
				+            tzinfo=datetime.timezone.utc
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+def get_tree(feed_url):
			
 
				+    if feed_url.startswith("http:") or feed_url.startswith("https:"):
			
 
				+        tree = html.fromstring(requests.get(feed_url).content)
			
 
				+    else:
			
 
				+        if not os.path.exists(feed_url):
			
 
				+            raise ValueError("file not found: {}".format(feed_url))
			
 
				+        tree = html.parse(open(feed_url))
			
 
				+    return tree
			
 
				+
			
 
				+
			
 
				+def get_item_date(el):
			
 
				+    el_date = el.find("pubdate")
			
 
				+    if el_date is None:
			
 
				+        return None
			
 
				+    for time_format in ("%Y-%m-%dT%H:%M:%S%z", "%a, %d %b %Y %H:%M:%S %z"):
			
 
				+        try:
			
 
				+            return datetime.datetime.strptime(el_date.text, time_format)
			
 
				+        except:
			
 
				+            continue
			
 
				+
			
 
				+
			
 
				+def get_audio_from_item(item):
			
 
				+    encl = item.find("enclosure")
			
 
				+    url = encl.get("url")
			
 
				+    audio_args = {}
			
 
				+    if item.find("duration") is not None:
			
 
				+        duration_parts = item.findtext("duration").split(":")
			
 
				+        total_seconds = 0
			
 
				+        for i, num in enumerate(reversed(duration_parts)):
			
 
				+            total_seconds += int(num) * (60 ** i)
			
 
				+        if total_seconds:
			
 
				+            audio_args["duration"] = total_seconds
			
 
				+    return Audio(url, **audio_args)
			
 
				+
			
 
				+
			
 
				+def get_urls(tree):
			
 
				+    items = tree.xpath("//item")
			
 
				+    for it in items:
			
 
				+        # title = it.find("title").text
			
 
				+        audio = get_audio_from_item(it)
			
 
				+        if audio.date is None:
			
 
				+            audio.date = get_item_date(it)
			
 
				+        yield audio
			
 
				+
			
 
				+
			
 
				+def parse_duration(arg):
			
 
				+    if arg.isdecimal():
			
 
				+        secs = int(arg)
			
 
				+    else:
			
 
				+        secs = timeparse(arg)
			
 
				+        if secs is None:
			
 
				+            raise ValueError("%r is not a valid duration" % arg)
			
 
				+    return secs
			
 
				+
			
 
				+
			
 
				+def generate(spec):
			
 
				+    if "url" not in spec:
			
 
				+        raise ValueError("Malformed audiospec: missing 'url'")
			
 
				+    audios = list(get_urls(get_tree(spec["url"])))
			
 
				+    if spec.get("min_len", False):
			
 
				+        audios = [
			
 
				+            a for a in audios if a.duration >= parse_duration(spec["min_len"])
			
 
				+        ]
			
 
				+    if spec.get("max_len", False):
			
 
				+        audios = [
			
 
				+            a for a in audios if a.duration <= parse_duration(spec["max_len"])
			
 
				+        ]
			
 
				+
			
 
				+    # sort
			
 
				+    sort_by = spec.get("sort_by", "none")
			
 
				+    if sort_by == "random":
			
 
				+        random.shuffle(audios)
			
 
				+    elif sort_by == "date":
			
 
				+        audios.sort(key=lambda x: x.age)
			
 
				+    elif sort_by == "duration":
			
 
				+        audios.sort(key=lambda x: x.duration)
			
 
				+
			
 
				+    if spec.get("reverse", False):
			
 
				+        audios.reverse()
			
 
				+
			
 
				+    # slice
			
 
				+    audios = audios[int(spec.get("start", 0)) :]
			
 
				+    audios = audios[: int(spec.get("howmany", 1))]
			
 
				+
			
 
				+    # copy local
			
 
				+    local_audios = [
			
 
				+        download_http(a.url, copy=spec.get("copy", True), prefix="podcast")
			
 
				+        for a in audios
			
 
				+    ]
			
 
				+    return local_audios
			
 
				+
			
 
				+
			
 
				+# TODO: testing
			
 
				+# TODO: lxml should maybe be optional?
			
 
				+# TODO: ui
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    # less than proper testing
			
 
				+    logging.basicConfig(level=logging.DEBUG)
			
 
				+    for u in get_urls(get_tree(sys.argv[1])):
			
 
				+        print(" -", repr(u))
			
--- a/setup.py
+++ b/setup.py
@@ -73,6 +73,7 @@ setup(
 
				             "mpd = larigira.audiogen_mpdrandom:generate_by_artist",
			
 
				             "static = larigira.audiogen_static:generate",
			
 
				             "http = larigira.audiogen_http:generate",
			
 
				+            "podcast = larigira.audiogen_podcast:generate",
			
 
				             "randomdir = larigira.audiogen_randomdir:generate",
			
 
				             "mostrecent = larigira.audiogen_mostrecent:generate",
			
 
				             "script = larigira.audiogen_script:generate",
			
@@ -95,6 +96,7 @@ setup(
 
				         "larigira.audioform_create": [
			
 
				             "static = larigira.audioform_static:StaticAudioForm",
			
 
				             "http = larigira.audioform_http:AudioForm",
			
 
				+            "podcast = larigira.audioform_podcast:AudioForm",
			
 
				             "script = larigira.audioform_script:ScriptAudioForm",
			
 
				             "randomdir = larigira.audioform_randomdir:Form",
			
 
				             "mostrecent = larigira.audioform_mostrecent:AudioForm",
			
@@ -102,6 +104,7 @@ setup(
 
				         "larigira.audioform_receive": [
			
 
				             "static = larigira.audioform_static:staticaudio_receive",
			
 
				             "http = larigira.audioform_http:audio_receive",
			
 
				+            "podcast = larigira.audioform_podcast:audio_receive",
			
 
				             "script = larigira.audioform_script:scriptaudio_receive",
			
 
				             "randomdir = larigira.audioform_randomdir:receive",
			
 
				             "mostrecent = larigira.audioform_mostrecent:audio_receive",