add podcast audiogen

This commit is contained in:
boyska 2020-06-21 12:57:59 +02:00
parent bf5eca28c3
commit 1fbe659fc1
3 changed files with 250 additions and 0 deletions

View file

@ -0,0 +1,63 @@
from flask_wtf import Form
from wtforms import (BooleanField, IntegerField, SelectField, StringField,
SubmitField, validators)
from wtforms.fields.html5 import URLField
class AudioForm(Form):
    """Form used to create or edit a "podcast" audio source.

    Every field except ``submit`` corresponds to a key of the audiospec
    dict built by ``audio_receive``.
    """

    # Human-friendly label for this audio source.
    nick = StringField(
        "Audio nick",
        validators=[validators.DataRequired()],
        description="A simple name to recognize this audio",
    )
    # Location of the podcast feed.
    url = URLField(
        "URL",
        validators=[validators.DataRequired()],
        description="URL of the podcast; it must be valid xml",
    )
    # TODO: group by filters/sort/select
    # Optional lower bound on episode length (free text; empty = no filter).
    min_len = StringField(
        "Accetta solo audio lunghi almeno:",
        description="Leaving this empty will disable this filter",
    )
    # Optional upper bound on episode length (free text; empty = no filter).
    max_len = StringField(
        "Accetta solo audio lunghi al massimo:",
        description="Leaving this empty will disable this filter",
    )
    # Ordering applied to the episodes before one is picked.
    sort_by = SelectField(
        "Sort episodes",
        choices=[
            ("none", "Don't sort"),
            ("random", "Random"),
            ("duration", "Duration"),
            ("date", "date"),
        ],
    )
    # Index of the first episode to consider after sorting.
    start = IntegerField(
        "Play from episode number",
        description="Episodes count from 0; 0 is a sane default",
    )
    reverse = BooleanField("Reverse sort (descending)")
    submit = SubmitField("Submit")

    def populate_from_audiospec(self, audiospec):
        """Pre-fill the form fields from an existing audiospec mapping.

        Keys missing from ``audiospec`` leave the matching field untouched,
        except ``start`` which always falls back to 0.
        """
        for key in ("nick", "url", "sort_by", "reverse", "min_len", "max_len"):
            if key in audiospec:
                getattr(self, key).data = audiospec[key]
        # audio_receive stores the field data verbatim, so "start" may be
        # present but None; treat that the same as a missing key instead of
        # crashing on int(None).
        self.start.data = int(audiospec.get("start") or 0)
def audio_receive(form):
    """Build the "podcast" audiospec dict from a submitted form.

    The ``data`` of each relevant form field is copied verbatim under the
    field's own name; ``kind`` is always set to ``"podcast"``.
    """
    field_names = (
        "nick",
        "url",
        "sort_by",
        "reverse",
        "min_len",
        "max_len",
        "start",
    )
    audiospec = {"kind": "podcast"}
    audiospec.update(
        (name, getattr(form, name).data) for name in field_names
    )
    return audiospec

View file

@ -0,0 +1,184 @@
import datetime
import logging
import os
import random
import sys
from subprocess import CalledProcessError, check_output
import requests
from larigira.fsutils import download_http
from lxml import html
from pytimeparse.timeparse import timeparse
def delta_humanreadable(tdelta):
    """Format a timedelta compactly as ``<days>d<hours>h`` or ``<hours>h``.

    Returns an empty string for ``None``. Minutes and seconds are dropped;
    the day part is omitted when it is zero.
    """
    if tdelta is None:
        return ""
    whole_days = tdelta.days
    # timedelta keeps .seconds as the sub-day remainder, so this is the
    # number of whole hours left once the days are taken out.
    whole_hours = tdelta.seconds // 3600
    if not whole_days:
        return "{}h".format(whole_hours)
    return "{}d{}h".format(whole_days, whole_hours)
def get_duration(url):
    """Return the duration, in whole seconds, of the media at ``url``.

    The value is read from the ``duration=`` line printed by ``ffprobe``
    and truncated to an integer.

    Raises:
        ValueError: if ffprobe exits with an error, prints no ``duration=``
            line, or prints a value that is not a number.
    """
    command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-i",
        url,
    ]
    try:
        output = check_output(command)
    except CalledProcessError as exc:
        raise ValueError("error probing `%s`" % url) from exc

    # A default is required here: a bare next() would leak StopIteration,
    # which turns into RuntimeError when this runs inside a generator.
    duration_line = next(
        (
            line
            for line in output.split(b"\n")
            if line.startswith(b"duration=")
        ),
        None,
    )
    if duration_line is None:
        raise ValueError("no duration reported for `%s`" % url)

    value = duration_line.partition(b"=")[2]
    try:
        return int(float(value))
    except ValueError as exc:
        raise ValueError(
            "invalid duration %r for `%s`" % (value, url)
        ) from exc
class Audio(object):
    """A single audio file: its URL, duration and optional date.

    Attributes:
        url: location of the audio file.
        duration: length in seconds; probed with ``get_duration`` when not
            given to the constructor.
        date: timezone-aware datetime, or None when unknown.
        end_date: moment after which the audio is no longer ``valid``;
            initialised to the end of year 9999 (UTC).
    """

    def __init__(self, url, duration=None, date=None):
        self.url = url
        if duration is None:
            duration = get_duration(url.encode("utf-8"))
        self.duration = duration
        self.date = date
        self.end_date = datetime.datetime(
            9999, 12, 31, tzinfo=datetime.timezone.utc
        )

    def __str__(self):
        return self.url

    def __repr__(self):
        return "<Audio {} ({} {})>".format(
            self.url, self.duration, delta_humanreadable(self.age)
        )

    @staticmethod
    def _now():
        """Return the current time as a timezone-aware UTC datetime."""
        # datetime.utcnow() is deprecated since Python 3.12; now(tz) gives
        # the same aware value directly.
        return datetime.datetime.now(datetime.timezone.utc)

    @property
    def urls(self):
        """The audio URL wrapped in a single-element list."""
        return [self.url]

    @property
    def age(self):
        """Time elapsed since ``date``, or None when ``date`` is unknown."""
        if self.date is None:
            return None
        return self._now() - self.date

    @property
    def valid(self):
        """True as long as ``end_date`` has not passed yet."""
        return self.end_date >= self._now()
def get_tree(feed_url):
    """Fetch and parse a podcast feed with lxml's HTML parser.

    Args:
        feed_url: an ``http:``/``https:`` URL, or a path to a local file.

    Returns:
        The parsed document: the result of ``html.fromstring`` for remote
        feeds, of ``html.parse`` for local files.

    Raises:
        ValueError: if ``feed_url`` is a local path that does not exist.
    """
    if feed_url.startswith(("http:", "https:")):
        return html.fromstring(requests.get(feed_url).content)
    if not os.path.exists(feed_url):
        raise ValueError("file not found: {}".format(feed_url))
    # Read raw bytes (like the HTTP branch does) so the parser handles the
    # encoding itself, and close the file as soon as parsing is done.
    with open(feed_url, "rb") as feed_file:
        return html.parse(feed_file)
def get_item_date(el):
    """Return the publication date of a feed item, or None.

    Looks up the item's ``pubdate`` child and tries, in order, an ISO-like
    format and the RFC 822 style format; both require a UTC offset, so any
    returned datetime is timezone-aware.

    Returns None when the element is missing, empty, or matches neither
    format.
    """
    el_date = el.find("pubdate")
    if el_date is None or el_date.text is None:
        return None
    # Feeds often wrap the date in whitespace or newlines, which strptime
    # would reject.
    text = el_date.text.strip()
    for time_format in ("%Y-%m-%dT%H:%M:%S%z", "%a, %d %b %Y %H:%M:%S %z"):
        try:
            return datetime.datetime.strptime(text, time_format)
        except ValueError:
            # Not this format; try the next one.
            continue
    return None
def get_audio_from_item(item):
    """Build an ``Audio`` from a feed ``item`` element.

    The URL comes from the item's ``enclosure`` child. When the item has a
    ``duration`` child in ``[[HH:]MM:]SS`` form, it is converted to seconds
    and passed to ``Audio``; otherwise ``Audio`` is left to determine the
    duration itself.

    Raises:
        ValueError: if the item has no enclosure or the enclosure has no
            ``url`` attribute.
    """
    encl = item.find("enclosure")
    url = encl.get("url") if encl is not None else None
    if not url:
        raise ValueError("podcast item has no enclosure url")

    audio_args = {}
    duration_text = item.findtext("duration")
    if duration_text:
        try:
            # Rightmost field is seconds, then minutes, then hours.
            total_seconds = sum(
                int(part) * (60 ** position)
                for position, part in enumerate(
                    reversed(duration_text.split(":"))
                )
            )
        except ValueError:
            # Unparseable duration: do not abort the whole feed, just let
            # Audio work the duration out on its own.
            total_seconds = 0
        if total_seconds:
            audio_args["duration"] = total_seconds
    return Audio(url, **audio_args)
def get_urls(tree):
    """Yield an ``Audio`` for every ``item`` in ``tree`` that has an enclosure.

    Items without an ``enclosure`` child carry no audio and are skipped.
    When the audio has no date of its own, it is filled in from the item
    via ``get_item_date``.
    """
    for item in tree.xpath("//item"):
        if item.find("enclosure") is None:
            # Text-only entry (no attached media): nothing to play.
            continue
        audio = get_audio_from_item(item)
        if audio.date is None:
            audio.date = get_item_date(item)
        yield audio
def parse_duration(arg):
    """Convert a duration string to a number of seconds.

    A string made only of decimal digits is taken as a count of seconds;
    anything else is handed to ``timeparse``.

    Raises:
        ValueError: if ``timeparse`` cannot interpret ``arg``.
    """
    seconds = int(arg) if arg.isdecimal() else timeparse(arg)
    # Only timeparse can produce None; int() never does.
    if seconds is None:
        raise ValueError("%r is not a valid duration" % arg)
    return seconds
def _age_sort_key(audio):
    """Sort key ordering audios by age, with undated audios after dated ones."""
    age = audio.age
    if age is None:
        return (True, datetime.timedelta(0))
    return (False, age)


def generate(spec):
    """Pick episodes from a podcast feed according to ``spec``.

    Recognised keys of ``spec``:
        url: feed location (required).
        min_len / max_len: optional duration bounds, parsed with
            ``parse_duration``; empty or missing disables the filter.
        sort_by: ``"random"``, ``"date"``, ``"duration"``; anything else
            keeps feed order.
        reverse: reverse the order after sorting.
        start: index of the first episode to take (default 0).
        howmany: number of episodes to take (default 1).
        copy: forwarded to ``download_http`` (default True).

    Returns:
        A list with the result of ``download_http`` for each selected
        episode.

    Raises:
        ValueError: if ``spec`` has no ``url``.
    """
    if "url" not in spec:
        raise ValueError("Malformed audiospec: missing 'url'")
    audios = list(get_urls(get_tree(spec["url"])))

    # filter; the bounds are parsed once, not once per episode
    if spec.get("min_len"):
        min_len = parse_duration(spec["min_len"])
        audios = [a for a in audios if a.duration >= min_len]
    if spec.get("max_len"):
        max_len = parse_duration(spec["max_len"])
        audios = [a for a in audios if a.duration <= max_len]

    # sort
    sort_by = spec.get("sort_by", "none")
    if sort_by == "random":
        random.shuffle(audios)
    elif sort_by == "date":
        # age is None for undated episodes; a plain key=age would raise
        # TypeError when comparing None with a timedelta.
        audios.sort(key=_age_sort_key)
    elif sort_by == "duration":
        audios.sort(key=lambda x: x.duration)
    if spec.get("reverse", False):
        audios.reverse()

    # slice; "start" can be stored as None when the form field was left empty
    audios = audios[int(spec.get("start") or 0) :]
    audios = audios[: int(spec.get("howmany", 1))]

    # copy local
    return [
        download_http(a.url, copy=spec.get("copy", True), prefix="podcast")
        for a in audios
    ]
# TODO: testing
# TODO: lxml should maybe be optional?
# TODO: ui
if __name__ == "__main__":
    # Manual smoke test, not a real test suite: list every episode found in
    # the feed given as first command-line argument.
    logging.basicConfig(level=logging.DEBUG)
    feed_tree = get_tree(sys.argv[1])
    for episode in get_urls(feed_tree):
        print(" -", repr(episode))

View file

@ -73,6 +73,7 @@ setup(
"mpd = larigira.audiogen_mpdrandom:generate_by_artist",
"static = larigira.audiogen_static:generate",
"http = larigira.audiogen_http:generate",
"podcast = larigira.audiogen_podcast:generate",
"randomdir = larigira.audiogen_randomdir:generate",
"mostrecent = larigira.audiogen_mostrecent:generate",
"script = larigira.audiogen_script:generate",
@ -95,6 +96,7 @@ setup(
"larigira.audioform_create": [
"static = larigira.audioform_static:StaticAudioForm",
"http = larigira.audioform_http:AudioForm",
"podcast = larigira.audioform_podcast:AudioForm",
"script = larigira.audioform_script:ScriptAudioForm",
"randomdir = larigira.audioform_randomdir:Form",
"mostrecent = larigira.audioform_mostrecent:AudioForm",
@ -102,6 +104,7 @@ setup(
"larigira.audioform_receive": [
"static = larigira.audioform_static:staticaudio_receive",
"http = larigira.audioform_http:audio_receive",
"podcast = larigira.audioform_podcast:audio_receive",
"script = larigira.audioform_script:scriptaudio_receive",
"randomdir = larigira.audioform_randomdir:receive",
"mostrecent = larigira.audioform_mostrecent:audio_receive",