Merge branch 'podcast'
This commit is contained in:
commit
88ff77b968
6 changed files with 327 additions and 31 deletions
|
@ -33,6 +33,48 @@ mostrecent
|
||||||
It is similar to randomdir, but instead of picking randomly, picks the most
|
It is similar to randomdir, but instead of picking randomly, picks the most
|
||||||
recent file (according to the ctime).
|
recent file (according to the ctime).
|
||||||
|
|
||||||
|
podcast
|
||||||
|
------------
|
||||||
|
|
||||||
|
This is probably the most powerful generator that comes included with
|
||||||
|
``larigira``. To use this generator, you would need to have a valid podcast
|
||||||
|
URL. Beware, here the word *podcast* refers to its very specific meaning of
|
||||||
|
an XML-based format which resembles an RSS feed but has more media-specific
|
||||||
|
entities. See `this specification
|
||||||
|
<https://github.com/simplepie/simplepie-ng/wiki/Spec:-iTunes-Podcast-RSS>`_ for
|
||||||
|
more technical details.
|
||||||
|
|
||||||
|
So, if you have a valid podcast URL, larigira can look at it, extract audios,
|
||||||
|
and play the most recent one. Here are some typical use cases for this:
|
||||||
|
* You want to play reruns based on what you host on your radio's website.
|
||||||
|
* You want to play some audio from some other radio (or other kind of podcast
|
||||||
|
source)
|
||||||
|
|
||||||
|
The podcast form has many many options, but I promise you that 90% of the cases
|
||||||
|
are easily solved using ONLY the first option: enter the URL of the podcast
|
||||||
|
and... it works!
|
||||||
|
|
||||||
|
So, what are all the other options for? Well, to cover some other use cases.
|
||||||
|
|
||||||
|
For example, let's say that at night you want to play a *random* show (not the
|
||||||
|
last one, which is the default) that happened on your radio. Then you can
|
||||||
|
change the "sort by" to be "random". Easy, right?
|
||||||
|
|
||||||
|
Another typical use case is selecting an audio that has a duration which "fits"
|
||||||
|
with the schedule of your radio: not too long and not too short. You can do
|
||||||
|
that with the "min len" and "max len" fields. For example, setting a `min_len`
|
||||||
|
of `30min` and `max_len` of `1h15m` you can avoid picking flash news (too
|
||||||
|
short) and very long shows.
|
||||||
|
|
||||||
|
You can do many other things with its options, but I leave those to your
|
||||||
|
imagination. Let's just clarify the workflow:
|
||||||
|
|
||||||
|
* the podcast URL is fetched and audio information is retrieved
|
||||||
|
* filter: audios are filtered by min/max length
|
||||||
|
* sort: audios are sorted according to `sort_by` and `reverse`
|
||||||
|
* select: the n-th episode is fetched, according to the `start` field
|
||||||
|
|
||||||
|
|
||||||
script
|
script
|
||||||
--------
|
--------
|
||||||
|
|
||||||
|
|
63
larigira/audioform_podcast.py
Normal file
63
larigira/audioform_podcast.py
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
from flask_wtf import Form
|
||||||
|
from wtforms import (BooleanField, IntegerField, SelectField, StringField,
|
||||||
|
SubmitField, validators)
|
||||||
|
from wtforms.fields.html5 import URLField
|
||||||
|
|
||||||
|
|
||||||
|
class AudioForm(Form):
    """Form used to create or edit a ``podcast`` audio specification.

    Besides the mandatory ``nick`` and ``url``, it exposes the optional
    length filters (``min_len``/``max_len``), the ordering options
    (``sort_by``/``reverse``) and the selection offset (``start``).
    """

    nick = StringField(
        "Audio nick",
        # DataRequired is the non-deprecated name of the ``required`` alias.
        validators=[validators.DataRequired()],
        description="A simple name to recognize this audio",
    )
    url = URLField(
        "URL",
        validators=[validators.DataRequired()],
        description="URL of the podcast; it must be valid xml",
    )

    # TODO: group by filters/sort/select
    min_len = StringField(
        "Accetta solo audio lunghi almeno:",
        description="Leaving this empty will disable this filter",
    )
    max_len = StringField(
        "Accetta solo audio lunghi al massimo:",
        description="Leaving this empty will disable this filter",
    )
    sort_by = SelectField(
        "Sort episodes",
        choices=[
            ("none", "Don't sort"),
            ("random", "Random"),
            ("duration", "Duration"),
            ("date", "date"),
        ],
    )
    start = IntegerField(
        "Play from episode number",
        description="Episodes count from 0; 0 is a sane default",
    )
    reverse = BooleanField("Reverse sort (descending)")
    submit = SubmitField("Submit")

    def populate_from_audiospec(self, audiospec):
        """Fill the form fields from an existing audiospec mapping.

        Keys missing from ``audiospec`` leave the matching field untouched.
        ``start`` falls back to 0 when it is missing, ``None`` or empty.
        """
        for key in ("nick", "url", "sort_by", "reverse", "min_len", "max_len"):
            if key in audiospec:
                getattr(self, key).data = audiospec[key]
        # ``audiospec.get("start", 0)`` would still hand ``None`` to ``int()``
        # when the key exists with an empty value, so normalize it first.
        self.start.data = int(audiospec.get("start") or 0)
|
||||||
|
|
||||||
|
|
||||||
|
def audio_receive(form):
    """Build a ``podcast`` audiospec dictionary from a submitted form.

    The ``data`` attribute of each relevant form field is copied under the
    key of the same name, next to the fixed ``"kind": "podcast"`` entry.
    """
    field_names = (
        "nick",
        "url",
        "sort_by",
        "reverse",
        "min_len",
        "max_len",
        "start",
    )
    spec = {"kind": "podcast"}
    spec.update((name, getattr(form, name).data) for name in field_names)
    return spec
|
|
@ -1,31 +1,4 @@
|
||||||
import os
|
from larigira.fsutils import download_http
|
||||||
import logging
|
|
||||||
import posixpath
|
|
||||||
from tempfile import mkstemp
|
|
||||||
import urllib.request
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def put(url, destdir=None, copy=False):
    """Validate an HTTP(S) audio URL and optionally download it.

    :param url: address of the audio file; only ``http``/``https`` URLs whose
        path ends in a known audio extension are accepted
    :param destdir: directory for the downloaded copy (``mkstemp`` default
        when ``None``)
    :param copy: when false the URL itself is returned and nothing is fetched
    :returns: ``None`` for rejected URLs, the unchanged ``url`` when ``copy``
        is false, otherwise a ``file://`` URI of the downloaded copy
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        log.warning("Not a valid URL: %s", url)
        return None
    # Read the extension from the URL *path*: splitting the whole URL on "."
    # would reject valid files followed by a query string or fragment
    # (e.g. "episode.mp3?token=abc").
    ext = posixpath.splitext(parsed.path)[1].lstrip(".")
    if ext.lower() not in ("mp3", "ogg", "oga", "wma", "m4a"):
        log.warning('Invalid format (%s) for "%s"', ext, url)
        return None
    if not copy:
        return url
    fname = posixpath.basename(parsed.path)
    # sanitize
    fname = "".join(c for c in fname if c.isalnum() or c in "._-").rstrip()
    fd, tmp_path = mkstemp(
        suffix="." + ext, prefix="http-%s-" % fname, dir=destdir
    )
    os.close(fd)
    log.info("downloading %s -> %s", url, tmp_path)
    try:
        urllib.request.urlretrieve(url, tmp_path)
    except Exception:
        # Do not leave an empty temporary file behind when the download fails.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
    return "file://%s" % os.path.realpath(tmp_path)
|
|
||||||
|
|
||||||
|
|
||||||
def generate(spec):
|
def generate(spec):
|
||||||
|
@ -35,10 +8,10 @@ def generate(spec):
|
||||||
Recognized argument is "paths" (list of static paths)
|
Recognized argument is "paths" (list of static paths)
|
||||||
"""
|
"""
|
||||||
if "urls" not in spec:
|
if "urls" not in spec:
|
||||||
raise ValueError("Malformed audiospec: missing 'paths'")
|
raise ValueError("Malformed audiospec: missing 'urls'")
|
||||||
|
|
||||||
for url in spec["urls"]:
|
for url in spec["urls"]:
|
||||||
ret = put(url, copy=True)
|
ret = download_http(url, copy=True, prefix="http")
|
||||||
if ret is None:
|
if ret is None:
|
||||||
continue
|
continue
|
||||||
yield ret
|
yield ret
|
||||||
|
|
184
larigira/audiogen_podcast.py
Normal file
184
larigira/audiogen_podcast.py
Normal file
|
@ -0,0 +1,184 @@
|
||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import sys
|
||||||
|
from subprocess import CalledProcessError, check_output
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from larigira.fsutils import download_http
|
||||||
|
from lxml import html
|
||||||
|
from pytimeparse.timeparse import timeparse
|
||||||
|
|
||||||
|
|
||||||
|
def delta_humanreadable(tdelta):
    """Render a ``timedelta`` compactly as ``<days>d<hours>h`` or ``<hours>h``.

    ``None`` is rendered as an empty string. Minutes and seconds are dropped.
    """
    if tdelta is None:
        return ""
    # A timedelta keeps the sub-day remainder in ``.seconds``.
    whole_hours = tdelta.seconds // 3600
    if not tdelta.days:
        return "{}h".format(whole_hours)
    return "{}d{}h".format(tdelta.days, whole_hours)
|
||||||
|
|
||||||
|
|
||||||
|
def get_duration(url):
    """Return the duration, in whole seconds, of the media at ``url``.

    The value is read from the ``duration=`` line printed by ``ffprobe``.

    :raises ValueError: if ``ffprobe`` exits with an error, or if its output
        contains no usable ``duration=`` value
    """
    command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-i",
        url,
    ]
    try:
        output = check_output(command)
    except CalledProcessError as exc:
        raise ValueError("error probing `%s`" % url) from exc
    for line in output.split(b"\n"):
        if line.startswith(b"duration="):
            value = line.split(b"=", 1)[1]
            break
    else:
        # A bare next() here would leak StopIteration, which turns into a
        # confusing RuntimeError when this runs inside a generator.
        raise ValueError("no duration reported for `%s`" % url)
    return int(float(value))
|
||||||
|
|
||||||
|
|
||||||
|
class Audio:
    """One podcast episode: its URL, duration and publication date."""

    def __init__(self, url, duration=None, date=None):
        self.url = url
        # Probe the file only when no duration was supplied.
        self.duration = (
            get_duration(url.encode("utf-8")) if duration is None else duration
        )
        self.date = date
        self.end_date = datetime.datetime(
            9999, 12, 31, tzinfo=datetime.timezone.utc
        )

    @staticmethod
    def _utcnow():
        """Return the current time as a timezone-aware UTC datetime."""
        return datetime.datetime.now(datetime.timezone.utc)

    def __str__(self):
        return self.url

    def __repr__(self):
        human_age = delta_humanreadable(self.age)
        return "<Audio {} ({} {})>".format(self.url, self.duration, human_age)

    @property
    def urls(self):
        """The episode URL wrapped in a single-element list."""
        return [self.url]

    @property
    def age(self):
        """Time elapsed since ``date``, or ``None`` when the date is unknown."""
        if self.date is None:
            return None
        return self._utcnow() - self.date

    @property
    def valid(self):
        """Whether ``end_date`` has not been reached yet."""
        return self._utcnow() <= self.end_date
|
||||||
|
|
||||||
|
|
||||||
|
def get_tree(feed_url, timeout=30):
    """Fetch and parse a podcast feed from an HTTP(S) URL or a local file.

    :param feed_url: ``http:``/``https:`` URL, or path of a local file
    :param timeout: seconds to wait for the HTTP server before giving up
    :returns: the parsed lxml document
    :raises ValueError: if ``feed_url`` is a local path that does not exist
    """
    if feed_url.startswith(("http:", "https:")):
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(feed_url, timeout=timeout)
        return html.fromstring(response.content)
    if not os.path.exists(feed_url):
        raise ValueError("file not found: {}".format(feed_url))
    # The context manager closes the handle once parsing has consumed it.
    with open(feed_url) as feed_file:
        return html.parse(feed_file)
|
||||||
|
|
||||||
|
|
||||||
|
def get_item_date(el):
    """Return the publication date of a feed item, or ``None``.

    The text of the item's ``pubdate`` child is tried against an ISO-like
    format and the RFC 822 style format, both with a UTC offset. ``None`` is
    returned when the child is missing, empty or matches neither format.
    """
    el_date = el.find("pubdate")
    if el_date is None:
        return None
    # Surrounding whitespace would otherwise make strptime reject the value.
    raw_date = (el_date.text or "").strip()
    for time_format in ("%Y-%m-%dT%H:%M:%S%z", "%a, %d %b %Y %H:%M:%S %z"):
        try:
            return datetime.datetime.strptime(raw_date, time_format)
        except ValueError:
            # Not this format; try the next one.
            continue
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_audio_from_item(item):
    """Build an ``Audio`` from a feed ``<item>`` element.

    The URL comes from the item's ``enclosure`` child. A ``duration`` child
    in ``[[HH:]MM:]SS`` form is converted to seconds; when it is missing,
    zero or malformed, no duration is passed and ``Audio`` determines it.

    :raises ValueError: if the item has no enclosure URL
    """
    encl = item.find("enclosure")
    url = encl.get("url") if encl is not None else None
    if not url:
        raise ValueError("podcast item has no enclosure url")
    audio_args = {}
    duration_text = item.findtext("duration")
    if duration_text is not None:
        parts = duration_text.strip().split(":")
        try:
            # Rightmost field is seconds, then minutes, then hours.
            total_seconds = sum(
                int(num) * (60 ** i) for i, num in enumerate(reversed(parts))
            )
        except ValueError:
            # Empty or non-integer field: treat the duration as unknown.
            total_seconds = 0
        if total_seconds:
            audio_args["duration"] = total_seconds
    return Audio(url, **audio_args)
|
||||||
|
|
||||||
|
|
||||||
|
def get_urls(tree):
    """Yield one ``Audio`` for every ``<item>`` found in the parsed feed.

    An audio that has no date yet gets the one read from its item.
    """
    for item in tree.xpath("//item"):
        episode = get_audio_from_item(item)
        if episode.date is None:
            episode.date = get_item_date(item)
        yield episode
|
||||||
|
|
||||||
|
|
||||||
|
def parse_duration(arg):
    """Convert a duration string to seconds.

    A purely decimal string is taken as a number of seconds; anything else
    is handed to ``timeparse``.

    :raises ValueError: if ``timeparse`` returns ``None`` for ``arg``
    """
    seconds = int(arg) if arg.isdecimal() else timeparse(arg)
    if seconds is None:
        raise ValueError("%r is not a valid duration" % arg)
    return seconds
|
||||||
|
|
||||||
|
|
||||||
|
def generate(spec):
    """Pick episodes from a podcast feed according to ``spec``.

    Workflow: fetch the feed at ``spec["url"]``, filter by ``min_len`` /
    ``max_len``, sort according to ``sort_by`` and ``reverse``, keep
    ``howmany`` episodes (default 1) starting at index ``start`` (default 0),
    then pass each one to ``download_http`` (``copy`` defaults to true).

    :returns: list of the values returned by ``download_http``; episodes it
        rejected (``None``) are left out
    :raises ValueError: if ``spec`` has no ``url``
    """
    if "url" not in spec:
        raise ValueError("Malformed audiospec: missing 'url'")

    def int_option(name, default):
        # A form may store an empty value (None or "") for an integer field.
        value = spec.get(name)
        return default if value in (None, "") else int(value)

    def age_key(audio):
        # Undated episodes cannot be compared with dated ones; sort them
        # after every dated episode instead of raising TypeError.
        age = audio.age
        if age is None:
            return (1, datetime.timedelta.max)
        return (0, age)

    audios = list(get_urls(get_tree(spec["url"])))

    # filter (limits are parsed once, not once per episode)
    if spec.get("min_len"):
        min_len = parse_duration(spec["min_len"])
        audios = [a for a in audios if a.duration >= min_len]
    if spec.get("max_len"):
        max_len = parse_duration(spec["max_len"])
        audios = [a for a in audios if a.duration <= max_len]

    # sort
    sort_by = spec.get("sort_by", "none")
    if sort_by == "random":
        random.shuffle(audios)
    elif sort_by == "date":
        audios.sort(key=age_key)
    elif sort_by == "duration":
        audios.sort(key=lambda x: x.duration)

    if spec.get("reverse", False):
        audios.reverse()

    # slice
    audios = audios[int_option("start", 0):]
    audios = audios[:int_option("howmany", 1)]

    # copy local; download_http returns None for URLs it rejects
    do_copy = spec.get("copy", True)
    downloaded = (
        download_http(a.url, copy=do_copy, prefix="podcast") for a in audios
    )
    return [path for path in downloaded if path is not None]
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: testing
|
||||||
|
# TODO: lxml should maybe be optional?
|
||||||
|
# TODO: ui
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test, not a real test suite: print every episode found in
    # the feed given as the first command-line argument.
    logging.basicConfig(level=logging.DEBUG)
    feed_location = sys.argv[1]
    for episode in get_urls(get_tree(feed_location)):
        print(" -", repr(episode))
|
|
@ -1,6 +1,13 @@
|
||||||
import os
|
|
||||||
import fnmatch
|
import fnmatch
|
||||||
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
import os
|
||||||
|
import posixpath
|
||||||
|
import urllib.request
|
||||||
|
from tempfile import mkstemp
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def scan_dir(dirname, extension=None):
|
def scan_dir(dirname, extension=None):
|
||||||
|
@ -37,3 +44,27 @@ def shortname(path):
|
||||||
name = name.rsplit(".", 1)[0] # no extension
|
name = name.rsplit(".", 1)[0] # no extension
|
||||||
name = "".join(c for c in name if c.isalnum()) # no strange chars
|
name = "".join(c for c in name if c.isalnum()) # no strange chars
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
def download_http(url, destdir=None, copy=False, prefix="httpdl"):
    """Validate an HTTP(S) audio URL and optionally download it.

    :param url: address of the audio file; only ``http``/``https`` URLs whose
        path ends in a known audio extension are accepted
    :param destdir: directory for the downloaded copy (``mkstemp`` default
        when ``None``)
    :param copy: when false the URL itself is returned and nothing is fetched
    :param prefix: leading part of the temporary file name
    :returns: ``None`` for rejected URLs, the unchanged ``url`` when ``copy``
        is false, otherwise a ``file://`` URI of the downloaded copy
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        log.warning("Not a valid URL: %s", url)
        return None
    # Read the extension from the URL *path*: splitting the whole URL on "."
    # would reject valid files followed by a query string or fragment
    # (e.g. "episode.mp3?token=abc"), which podcast enclosures often carry.
    ext = posixpath.splitext(parsed.path)[1].lstrip(".")
    if ext.lower() not in ("mp3", "ogg", "oga", "wma", "m4a"):
        log.warning('Invalid format (%s) for "%s"', ext, url)
        return None
    if not copy:
        return url
    fname = posixpath.basename(parsed.path)
    # sanitize
    fname = "".join(c for c in fname if c.isalnum() or c in "._-").rstrip()
    fd, tmp_path = mkstemp(
        suffix="." + ext, prefix="%s-%s-" % (prefix, fname), dir=destdir
    )
    os.close(fd)
    log.info("downloading %s -> %s", url, tmp_path)
    try:
        urllib.request.urlretrieve(url, tmp_path)
    except Exception:
        # Do not leave an empty temporary file behind when the download fails.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
    return "file://%s" % os.path.realpath(tmp_path)
|
||||||
|
|
3
setup.py
3
setup.py
|
@ -73,6 +73,7 @@ setup(
|
||||||
"mpd = larigira.audiogen_mpdrandom:generate_by_artist",
|
"mpd = larigira.audiogen_mpdrandom:generate_by_artist",
|
||||||
"static = larigira.audiogen_static:generate",
|
"static = larigira.audiogen_static:generate",
|
||||||
"http = larigira.audiogen_http:generate",
|
"http = larigira.audiogen_http:generate",
|
||||||
|
"podcast = larigira.audiogen_podcast:generate",
|
||||||
"randomdir = larigira.audiogen_randomdir:generate",
|
"randomdir = larigira.audiogen_randomdir:generate",
|
||||||
"mostrecent = larigira.audiogen_mostrecent:generate",
|
"mostrecent = larigira.audiogen_mostrecent:generate",
|
||||||
"script = larigira.audiogen_script:generate",
|
"script = larigira.audiogen_script:generate",
|
||||||
|
@ -95,6 +96,7 @@ setup(
|
||||||
"larigira.audioform_create": [
|
"larigira.audioform_create": [
|
||||||
"static = larigira.audioform_static:StaticAudioForm",
|
"static = larigira.audioform_static:StaticAudioForm",
|
||||||
"http = larigira.audioform_http:AudioForm",
|
"http = larigira.audioform_http:AudioForm",
|
||||||
|
"podcast = larigira.audioform_podcast:AudioForm",
|
||||||
"script = larigira.audioform_script:ScriptAudioForm",
|
"script = larigira.audioform_script:ScriptAudioForm",
|
||||||
"randomdir = larigira.audioform_randomdir:Form",
|
"randomdir = larigira.audioform_randomdir:Form",
|
||||||
"mostrecent = larigira.audioform_mostrecent:AudioForm",
|
"mostrecent = larigira.audioform_mostrecent:AudioForm",
|
||||||
|
@ -102,6 +104,7 @@ setup(
|
||||||
"larigira.audioform_receive": [
|
"larigira.audioform_receive": [
|
||||||
"static = larigira.audioform_static:staticaudio_receive",
|
"static = larigira.audioform_static:staticaudio_receive",
|
||||||
"http = larigira.audioform_http:audio_receive",
|
"http = larigira.audioform_http:audio_receive",
|
||||||
|
"podcast = larigira.audioform_podcast:audio_receive",
|
||||||
"script = larigira.audioform_script:scriptaudio_receive",
|
"script = larigira.audioform_script:scriptaudio_receive",
|
||||||
"randomdir = larigira.audioform_randomdir:receive",
|
"randomdir = larigira.audioform_randomdir:receive",
|
||||||
"mostrecent = larigira.audioform_mostrecent:audio_receive",
|
"mostrecent = larigira.audioform_mostrecent:audio_receive",
|
||||||
|
|
Loading…
Reference in a new issue