feed: pesca audio dai feed con molte magie
This commit is contained in:
commit
235d7a938f
1 changed files with 161 additions and 0 deletions
161
feed
Executable file
161
feed
Executable file
|
@ -0,0 +1,161 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
from argparse import ArgumentParser
|
||||
from subprocess import check_output
|
||||
from collections import OrderedDict
|
||||
import re
|
||||
|
||||
from lxml import html
|
||||
import requests
|
||||
|
||||
|
||||
class Audio:
    """A single audio file referenced by a feed item.

    Attributes:
        url: location of the audio, as found in the feed.
        durata: length of the audio in seconds.
    """

    def __init__(self, url, durata=None):
        """Store ``url`` and its duration.

        When ``durata`` is None the duration is obtained by calling
        ``get_duration`` on the UTF-8 encoded URL.
        """
        self.url = url
        self.durata = (get_duration(url.encode('utf-8'))
                       if durata is None else durata)

    def __repr__(self):
        """Debug form: the URL followed by the duration in parentheses."""
        template = '<Audio {} ({})>'
        return template.format(self.url, self.durata)

    def __str__(self):
        """Plain form: just the URL."""
        return self.url
|
||||
|
||||
|
||||
class AudioGroup(list):
    """An ordered list of audios that belong together, with a description.

    The group *is* the list of its audios, so iteration, indexing,
    membership tests and ``len()`` all agree with what was appended.
    ``audios`` is kept as an alias of the group itself for code that
    accesses the items through that attribute.

    Attributes:
        description: free-form label for the group ('' when not given).
    """

    def __init__(self, description=None):
        """Create an empty group labelled ``description``."""
        super().__init__()
        self.description = description or ''

    @property
    def audios(self):
        """The audios of this group (the group itself, not a copy)."""
        return self

    @audios.setter
    def audios(self, value):
        # Replace the content in place so the group and ``audios`` can
        # never drift apart.
        self[:] = value

    def __str__(self):
        """One line per audio, each rendered with ``str``."""
        return '\n'.join(str(a) for a in self)

    def __repr__(self):
        """Debug form: description, total duration, then one repr per line."""
        return '<AudioGroup "{}" ({})\n{}>'.format(
            self.description,
            self.durata,
            '\n'.join(repr(a) for a in self))

    @property
    def durata(self):
        """Sum of the members' ``durata``, skipping members where it is None."""
        return sum(a.durata for a in self if a.durata is not None)
|
||||
|
||||
|
||||
def get_tree(feed_url):
    """Load a feed and return it parsed by ``lxml.html``.

    Args:
        feed_url: an ``http:``/``https:`` URL, fetched with ``requests``,
            or otherwise the path of a local file.

    Returns:
        The result of ``html.fromstring`` for remote feeds, or of
        ``html.parse`` for local files; callers query either with
        ``.xpath()``.

    Raises:
        ValueError: if ``feed_url`` is a local path that does not exist.
    """
    if feed_url.startswith(('http:', 'https:')):
        return html.fromstring(requests.get(feed_url).content)

    if not os.path.exists(feed_url):
        raise ValueError("file not found: {}".format(feed_url))
    # html.parse reads the whole document before returning, so the handle
    # can be closed right away instead of being leaked.
    with open(feed_url) as feed_file:
        return html.parse(feed_file)
|
||||
|
||||
|
||||
def get_audio_from_description(text):
    """Build an ``Audio`` from the text of an item's description.

    The first non-empty line is the audio URL. An optional second
    non-empty line of the form ``<name>=<value>`` carries the duration:
    the first run of digits in ``<value>`` is used. When that line is
    missing or cannot be parsed, the duration is left as None so that
    ``Audio`` works it out by itself.

    Args:
        text: the description text; may be None for an empty element.

    Returns:
        The ``Audio`` described by ``text``.

    Raises:
        ValueError: if ``text`` contains no non-empty line, hence no URL.
    """
    # non-empty lines
    lines = [line.strip()
             for line in (text or '').split('\n')
             if line.strip()]
    if not lines:
        raise ValueError('empty description: no audio url found')

    url = lines[0]
    durata = None
    if len(lines) > 1:
        fields = lines[1].split('=')
        # Only the text right after the first '=' is searched for digits.
        digits = re.findall(r'\d+', fields[1]) if len(fields) > 1 else []
        if digits:
            durata = int(digits[0])
    return Audio(url, durata)
|
||||
|
||||
|
||||
def get_urls(tree):
    """Lazily yield one Audio per ``//item/description`` element of ``tree``.

    Audios come out in the order the xpath query returns the elements;
    each one is built from the element's text by
    ``get_audio_from_description``.
    """
    texts = (node.text for node in tree.xpath('//item/description'))
    yield from map(get_audio_from_description, texts)
|
||||
|
||||
|
||||
def get_grouped_urls(tree):
    """Collect the audios of ``tree`` into AudioGroups keyed by item guid.

    Every ``//item`` contributes the Audio parsed from its first
    ``description`` child to the group named after its first ``guid``
    child (whitespace-stripped).

    Returns:
        An OrderedDict mapping guid -> AudioGroup, in order of first
        appearance of each guid.
    """
    by_guid = OrderedDict()
    for entry in tree.xpath('//item'):
        key = entry.xpath('guid')[0].text.strip()
        try:
            bucket = by_guid[key]
        except KeyError:
            # First item seen with this guid: open a new group for it.
            bucket = by_guid[key] = AudioGroup(key)
        description = entry.xpath('description')[0].text
        bucket.append(get_audio_from_description(description))
    return by_guid
|
||||
|
||||
|
||||
def get_duration(url):
    """Return the duration of the media at ``url`` in whole seconds.

    Runs ``ffprobe`` asking only for the container's ``duration`` entry
    and truncates the reported value to an int.

    Args:
        url: location of the media, passed to ffprobe's ``-i`` option.

    Raises:
        subprocess.CalledProcessError: if ffprobe exits with an error.
        ValueError: if the output has no ``duration=`` line, or its value
            is not a number.
    """
    output = check_output(['ffprobe', '-v', 'error',
                           '-show_entries', 'format=duration',
                           '-i', url])
    for line in output.split(b'\n'):
        if line.startswith(b'duration='):
            return int(float(line.split(b'=')[1]))
    # Raise explicitly instead of letting next() leak StopIteration: this
    # function is called while a generator is being consumed, where a stray
    # StopIteration turns into a confusing RuntimeError.
    raise ValueError('ffprobe reported no duration for {!r}'.format(url))
|
||||
|
||||
|
||||
def get_parser():
    """Build the command-line parser of the script.

    Returns:
        An ArgumentParser with the options ``--start``, ``--max-len``,
        ``--howmany``, ``--slotsize``, ``--group``, ``--debug`` and the
        positional ``url``.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would put this whole
    # sentence in the usage line in place of the program name.
    p = ArgumentParser(
        description='Get music from a (well-specified) xml feed')
    p.add_argument('--start', default=0, type=int,
                   help='0-indexed start number. '
                   'By default, play from most recent')
    # metavar matches the name used in the help sentence.
    p.add_argument('--max-len', default=0, type=int, metavar='MAXLEN',
                   help='Exclude any audio that is longer than MAXLEN seconds')
    p.add_argument('--howmany', default=1, type=int,
                   help='If not specified, only 1 will be played')
    p.add_argument('--slotsize', help='Seconds between each audio', type=int)
    p.add_argument('--group', help='Group articles', default=False,
                   action='store_true')
    p.add_argument('--debug', help='Debug messages', default=False,
                   action='store_true')
    p.add_argument('url')
    return p
|
||||
|
||||
|
||||
def main():
    """Print the selected entries of a feed, one per line.

    Parses the command line, loads the feed, drops the entries longer
    than ``--max-len`` seconds (0 disables the filter) and keeps
    ``--howmany`` of them starting from ``--start``. With ``--group``
    the entries are the AudioGroups built from the feed instead of the
    single audios. With ``--debug`` each entry is printed through
    ``repr`` rather than ``str``.

    When ``--slotsize`` is given, every entry except the last one that
    is shorter than the slot is followed by a filler line stating how
    many seconds are left in the slot.
    """
    args = get_parser().parse_args()

    # download the feed
    tree = get_tree(args.url)

    def fits(entry):
        # Keep everything when no limit was requested.
        return args.max_len == 0 or entry.durata <= args.max_len

    if args.group:
        candidates = get_grouped_urls(tree).values()
    else:
        candidates = get_urls(tree)

    kept = [entry for entry in candidates if fits(entry)]
    window = kept[args.start:args.start + args.howmany]
    if not window:
        return

    render = repr if args.debug else str
    final_index = len(window) - 1
    for index, entry in enumerate(window):
        print(render(entry))
        # The last entry never gets a filler line after it.
        if index == final_index or args.slotsize is None:
            continue
        gap = args.slotsize - entry.durata
        if gap > 0:
            print('## musica per {} secondi'.format(gap))
|
||||
|
||||
|
||||
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
|
Loading…
Reference in a new issue