278 خطوط
8.6 KiB
Python
Executable file
278 خطوط
8.6 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
'''
|
|
Feed parser with many features
|
|
|
|
from a feed, it supports filtering, subslicing, random picking
|
|
|
|
Besides feeds, it supports picking files from directories
|
|
'''
|
|
import os
|
|
import logging
|
|
from argparse import ArgumentParser
|
|
from subprocess import check_output
|
|
from collections import OrderedDict
|
|
import re
|
|
import urllib.request
|
|
from urllib.parse import urlparse, unquote
|
|
import posixpath
|
|
import random
|
|
from bisect import bisect
|
|
|
|
from lxml import html
|
|
import requests
|
|
|
|
|
|
def weighted_choice(values, weights):
    '''
    random.choice with weights

    weights must be non-negative numbers, and at least one of them must
    be greater than 0.

    Their meaning is "relative", that is [1,2,3] is the same as [2,4,6]

    Raises ValueError if the two sequences differ in length, if a weight
    is negative, or if no weight is greater than 0 (which includes empty
    input).
    '''
    # Explicit checks instead of `assert`: asserts vanish under `python -O`.
    if len(values) != len(weights):
        raise ValueError('values and weights must have the same length')

    total = 0
    cum_weights = []
    for w in weights:
        if w < 0:
            # A negative weight would make the cumulative list
            # non-monotonic, and bisect() needs it sorted.
            raise ValueError('weights must not be negative')
        total += w
        cum_weights.append(total)

    if total <= 0:
        raise ValueError('at least one weight must be greater than 0')

    # x falls in [0, total); bisect() finds the first cumulative weight
    # strictly greater than x, i.e. the bucket x landed in.
    x = random.random() * total
    return values[bisect(cum_weights, x)]
|
|
|
|
|
|
class Audio:
    '''
    A single audio item: a URL together with its duration.

    When no duration is supplied, it is obtained by calling
    get_duration() on the UTF-8 encoded URL.
    '''

    def __init__(self, url, duration=None):
        self.url = url
        self.duration = (get_duration(url.encode('utf-8'))
                         if duration is None else duration)

    def __str__(self):
        return self.url

    def __repr__(self):
        template = '<Audio {} ({})>'
        return template.format(self.url, self.duration)

    @property
    def urls(self):
        # Same attribute name as AudioGroup.urls, so callers can treat
        # a single Audio like a one-element group.
        return [self.url]
|
|
|
|
|
|
class AudioGroup(list):
    '''
    An ordered group of audio items sharing one description.

    The items live in the list itself, so len(), iteration, indexing and
    truthiness all agree with each other. `audios` is kept as an alias of
    the group for code that reads or assigns that attribute.

    Items are expected to expose `url` and `duration` attributes.
    '''

    def __init__(self, description=None):
        super().__init__()
        self.description = description or ''

    @property
    def audios(self):
        '''The items of the group (the group itself, not a copy).'''
        return self

    @audios.setter
    def audios(self, items):
        # Replace the content in place so existing references stay valid.
        self[:] = items

    def __str__(self):
        return '\n'.join(str(a) for a in self)

    def __repr__(self):
        return '<AudioGroup "{}" ({})\n{} >'.\
            format(self.description, self.duration,
                   '\n'.join(' ' + repr(a) for a in self))

    @property
    def duration(self):
        '''Sum of the known durations; items with duration None are skipped.'''
        return sum(a.duration for a in self if a.duration is not None)

    @property
    def urls(self):
        '''URLs of all items, in insertion order.'''
        return [a.url for a in self]
|
|
|
|
|
|
def get_tree(feed_url):
    '''
    Load a feed and return it parsed by lxml.html.

    `feed_url` is either an http(s) URL, fetched with requests, or the
    path of a local file.

    Raises ValueError if a local path does not exist.
    '''
    if feed_url.startswith(('http:', 'https:')):
        return html.fromstring(requests.get(feed_url).content)

    if not os.path.exists(feed_url):
        raise ValueError("file not found: {}".format(feed_url))

    # The document is parsed completely inside html.parse(), so the file
    # can be closed as soon as it returns instead of leaking the handle.
    with open(feed_url) as feed_file:
        return html.parse(feed_file)
|
|
|
|
|
|
def get_audio_from_description(text):
    '''
    Build an Audio from the text of an item description.

    Blank lines are ignored. The first remaining line is the
    (percent-encoded) URL. If a second line exists, the first run of
    digits found after its first `=` is used as the duration; otherwise
    the duration is left to Audio to work out.
    '''
    stripped = (raw.strip() for raw in text.split('\n'))
    lines = [entry for entry in stripped if entry]

    url = lines[0]
    if len(lines) > 1:
        after_equals = lines[1].split('=')[1].strip()
        duration = int(re.findall(r'\d+', after_equals)[0])
    else:
        duration = None
    return Audio(unquote(url), duration)
|
|
|
|
|
|
# adapted from larigira.fsutils (extension matching made stricter)
def scan_dir_audio(dirname, extensions=('mp3', 'oga', 'wav', 'ogg')):
    '''
    Recursively yield the paths of the audio files found under `dirname`.

    A file counts as audio when its extension (the part after the last
    dot, compared case-insensitively) is listed in `extensions`. Files
    with no extension never match, even when their whole name happens to
    be e.g. "wav".
    '''
    for root, _dirnames, filenames in os.walk(dirname):
        for fname in filenames:
            # splitext() returns '' for names without a real extension,
            # unlike split('.')[-1], which returns the whole name.
            ext = os.path.splitext(fname)[1][1:].lower()
            if ext and ext in extensions:
                yield os.path.join(root, fname)
|
|
|
|
|
|
def get_audio_from_dir(dirpath):
    '''
    Return one Audio per audio file found (recursively) in `dirpath`.

    Each Audio gets a `file://` URL built from the file's real path.
    '''
    found = []
    for fpath in scan_dir_audio(dirpath):
        found.append(Audio('file://' + os.path.realpath(fpath)))
    return found
|
|
|
|
|
|
def get_urls(tree):
    '''
    Yield an Audio for every `//item/description` element of the feed.
    '''
    for description in tree.xpath('//item/description'):
        yield get_audio_from_description(description.text)
|
|
|
|
|
|
def get_grouped_urls(tree):
    '''
    Group the audios of a feed by the guid of their item.

    Returns an OrderedDict mapping each guid (stripped) to an AudioGroup,
    in the order in which the guids first appear in the feed.
    '''
    grouped = OrderedDict()
    for entry in tree.xpath('//item'):
        key = entry.xpath('guid')[0].text.strip()
        try:
            bucket = grouped[key]
        except KeyError:
            bucket = grouped[key] = AudioGroup(key)
        description = entry.xpath('description')[0].text
        bucket.append(get_audio_from_description(description))
    return grouped
|
|
|
|
|
|
def get_duration(url):
    '''
    Return the duration of the media at `url`, truncated to an integer.

    The value is read from the `duration=` line printed by
    `ffprobe -show_entries format=duration`.

    Raises ValueError if ffprobe prints no such line or its value is not
    a number; errors from running ffprobe itself propagate from
    check_output().
    '''
    output = check_output(['ffprobe', '-v', 'error',
                           '-show_entries', 'format=duration',
                           '-i', url])
    for line in output.split(b'\n'):
        if line.startswith(b'duration='):
            return int(float(line.split(b'=')[1]))
    # Raise a regular exception here: a StopIteration escaping from a bare
    # next() would be mangled when this runs inside a generator.
    raise ValueError('ffprobe reported no duration for {!r}'.format(url))
|
|
|
|
|
|
def get_parser():
    '''
    Build and return the ArgumentParser of the command line interface.

    Options are split in a "sources" group (how to choose among the given
    URLs), a "filters" group (which items to keep) and general options;
    the positional arguments are one or more feed URLs or paths.
    '''
    # The text is a description, not the program name: pass it by keyword,
    # since the first positional parameter of ArgumentParser is `prog`.
    p = ArgumentParser(
        description='Get music from a (well-specified) xml feed')

    src = p.add_argument_group('sources', 'How to deal with sources')
    src.add_argument('--source-weights',
                     help='Select only one "source" based on this weights')

    filters = p.add_argument_group(
        'filters', 'Select only items that match these conditions')
    filters.add_argument('--max-len', default=0, type=int,
                         help='Exclude any audio that is longer than MAXLEN seconds')
    filters.add_argument('--random', default=False,
                         action='store_true', help='Pick randomly')

    p.add_argument('--start', default=0, type=int,
                   help='0-indexed start number. '
                   'By default, play from most recent')
    p.add_argument('--howmany', default=1, type=int,
                   help='If not specified, only 1 will be played')
    p.add_argument('--slotsize', help='Seconds between each audio', type=int)
    p.add_argument('--group', help='Group articles', default=False,
                   action='store_true')
    p.add_argument('--copy', help='Copy files to $TMPDIR', default=False,
                   action='store_true')
    p.add_argument('--debug', help='Debug messages', default=False,
                   action='store_true')
    p.add_argument('urls', metavar='URL', nargs='+')
    return p
|
|
|
|
|
|
def put(audio, copy=False):
    '''
    Print the URLs of `audio`, one per line.

    With `copy` set, every http/https URL is first downloaded into
    $TMPDIR (or the current directory when TMPDIR is unset) and the
    `file://` URL of the local copy is printed in its place. Any other
    URL is printed unchanged.
    '''
    for url in audio.urls:
        scheme = url.split(':')[0]
        if not (copy and scheme in ('http', 'https')):
            # FIXME: file:// urls are just copied
            print(url)
            continue

        destdir = os.environ.get('TMPDIR', '.')
        basename = posixpath.basename(urlparse(url).path)
        # sanitize: keep only alphanumerics and a few harmless symbols
        allowed = '._-'
        safe_name = "".join(ch for ch in basename
                            if ch.isalnum() or ch in allowed).rstrip()
        dest = os.path.join(destdir, safe_name)
        os.makedirs(destdir, exist_ok=True)
        saved_path = urllib.request.urlretrieve(url, dest)[0]
        print('file://%s' % os.path.realpath(saved_path))
|
|
|
|
|
|
def main():
    '''
    Command line entry point.

    Collects audios (or audio groups, with --group) from every source,
    applies --start / --random / --howmany, then prints the result with
    put() (or their repr, with --debug). With --slotsize, a filler line
    is printed after every item but the last when the item is shorter
    than the slot.
    '''
    parser = get_parser()
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.WARNING)

    sources = args.urls
    if args.source_weights:
        # "1:2:3" -> (1, 2, 3); exactly one source is then picked at random
        weights = tuple(int(w) for w in args.source_weights.split(':'))
        if len(weights) != len(sources):
            parser.exit(status=2, message='Weight must be in the'
                        ' same number as sources\n')
        sources = [weighted_choice(sources, weights)]

    def short_enough(item):
        # --max-len 0 (the default) disables the length filter
        return args.max_len == 0 or item.duration <= args.max_len

    def emit(item):
        if args.debug:
            print(repr(item))
        else:
            put(item, args.copy)

    candidates = []
    for source in sources:
        is_remote = source.startswith('http:') or source.startswith('https:')
        if is_remote or os.path.isfile(source):
            # download (or read) the feed
            tree = get_tree(source)
            if args.group:
                groups = get_grouped_urls(tree)
                candidates += [groups[guid] for guid in groups
                               if short_enough(groups[guid])]
            else:
                # get audio urls, removing those that are too long
                candidates += [found for found in get_urls(tree)
                               if short_enough(found)]
        elif os.path.isdir(source):
            from_dir = get_audio_from_dir(source)
            if args.group:
                # each file becomes a group of its own
                for single in from_dir:
                    wrapper = AudioGroup(os.path.basename(single.url))
                    wrapper.append(single)
                    candidates.append(wrapper)
            else:
                candidates += from_dir
        else:
            logging.info('unsupported url `%s`', source)

    selected = candidates[args.start:]
    if args.random:
        random.shuffle(selected)
    selected = selected[:args.howmany]
    if not selected:
        return

    # The last item is handled apart from the others: to support the
    # --slotsize option, the filler is only printed between items.
    *leading, last = selected
    for item in leading:
        emit(item)
        if args.slotsize is not None:
            duration = item.duration
            if duration < args.slotsize:
                print('## musica per {} secondi'
                      .format(args.slotsize - duration))
    emit(last)
|
|
# else: # grouping; TODO: support slotsize
|
|
# for item in groups:
|
|
# if args.debug:
|
|
# print('#', item, groups[item].duration)
|
|
# print(groups[item])
|
|
|
|
|
|
# Run the command line interface only when this file is executed as a
# script; merely importing the module does not call main().
if __name__ == '__main__':
    main()
|