123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161 |
- #!/usr/bin/env python3
- import os
- from argparse import ArgumentParser
- from subprocess import check_output
- from collections import OrderedDict
- import re
- from lxml import html
- import requests
class Audio(object):
    """A single audio track, identified by its URL.

    ``durata`` holds the track length in seconds.  When the caller does not
    supply it, it is looked up by calling ``get_duration`` on the UTF-8
    encoded URL.
    """

    _REPR_TEMPLATE = '<Audio {} ({})>'

    def __init__(self, url, durata=None):
        self.url = url
        # Only probe the media when no duration was given explicitly;
        # an explicit 0 is a valid duration and must not trigger a probe.
        self.durata = (get_duration(url.encode('utf-8'))
                       if durata is None else durata)

    def __str__(self):
        """Return the bare URL, which is what gets printed in the playlist."""
        return self.url

    def __repr__(self):
        """Return a debug representation showing URL and duration."""
        return self._REPR_TEMPLATE.format(self.url, self.durata)
class AudioGroup(list):
    """An ordered group of audio items that share one description.

    The group is a genuine list: the items live in the list itself, so
    ``len()``, iteration, indexing and ``in`` all agree with each other.
    (Previously the items were kept in a separate ``audios`` attribute while
    the underlying list stayed empty, so ``for a in group`` yielded nothing
    even though ``len(group)`` was non-zero.)

    Attributes:
        description: free-text label of the group ('' when not given).
        audios: the grouped items; kept as an alias of the group itself for
            backward compatibility.
        durata: total duration of the items whose ``durata`` is not None.
    """

    def __init__(self, description=None):
        super().__init__()
        self.description = description or ''

    @property
    def audios(self):
        """Return the grouped items (the group itself, so mutations stick)."""
        return self

    @audios.setter
    def audios(self, items):
        # Replace the content in place so existing references stay valid.
        self[:] = items

    def __str__(self):
        """Return the items' string forms, one per line."""
        return '\n'.join(str(a) for a in self)

    def __repr__(self):
        """Return a debug representation: description, total, then items."""
        return '<AudioGroup "{}" ({})\n{}>'.format(
            self.description,
            self.durata,
            '\n'.join(repr(a) for a in self))

    @property
    def durata(self):
        """Sum of the items' durations, skipping items with unknown length."""
        return sum(a.durata for a in self if a.durata is not None)
def get_tree(feed_url):
    """Load the feed and return its parsed lxml tree.

    Args:
        feed_url: an ``http:``/``https:`` URL, or a path to a local file.

    Returns:
        For a URL, the result of ``html.fromstring`` on the downloaded body;
        for a local file, the result of ``html.parse`` on the opened file.
        Callers in this file only use ``.xpath`` on the result.

    Raises:
        ValueError: if ``feed_url`` is not a URL and no such file exists.
    """
    if feed_url.startswith(('http:', 'https:')):
        return html.fromstring(requests.get(feed_url).content)

    if not os.path.exists(feed_url):
        raise ValueError("file not found: {}".format(feed_url))
    # Parse inside a context manager so the file handle is closed
    # deterministically instead of being leaked.
    with open(feed_url) as feed_file:
        return html.parse(feed_file)
def get_audio_from_description(text):
    """Build an ``Audio`` from the text of an item's description.

    The first non-empty line is the audio URL.  An optional second non-empty
    line carries the duration as ``<key>=<value>``; the first run of digits
    in the value is taken as the number of seconds.

    Args:
        text: description text; ``None`` is treated like an empty string.

    Returns:
        An ``Audio`` for the URL.  When no duration can be read from the
        text, ``durata`` is passed as ``None`` so that ``Audio`` looks it up
        itself.

    Raises:
        ValueError: if the text contains no non-empty line (no URL).
    """
    # Keep only non-empty lines, stripped of surrounding whitespace.
    lines = [ln.strip() for ln in (text or '').split('\n') if ln.strip()]
    if not lines:
        raise ValueError('empty item description: no audio URL found')

    durata = _duration_from_line(lines[1]) if len(lines) > 1 else None
    return Audio(lines[0], durata)


def _duration_from_line(line):
    """Return the first integer found after the first '=' in *line*, or None."""
    fields = line.split('=')
    if len(fields) < 2:
        # No '=' at all: this is not a duration line.
        return None
    found = re.search(r'\d+', fields[1])
    return int(found.group()) if found else None
def get_urls(tree):
    """Lazily yield one ``Audio`` per ``//item/description`` node of *tree*.

    Each node's text is handed to ``get_audio_from_description``; nothing is
    evaluated until the generator is first advanced.
    """
    yield from (get_audio_from_description(node.text)
                for node in tree.xpath('//item/description'))
def get_grouped_urls(tree):
    """Group the feed's audios by the (stripped) text of each item's guid.

    Returns an ``OrderedDict`` mapping guid -> ``AudioGroup``, with the
    groups in order of first appearance and each group's audios in feed
    order.
    """
    by_guid = OrderedDict()
    for entry in tree.xpath('//item'):
        key = entry.xpath('guid')[0].text.strip()
        try:
            bucket = by_guid[key]
        except KeyError:
            # First item with this guid: open a new group labelled by it.
            bucket = by_guid[key] = AudioGroup(key)
        description = entry.xpath('description')[0].text
        bucket.append(get_audio_from_description(description))
    return by_guid
def get_duration(url):
    """Return the duration of the media at *url*, in whole seconds.

    Runs ``ffprobe`` with ``-show_entries format=duration`` and reads the
    ``duration=<value>`` line from its output.

    Args:
        url: the input passed to ffprobe's ``-i`` option.

    Returns:
        The duration truncated to an ``int``.

    Raises:
        ValueError: if ffprobe prints no ``duration=`` line, or its value is
            not a number.
        subprocess.CalledProcessError: if ffprobe exits with an error
            (raised by ``check_output``).
    """
    output = check_output(['ffprobe', '-v', 'error',
                           '-show_entries', 'format=duration',
                           '-i', url])
    for line in output.split(b'\n'):
        if line.startswith(b'duration='):
            return int(float(line.split(b'=')[1]))
    # An explicit error instead of a leaked StopIteration, which would turn
    # into an opaque RuntimeError when this runs inside a generator.
    raise ValueError('ffprobe reported no duration for {!r}'.format(url))
def get_parser():
    """Build the command-line parser for the script.

    Returns:
        An ``ArgumentParser`` with the options ``--start``, ``--max-len``,
        ``--howmany``, ``--slotsize``, ``--group``, ``--debug`` and the
        positional ``url``.
    """
    # The text is the program description; passing it positionally would set
    # ``prog`` instead and corrupt the usage line.
    p = ArgumentParser(
        description='Get music from a (well-specified) xml feed')
    p.add_argument('--start', default=0, type=int,
                   help='0-indexed start number. '
                   'By default, play from most recent')
    p.add_argument('--max-len', default=0, type=int,
                   help='Exclude any audio that is longer than MAXLEN seconds')
    p.add_argument('--howmany', default=1, type=int,
                   help='If not specified, only 1 will be played')
    p.add_argument('--slotsize', help='Seconds between each audio', type=int)
    p.add_argument('--group', help='Group articles', default=False,
                   action='store_true')
    p.add_argument('--debug', help='Debug messages', default=False,
                   action='store_true')
    p.add_argument('url')
    return p
def main():
    """Command-line entry point: print the selected audios, one per line.

    Loads the feed named on the command line, takes either the single audios
    or (with ``--group``) the per-guid groups, drops those longer than
    ``--max-len`` (0 disables the limit) and keeps ``--howmany`` of them
    starting at ``--start``.  With ``--debug`` the ``repr`` of each entry is
    printed instead of its plain form.
    """
    args = get_parser().parse_args()
    # download the feed
    tree = get_tree(args.url)

    def short_enough(entry):
        return args.max_len == 0 or entry.durata <= args.max_len

    if args.group:
        candidates = get_grouped_urls(tree).values()
    else:
        candidates = get_urls(tree)
    kept = [entry for entry in candidates if short_enough(entry)]
    audios = kept[args.start:args.start + args.howmany]
    if not audios:
        return

    render = repr if args.debug else str
    # The filler line goes only *between* entries, so the last entry is
    # printed separately, without the --slotsize handling.
    *leading, final = audios
    for audio in leading:
        print(render(audio))
        if args.slotsize is None:
            continue
        duration = audio.durata
        if duration < args.slotsize:
            print('## musica per {} secondi'
                  .format(args.slotsize - duration))
    # finally, the last one
    print(render(final))
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
|