larigira-scripts/feed

#!/usr/bin/env python3
import os
from argparse import ArgumentParser
from subprocess import check_output
from collections import OrderedDict
import re

from lxml import html
import requests


class Audio:
    """A single audio resource referenced by URL, with its length in seconds.

    When no duration is supplied, it is measured by calling ``get_duration``
    on the UTF-8 encoded URL.
    """

    def __init__(self, url, durata=None):
        self.url = url
        # Only probe the resource when the caller did not provide a duration.
        self.durata = (durata if durata is not None
                       else get_duration(url.encode('utf-8')))

    def __str__(self):
        """Return the bare URL."""
        return self.url

    def __repr__(self):
        """Return a debug representation showing the URL and the duration."""
        return '<Audio {url} ({durata})>'.format(url=self.url,
                                                 durata=self.durata)


class AudioGroup(list):
    """A list of audio items that share one description.

    The group *is* the list: items are stored in the underlying ``list``
    itself, so iteration, indexing, ``extend`` and equality all see the same
    items that ``append`` and ``len`` see. The ``audios`` attribute is kept as
    an alias of the group for backward compatibility.
    """

    def __init__(self, description=None):
        super().__init__()
        self.description = description or ''

    @property
    def audios(self):
        """The items of the group (the group itself, which is a list)."""
        return self

    @audios.setter
    def audios(self, value):
        # Replace the content in place so the alias and the list never diverge.
        self[:] = value

    def __str__(self):
        """Return the string form of every item, one per line."""
        return '\n'.join(str(a) for a in self)

    def __repr__(self):
        """Return a debug representation: description, total duration, items."""
        return '<AudioGroup "{}" ({})\n{}>'.format(
            self.description,
            self.durata,
            '\n'.join(repr(a) for a in self))

    @property
    def durata(self):
        """Total duration of the group; items whose duration is None are skipped."""
        return sum(a.durata for a in self if a.durata is not None)


def get_tree(feed_url):
    """Load a feed and return it parsed by ``lxml.html``.

    ``feed_url`` is either an ``http:``/``https:`` URL, which is downloaded
    with ``requests``, or a path to a local file.

    Raises ValueError if ``feed_url`` is a local path that does not exist.
    """
    if feed_url.startswith(('http:', 'https:')):
        return html.fromstring(requests.get(feed_url).content)
    if not os.path.exists(feed_url):
        raise ValueError("file not found: {}".format(feed_url))
    # Close the file deterministically once the document has been parsed.
    with open(feed_url) as feed_file:
        return html.parse(feed_file)


def get_audio_from_description(text):
    """Build an ``Audio`` from the text of an item description.

    The first non-empty line is the audio URL. If a second non-empty line is
    present, it is expected to look like ``<name>=<seconds>``; the first run
    of digits after the ``=`` is used as the duration. When that line cannot
    be parsed, the duration is left unset so that ``Audio`` measures it.

    Raises ValueError if ``text`` is None or contains no non-empty line.
    """
    # non-empty lines; a missing description (None) is treated as empty
    lines = [line.strip()
             for line in (text or '').split('\n')
             if line.strip()]
    if not lines:
        raise ValueError('empty item description: no audio url found')
    url = lines[0]
    durata = None
    if len(lines) > 1:
        parts = lines[1].split('=')
        # Tolerate a malformed duration line instead of raising IndexError.
        found = re.search(r'\d+', parts[1]) if len(parts) > 1 else None
        if found is not None:
            durata = int(found.group())
    return Audio(url, durata)


def get_urls(tree):
    """Lazily yield one ``Audio`` per ``//item/description`` node of ``tree``.

    Items are produced in document order; nothing is evaluated until the
    generator is iterated.
    """
    yield from (get_audio_from_description(node.text)
                for node in tree.xpath('//item/description'))


def get_grouped_urls(tree):
    """Collect the audios of every ``//item`` of ``tree``, grouped by guid.

    Returns an ``OrderedDict`` mapping each stripped guid text to an
    ``AudioGroup``; the keys keep the order in which guids first appear.
    """
    by_guid = OrderedDict()
    for entry in tree.xpath('//item'):
        key = entry.xpath('guid')[0].text.strip()
        # Create the group the first time this guid is seen.
        group = by_guid.get(key)
        if group is None:
            group = by_guid[key] = AudioGroup(key)
        description = entry.xpath('description')[0].text
        group.append(get_audio_from_description(description))
    return by_guid


def get_duration(url):
    """Return the duration of ``url`` in whole seconds, as reported by ffprobe.

    Runs ``ffprobe -show_entries format=duration`` on ``url`` and parses the
    ``duration=<seconds>`` line of its output, truncating to an int.

    Raises ValueError if ffprobe prints no ``duration=`` line or if the value
    is not a number. Errors from ``check_output`` (e.g. a non-zero ffprobe
    exit status) propagate unchanged.
    """
    output = check_output(['ffprobe', '-v', 'error',
                           '-show_entries', 'format=duration',
                           '-i', url])
    for line in output.splitlines():
        if not line.startswith(b'duration='):
            continue
        value = line.split(b'=')[1].strip()
        try:
            return int(float(value))
        except ValueError:
            raise ValueError('ffprobe reported a non-numeric duration '
                             '{!r} for {!r}'.format(value, url))
    # An explicit error instead of a bare next(): a leaked StopIteration would
    # turn into an opaque RuntimeError when reached from inside a generator.
    raise ValueError('ffprobe reported no duration for {!r}'.format(url))


def get_parser():
    """Build the command-line argument parser for this script.

    Returns an ``ArgumentParser`` accepting ``--start``, ``--max-len``,
    ``--howmany``, ``--slotsize``, ``--group``, ``--debug`` and the
    positional ``url`` of the feed.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog`` (the program name).
    p = ArgumentParser(
        description='Get music from a (well-specified) xml feed')
    p.add_argument('--start', default=0, type=int,
                   help='0-indexed start number. '
                   'By default, play from most recent')
    p.add_argument('--max-len', default=0, type=int,
                   help='Exclude any audio that is longer than MAXLEN seconds')
    p.add_argument('--howmany', default=1, type=int,
                   help='If not specified, only 1 will be played')
    p.add_argument('--slotsize', help='Seconds between each audio', type=int)
    p.add_argument('--group', help='Group articles', default=False,
                   action='store_true')
    p.add_argument('--debug', help='Debug messages', default=False,
                   action='store_true')
    p.add_argument('url')
    return p


def main():
    """Command-line entry point: print the audios selected from the feed.

    Loads the feed, optionally groups its items, drops the entries longer
    than ``--max-len`` (0 disables the filter), keeps ``--howmany`` entries
    starting at ``--start`` and prints them. With ``--slotsize``, a filler
    line is printed after every entry except the last one when that entry is
    shorter than the slot.
    """
    args = get_parser().parse_args()
    # download the feed
    tree = get_tree(args.url)

    def short_enough(item):
        # The duration is only looked at when a maximum length was requested.
        return args.max_len == 0 or item.durata <= args.max_len

    if args.group:
        candidates = get_grouped_urls(tree).values()
    else:
        candidates = get_urls(tree)
    selection = slice(args.start, args.start + args.howmany)
    audios = [item for item in candidates if short_enough(item)][selection]
    if not audios:
        return

    render = repr if args.debug else str
    # The last entry is printed separately: the --slotsize filler only goes
    # between entries, never after the final one.
    *leading, last = audios
    for audio in leading:
        print(render(audio))
        if args.slotsize is None:
            continue
        duration = audio.durata
        if duration < args.slotsize:
            print('## musica per {} secondi'
                  .format(args.slotsize - duration))
    print(render(last))
#     else:  # grouping; TODO: support slotsize
#         for item in groups:
#             if args.debug:
#                 print('#', item, groups[item].durata)
#             print(groups[item])


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`#!/usr/bin/env python3`
			`import os`
			`from argparse import ArgumentParser`
			`from subprocess import check_output`
			`from collections import OrderedDict`
			`import re`

			`from lxml import html`
			`import requests`


			`class Audio(object):`
			`def __init__(self, url, durata=None):`
			`self.url = url`
			`if durata is None:`
			`durata = get_duration(url.encode('utf-8'))`
			`self.durata = durata`

			`def __str__(self):`
			`return self.url`

			`def __repr__(self):`
			`return '<Audio {} ({})>'.format(self.url, self.durata)`


			`class AudioGroup(list):`
			`def __init__(self, description=None):`
			`self.description = description or ''`
			`self.audios = []`

			`def __len__(self):`
			`return len(self.audios)`

			`def append(self, arg):`
			`self.audios.append(arg)`

			`def __str__(self):`
			`return '\n'.join(str(a) for a in self.audios)`

			`def __repr__(self):`
			`return '<AudioGroup "{}" ({})\n{}>'.format(self.description,`
			`self.durata,`
			`'\n'.join(repr(a) for a in self.audios))`

			`@property`
			`def durata(self):`
			`return sum(a.durata for a in self.audios if a.durata is not None)`


			`def get_tree(feed_url):`
			`if feed_url.startswith('http:') or feed_url.startswith('https:'):`
			`tree = html.fromstring(requests.get(feed_url).content)`
			`else:`
			`if not os.path.exists(feed_url):`
			`raise ValueError("file not found: {}".format(feed_url))`
			`tree = html.parse(open(feed_url))`
			`return tree`


			`def get_audio_from_description(text):`
			`# non-empty lines`
			`lines = [line.strip()`
			`for line in text.split('\n')`
			`if line.strip()]`
			`url = lines[0]`
			`durata = None`
			`if len(lines) > 1:`
			`durata = int(re.findall(r'\d+', lines[1].split('=')[1].strip())[0])`
			`return Audio(url, durata)`


			`def get_urls(tree):`
			`urls = tree.xpath('//item/description')`
			`for url_elem in urls:`
			`yield get_audio_from_description(url_elem.text)`


			`def get_grouped_urls(tree):`
			`groups = OrderedDict()`
			`items = tree.xpath('//item')`
			`for item in items:`
			`guid = item.xpath('guid')[0].text.strip()`
			`if guid not in groups:`
			`groups[guid] = AudioGroup(guid)`
			`groups[guid].append(get_audio_from_description(`
			`item.xpath('description')[0].text))`
			`return groups`


			`def get_duration(url):`
			`lineout = check_output(['ffprobe', '-v', 'error',`
			`'-show_entries', 'format=duration',`
			`'-i', url]).split(b'\n')`
			`duration = next(l for l in lineout if l.startswith(b'duration='))`
			`value = duration.split(b'=')[1]`
			`return int(float(value))`


			`def get_parser():`
			`p = ArgumentParser('Get music from a (well-specified) xml feed')`
			`p.add_argument('--start', default=0, type=int,`
			`help='0-indexed start number. '`
			`'By default, play from most recent')`
			`p.add_argument('--max-len', default=0, type=int,`
			`help='Exclude any audio that is longer than MAXLEN seconds')`
			`p.add_argument('--howmany', default=1, type=int,`
			`help='If not specified, only 1 will be played')`
			`p.add_argument('--slotsize', help='Seconds between each audio', type=int)`
			`p.add_argument('--group', help='Group articles', default=False,`
			`action='store_true')`
			`p.add_argument('--debug', help='Debug messages', default=False,`
			`action='store_true')`
			`p.add_argument('url')`
			`return p`


			`def main():`
			`args = get_parser().parse_args()`
			`# download the feed`
			`tree = get_tree(args.url)`
			`if not args.group:`
			`# get audio urls, removing those that are too long`
			`audios = [audio for audio in get_urls(tree)`
			`if args.max_len == 0 or`
			`audio.durata <= args.max_len]`
			`audios = audios[args.start:args.start+args.howmany]`
			`else:`
			`groups = get_grouped_urls(tree)`
			`audios = [groups[g] for g in groups.keys()`
			`if args.max_len == 0 or`
			`groups[g].durata <= args.max_len`
			`][args.start:args.start+args.howmany]`

			`# the for loop excludes the last one`
			`# this is to support the --slotsize option`
			`if not audios:`
			`return`
			`for audio in audios[:-1]:`
			`if args.debug:`
			`print(repr(audio))`
			`else:`
			`print(audio)`
			`if args.slotsize is not None:`
			`duration = audio.durata`
			`if duration < args.slotsize:`
			`print('## musica per {} secondi'`
			`.format(args.slotsize - duration))`
			`# finally, the last one`
			`if args.debug:`
			`print(repr(audios[-1]))`
			`else:`
			`print(audios[-1])`
			`# else: # grouping; TODO: support slotsize`
			`# for item in groups:`
			`# if args.debug:`
			`# print('#', item, groups[item].durata)`
			`# print(groups[item])`


			`if __name__ == '__main__':`
			`main()`