larigira-scripts/feed

#!/usr/bin/env python3
'''
Feed parser with many features

Given a feed, it supports filtering, subslicing and random picking.

Besides feeds, it supports picking files from directories.
'''
import os
import logging
from argparse import ArgumentParser, ArgumentTypeError
from subprocess import check_output
from collections import OrderedDict
import re
import urllib.request
from urllib.parse import urlparse, unquote
import posixpath
import random
from bisect import bisect
import datetime

from lxml import html
import requests
from pytimeparse.timeparse import timeparse


def TimeDeltaType(arg):
    '''
    argparse ``type=`` callable turning a time expression into a timedelta.

    ``arg`` is handed to ``timeparse``; when that returns ``None`` the value
    is rejected with ``ArgumentTypeError``. Otherwise the parsed number of
    seconds is wrapped in a ``datetime.timedelta``.
    '''
    secs = timeparse(arg)
    if secs is None:
        raise ArgumentTypeError('%r is not a valid time range' % arg)
    # reuse the value we just validated instead of parsing a second time
    return datetime.timedelta(seconds=secs)


def weighted_choice(values, weights):
    '''
    Return one element of ``values``, picked at random according to
    ``weights``.

    ``values`` and ``weights`` must have the same length. Weights are meant
    to be integers greater than 0 and are only meaningful relative to each
    other: [1,2,3] behaves like [2,4,6].
    '''
    assert len(values) == len(weights)
    # thresholds[k] is the sum of weights[0..k]; the last one is the total
    running = 0
    thresholds = []
    for weight in weights:
        running += weight
        thresholds.append(running)
    # draw a point in [0, total) and find the slot it falls into
    point = random.random() * running
    return values[bisect(thresholds, point)]


def delta_humanreadable(tdelta):
    '''
    Render a timedelta as a short string: ``<days>d<hours>h`` or ``<hours>h``.

    ``None`` is rendered as the empty string. Minutes and seconds are
    dropped; the day part is omitted when it is zero.
    '''
    if tdelta is None:
        return ''
    # .seconds is the within-day remainder, so it already excludes whole days
    whole_hours = tdelta.seconds // 3600
    if not tdelta.days:
        return '{}h'.format(whole_hours)
    return '{}d{}h'.format(tdelta.days, whole_hours)


class Audio(object):
    '''
    A single audio track identified by its URL.

    Attributes:
        url: location of the audio, as a string
        duration: length as given by the caller; when not given it is
            obtained from ``get_duration``
        date: publication date, or None when unknown
    '''

    def __init__(self, url, duration=None, date=None):
        self.url = url
        if duration is None:
            # no duration supplied: measure it from the media itself
            duration = get_duration(url.encode('utf-8'))
        self.duration = duration
        self.date = date

    def __str__(self):
        return self.url

    def __repr__(self):
        return '<Audio {} ({} {})>'.format(self.url, self.duration,
                                           delta_humanreadable(self.age))

    @property
    def urls(self):
        '''Single-element list, mirroring the ``urls`` of AudioGroup'''
        return [self.url]

    @property
    def age(self):
        '''
        Time elapsed since ``date`` as a timedelta, or None if date is unset.

        The current time is taken as a timezone-aware UTC datetime, so
        ``date`` must be timezone-aware as well for the subtraction to work.
        '''
        if self.date is None:
            return None
        # datetime.utcnow() is deprecated; now(tz) gives the same aware value
        now = datetime.datetime.now(datetime.timezone.utc)
        return now - self.date


class AudioGroup(list):
    def __init__(self, description=None):
        self.description = description or ''
        self.audios = []

    def __len__(self):
        return len(self.audios)

    def append(self, arg):
        self.audios.append(arg)

    def __str__(self):
        return '\n'.join(str(a) for a in self.audios)

    def __repr__(self):
        return '<AudioGroup "{}" ({} {})\n{} >'.\
                format(self.description, self.duration,
                       delta_humanreadable(self.age),
                       '\n'.join('   ' + repr(a) for a in self.audios))

    @property
    def duration(self):
        return sum(a.duration for a in self.audios if a.duration is not None)

    @property
    def urls(self):
        return [a.url for a in self.audios]

    @property
    def date(self):
        for a in self.audios:
            if hasattr(a, 'date'):
                return a.date
        return None

    @property
    def age(self):
        if self.date is None:
            return None
        now = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)

        return now - self.date


def get_tree(feed_url):
    '''
    Fetch and parse a feed, returning the lxml object to query with xpath.

    ``feed_url`` is either an http(s) URL, which is downloaded with
    ``requests``, or the path of a local file.

    Raises ValueError if a local path does not exist.
    '''
    if feed_url.startswith(('http:', 'https:')):
        return html.fromstring(requests.get(feed_url).content)
    if not os.path.exists(feed_url):
        raise ValueError("file not found: {}".format(feed_url))
    # parse inside a with-block so the file handle is always closed
    with open(feed_url) as feed_file:
        return html.parse(feed_file)


def get_audio_from_description(text):
    '''
    Build an Audio from the text of an item description.

    The first non-empty line is the (possibly percent-encoded) URL. If a
    second non-empty line exists, the first run of digits found right after
    its first ``=`` is taken as the duration. When that line does not have
    this shape, the duration is left as None, exactly as if the line were
    missing, so that Audio works it out by itself.

    Raises IndexError if ``text`` has no non-empty line.
    '''
    # non-empty lines
    lines = [line.strip()
             for line in text.split('\n')
             if line.strip()]
    url = lines[0]
    duration = None
    if len(lines) > 1:
        # keep only what sits between the first and the second '='
        value = lines[1].partition('=')[2].split('=')[0]
        digits = re.search(r'\d+', value)
        if digits is not None:
            duration = int(digits.group())
    return Audio(unquote(url), duration)


# copied from larigira.fsutils
def scan_dir_audio(dirname, extensions=('mp3', 'oga', 'wav', 'ogg')):
    '''
    Walk ``dirname`` recursively and yield the path of every audio file.

    A file counts as audio when the text after its last dot, lowercased,
    is one of ``extensions``.
    '''
    for folder, _subdirs, names in os.walk(dirname):
        for name in names:
            suffix = name.rsplit('.', 1)[-1].lower()
            if suffix not in extensions:
                continue
            yield os.path.join(folder, name)


def get_audio_from_dir(dirpath):
    '''
    Return one Audio for every audio file found under ``dirpath``.

    Each file is referenced through a ``file://`` URL built from its
    real (symlink-resolved) path.
    '''
    found = []
    for path in scan_dir_audio(dirpath):
        found.append(Audio('file://' + os.path.realpath(path)))
    return found


def get_item_date(el):
    '''
    Return the publication date of a feed item, or None if it has none.

    The date is read from the ``pubdate`` child of ``el`` and must look like
    ``2018-02-13T22:47:35+0100``; the result is a timezone-aware datetime.
    Surrounding whitespace is ignored and an empty element counts as
    "no date".

    Raises ValueError if the text is present but in a different format.
    '''
    el_date = el.find('pubdate')
    if el_date is None:
        return None
    # strptime rejects stray whitespace and cannot take None (empty element)
    text = (el_date.text or '').strip()
    if not text:
        return None
    return datetime.datetime.strptime(text, '%Y-%m-%dT%H:%M:%S%z')


def get_urls(tree):
    '''
    Yield an Audio, with its date set, for every ``item`` of the feed that
    has a ``description`` child. Items without one are skipped.
    '''
    for entry in tree.xpath('//item'):
        body = entry.find('description')
        if body is None:
            continue
        track = get_audio_from_description(body.text)
        track.date = get_item_date(entry)
        yield track


def get_grouped_urls(tree):
    '''
    Collect the audios of a feed into AudioGroups keyed by item guid.

    Returns an OrderedDict mapping each guid to its group, in the order in
    which guids are first met. Every item must have both a ``guid`` and a
    ``description`` child.
    '''
    by_guid = OrderedDict()
    for entry in tree.xpath('//item'):
        key = entry.xpath('guid')[0].text.strip()
        # test membership, not truthiness: an empty group has len() == 0
        if key not in by_guid:
            by_guid[key] = AudioGroup(key)
        body = entry.xpath('description')[0]
        track = get_audio_from_description(body.text)
        track.date = get_item_date(entry)
        by_guid[key].append(track)
    return by_guid


def get_duration(url):
    '''
    Return the duration of the media at ``url`` in whole seconds.

    Runs ``ffprobe`` and reads the ``duration=`` line of its output; the
    fractional part is truncated.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with an error
        ValueError: ffprobe printed no duration line, or its value is not
            a number
    '''
    output = check_output(['ffprobe', '-v', 'error',
                           '-show_entries', 'format=duration',
                           '-i', url])
    for line in output.split(b'\n'):
        if line.startswith(b'duration='):
            return int(float(line.split(b'=')[1]))
    # An explicit error instead of a bare StopIteration, which would turn
    # into an unrelated RuntimeError when raised inside a generator
    raise ValueError('ffprobe reported no duration for {!r}'.format(url))


# Overview of the processing pipeline, shown to the user by the argument
# parser: get_parser() passes this text as the first positional argument of
# ArgumentParser.
# NOTE(review): that positional parameter is argparse's `prog`, which would
# explain the trailing "Usage: " -- confirm this is intentional.
HELP = '''
Collect audio informations from multiple sources (XML feeds).
Audios are (in that order):
 1. Collected from feeds; (grouped by article if --group is used)
 2. Filtered; everything that does not match with requirements is excluded
 3. Sorted; even randomly
 4. Sliced; take HOWMANY elements, skipping START elements
 5. (if --copy) Copied
Usage: '''


def get_parser():
    '''
    Build the command line parser.

    Options are split in the groups "sources", "filters" and "general";
    --start, --howmany, --slotsize and the positional URLs are left
    ungrouped.
    '''
    parser = ArgumentParser(HELP)

    # how the sources given on the command line are combined
    sources = parser.add_argument_group('sources', 'How to deal with sources')
    sources.add_argument(
        '--source-weights',
        help='Select only one "source" based on this weights')
    sources.add_argument(
        '--group', action='store_true', default=False,
        help='Group audios that belong to the same article')

    # conditions every item must satisfy; 0 / empty timedelta mean "no limit"
    filters = parser.add_argument_group(
        'filters', 'Select only items that match these conditions')
    filters.add_argument(
        '--max-len', type=int, default=0,
        help='Exclude any audio that is longer than MAX_LEN seconds')
    filters.add_argument(
        '--random', action='store_true', default=False,
        help='Pick randomly')
    filters.add_argument(
        '--min-len', type=int, default=0,
        help='Exclude any audio that is shorter than MIN_LEN seconds')
    filters.add_argument(
        '--min-age', type=TimeDeltaType, default=datetime.timedelta(),
        help='Exclude audio more recent than MIN_AGE')
    filters.add_argument(
        '--max-age', type=TimeDeltaType, default=datetime.timedelta(),
        help='Exclude audio older than MAX_AGE')

    # slicing of the final list
    parser.add_argument(
        '--start', type=int, default=0,
        help='0-indexed start number. By default, play from most recent')
    parser.add_argument(
        '--howmany', type=int, default=1,
        help='If not specified, only 1 will be played')
    parser.add_argument(
        '--slotsize', type=int,
        help='Seconds between each audio. Still unsupported')

    general = parser.add_argument_group('general', 'General options')
    general.add_argument(
        '--copy', action='store_true', default=False,
        help='Copy files to $TMPDIR')
    general.add_argument(
        '--debug', action='store_true', default=False,
        help='Debug messages')

    parser.add_argument('urls', metavar='URL', nargs='+')
    return parser


def put(audio, copy=False):
    '''
    Print, one per line, the URLs of ``audio`` (an Audio or an AudioGroup).

    With ``copy`` set, every http(s) URL is first downloaded into the
    directory named by $TMPDIR (current directory if unset) and the
    ``file://`` URL of the local copy is printed instead. URLs with any
    other scheme are printed unchanged.
    '''
    for url in audio.urls:
        if not copy or url.split(':')[0] not in ('http', 'https'):
            # FIXME: file:// urls are just copied
            print(url)
            continue
        destdir = os.environ.get('TMPDIR', '.')
        remote_name = posixpath.basename(urlparse(url).path)
        # sanitize: keep only alphanumerics, dot, underscore and dash
        kept = [c for c in remote_name if c.isalnum() or c in '._-']
        safe_name = "".join(kept).rstrip()
        os.makedirs(destdir, exist_ok=True)
        target = os.path.join(destdir, safe_name)
        saved, _headers = urllib.request.urlretrieve(url, target)
        print('file://%s' % os.path.realpath(saved))


def _matches_filters(item, args):
    '''
    Tell whether ``item`` (an Audio or an AudioGroup) passes the length and
    age filters given on the command line.

    A value of 0 (or an empty timedelta) disables the corresponding filter.
    When an age filter is active, items whose age is unknown (None) are
    excluded, since they cannot be shown to satisfy it.
    '''
    if args.max_len != 0 and item.duration > args.max_len:
        return False
    if args.min_len != 0 and item.duration < args.min_len:
        return False
    check_min_age = args.min_age.total_seconds() != 0
    check_max_age = args.max_age.total_seconds() != 0
    if check_min_age or check_max_age:
        age = item.age
        if age is None:
            return False
        if check_min_age and age < args.min_age:
            return False
        if check_max_age and age > args.max_age:
            return False
    return True


def main():
    '''
    Command line entry point: collect audios from every source, filter,
    optionally shuffle, slice them and print (or copy) the result.
    '''
    parser = get_parser()
    args = parser.parse_args()
    if not args.debug:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.DEBUG)
    sources = args.urls

    if args.source_weights:
        weights = tuple(map(int, args.source_weights.split(':')))
        if len(weights) != len(sources):
            parser.exit(status=2, message='Weight must be in the'
                        ' same number as sources\n')
        # only one source survives, picked according to the weights
        sources = [weighted_choice(sources, weights)]

    audios = []
    for url in sources:
        if url.startswith(('http:', 'https:')) or os.path.isfile(url):
            # download (or read) the feed
            tree = get_tree(url)
            if args.group:
                candidates = get_grouped_urls(tree).values()
            else:
                candidates = get_urls(tree)
            # the same filters apply to single audios and to groups
            audios += [c for c in candidates if _matches_filters(c, args)]
        elif os.path.isdir(url):
            # audios coming from a directory are not filtered
            audiodir = get_audio_from_dir(url)
            if not args.group:
                audios += audiodir
            else:
                # one group per file
                for a in audiodir:
                    ag = AudioGroup(os.path.basename(a.url))
                    ag.append(a)
                    audios.append(ag)
        else:
            logging.info('unsupported url `%s`', url)

    audios = audios[args.start:]
    if args.random:
        random.shuffle(audios)
    audios = audios[:args.howmany]

    if not audios:
        return
    last = len(audios) - 1
    for pos, audio in enumerate(audios):
        if args.debug:
            print(repr(audio))
        else:
            put(audio, args.copy)
        # --slotsize: a filler line goes between audios, never after the
        # last one
        if pos != last and args.slotsize is not None:
            duration = audio.duration
            if duration < args.slotsize:
                print('## musica per {} secondi'
                      .format(args.slotsize - duration))
#     else:  # grouping; TODO: support slotsize
#         for item in groups:
#             if args.debug:
#                 print('#', item, groups[item].duration)
#             print(groups[item])


# Run main() only when executed as a script, not when imported
if __name__ == '__main__':
    main()
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`#!/usr/bin/env python3`
feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`'''`
			`Feed parser with many features`

			`from a feed, it supports filtering, subslicing, random picking`

			`Beside feeds, it supports picking files from directories`
			`'''`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`import os`
feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`import logging`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`from argparse import ArgumentParser, ArgumentTypeError`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`from subprocess import check_output`
			`from collections import OrderedDict`
			`import re`
add --copy 2017-05-25 16:52:57 +02:00			`import urllib.request`
feed --copy: sanitize url 2017-05-25 19:22:00 +02:00			`from urllib.parse import urlparse, unquote`
			`import posixpath`
--random 2017-06-12 23:26:43 +02:00			`import random`
FIX #1: --source-weights source weights are meant to first of all pick a source, with the defined weights. This option is unrelated to --random. In fact, --random will switch from chronological order to random one. --source-weights will, instead, pick only one of the sources given in command line. A typical example is for jingles: if you have a big number of jingles that should run relatively rarely, and a small number of jingles that should run often, then putting them all in the same directory isn't a good idea. You'd better put them in two different dirs and use --source-weights 1:2 2017-12-09 17:51:56 +01:00			`from bisect import bisect`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`import datetime`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00
			`from lxml import html`
			`import requests`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`from pytimeparse.timeparse import timeparse`


			`def TimeDeltaType(arg):`
			`secs = timeparse(arg)`
			`if secs is None:`
			`raise ArgumentTypeError('%r is not a valid time range' % arg)`
			`return datetime.timedelta(seconds=timeparse(arg))`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00

FIX #1: --source-weights source weights are meant to first of all pick a source, with the defined weights. This option is unrelated to --random. In fact, --random will switch from chronological order to random one. --source-weights will, instead, pick only one of the sources given in command line. A typical example is for jingles: if you have a big number of jingles that should run relatively rarely, and a small number of jingles that should run often, then putting them all in the same directory isn't a good idea. You'd better put them in two different dirs and use --source-weights 1:2 2017-12-09 17:51:56 +01:00			`def weighted_choice(values, weights):`
			`'''`
			`random.choice with weights`

			`weights must be integers greater than 0.`

			`Their meaning is "relative", that is [1,2,3] is the same as [2,4,6]`
			`'''`
			`assert len(values) == len(weights)`
			`total = 0`
			`cum_weights = []`
			`for w in weights:`
			`total += w`
			`cum_weights.append(total)`
			`x = random.random() * total`
			`i = bisect(cum_weights, x)`
			`return values[i]`


Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`def delta_humanreadable(tdelta):`
			`if tdelta is None:`
			`return ''`
			`days = tdelta.days`
			`hours = (tdelta - datetime.timedelta(days=days)).seconds // 3600`
			`if days:`
			`return '{}d{}h'.format(days, hours)`
			`return '{}h'.format(hours)`


feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`class Audio(object):`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`def __init__(self, url, duration=None, date=None):`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`self.url = url`
minor: english variable names 2018-02-13 20:28:01 +01:00			`if duration is None:`
			`duration = get_duration(url.encode('utf-8'))`
			`self.duration = duration`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`self.date = date`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00
			`def __str__(self):`
			`return self.url`

			`def __repr__(self):`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`return '<Audio {} ({} {})>'.format(self.url, self.duration,`
			`delta_humanreadable(self.age))`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00
fix group 2017-06-12 23:26:36 +02:00			`@property`
			`def urls(self):`
			`return [self.url]`

Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`@property`
			`def age(self):`
			`if self.date is None:`
			`return None`
			`now = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)`

			`return now - self.date`

feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00
			`class AudioGroup(list):`
			`def __init__(self, description=None):`
			`self.description = description or ''`
			`self.audios = []`

			`def __len__(self):`
			`return len(self.audios)`

			`def append(self, arg):`
			`self.audios.append(arg)`

			`def __str__(self):`
			`return '\n'.join(str(a) for a in self.audios)`

			`def __repr__(self):`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`return '<AudioGroup "{}" ({} {})\n{} >'.\`
minor: english variable names 2018-02-13 20:28:01 +01:00			`format(self.description, self.duration,`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`delta_humanreadable(self.age),`
feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`'\n'.join(' ' + repr(a) for a in self.audios))`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00
			`@property`
minor: english variable names 2018-02-13 20:28:01 +01:00			`def duration(self):`
			`return sum(a.duration for a in self.audios if a.duration is not None)`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00
fix group 2017-06-12 23:26:36 +02:00			`@property`
			`def urls(self):`
			`return [a.url for a in self.audios]`

Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`@property`
			`def date(self):`
			`for a in self.audios:`
			`if hasattr(a, 'date'):`
			`return a.date`
			`return None`

			`@property`
			`def age(self):`
			`if self.date is None:`
			`return None`
			`now = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)`

			`return now - self.date`


feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00
			`def get_tree(feed_url):`
			`if feed_url.startswith('http:') or feed_url.startswith('https:'):`
			`tree = html.fromstring(requests.get(feed_url).content)`
			`else:`
			`if not os.path.exists(feed_url):`
			`raise ValueError("file not found: {}".format(feed_url))`
			`tree = html.parse(open(feed_url))`
			`return tree`


			`def get_audio_from_description(text):`
			`# non-empty lines`
			`lines = [line.strip()`
			`for line in text.split('\n')`
			`if line.strip()]`
			`url = lines[0]`
minor: english variable names 2018-02-13 20:28:01 +01:00			`duration = None`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`if len(lines) > 1:`
minor: english variable names 2018-02-13 20:28:01 +01:00			`duration = int(re.findall(r'\d+', lines[1].split('=')[1].strip())[0])`
			`return Audio(unquote(url), duration)`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00

feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`# copied from larigira.fsutils`
			`def scan_dir_audio(dirname, extensions=('mp3', 'oga', 'wav', 'ogg')):`
			`for root, dirnames, filenames in os.walk(dirname):`
			`for fname in filenames:`
			`if fname.split('.')[-1].lower() in extensions:`
			`yield os.path.join(root, fname)`


			`def get_audio_from_dir(dirpath):`
			`fpaths = scan_dir_audio(dirpath)`
			`return [Audio('file://' + os.path.realpath(u)) for u in fpaths]`


Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`def get_item_date(el):`
			`el_date = el.find('pubdate')`
			`if el_date is not None:`
			`return datetime.datetime.strptime(`
			`el_date.text, '%Y-%m-%dT%H:%M:%S%z')`
			`return None`


feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`def get_urls(tree):`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`items = tree.xpath('//item')`
			`for it in items:`
			`el_body = it.find('description')`
			`if el_body is not None:`
			`audio = get_audio_from_description(el_body.text)`
			`audio.date = get_item_date(it)`
			`yield audio`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00

			`def get_grouped_urls(tree):`
			`groups = OrderedDict()`
			`items = tree.xpath('//item')`
			`for item in items:`
			`guid = item.xpath('guid')[0].text.strip()`
			`if guid not in groups:`
			`groups[guid] = AudioGroup(guid)`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`audio = get_audio_from_description(item.xpath('description')[0].text)`
			`audio.date = get_item_date(item)`
			`groups[guid].append(audio)`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`return groups`


			`def get_duration(url):`
			`lineout = check_output(['ffprobe', '-v', 'error',`
			`'-show_entries', 'format=duration',`
			`'-i', url]).split(b'\n')`
			`duration = next(l for l in lineout if l.startswith(b'duration='))`
			`value = duration.split(b'=')[1]`
			`return int(float(value))`


cosmetic changes 2018-02-13 21:49:30 +01:00			`HELP = '''`
			`Collect audio informations from multiple sources (XML feeds).`
			`Audios are (in that order):`
			`1. Collected from feeds; (grouped by article if --group is used)`
			`2. Filtered; everything that does not match with requirements is excluded`
			`3. Sorted; even randomly`
			`4. Sliced; take HOWMANY elements, skipping START elements`
			`5. (if --copy) Copied`
			`Usage: '''`


feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`def get_parser():`
cosmetic changes 2018-02-13 21:49:30 +01:00			`p = ArgumentParser(HELP)`
options are organized in groups 2018-02-13 20:20:06 +01:00			`src = p.add_argument_group('sources', 'How to deal with sources')`
minor: option group for --source-weights 2018-02-13 21:34:37 +01:00			`src.add_argument('--source-weights',`
			`help='Select only one "source" based on this weights')`
cosmetic changes 2018-02-13 21:49:30 +01:00			`src.add_argument('--group', default=False, action='store_true',`
			`help='Group audios that belong to the same article')`
options are organized in groups 2018-02-13 20:20:06 +01:00
cosmetic changes 2018-02-13 21:49:30 +01:00			`filters = p.add_argument_group('filters', 'Select only items that match '`
			`'these conditions')`
options are organized in groups 2018-02-13 20:20:06 +01:00			`filters.add_argument('--max-len', default=0, type=int,`
cosmetic changes 2018-02-13 21:49:30 +01:00			`help='Exclude any audio that is longer '`
			`'than MAX_LEN seconds')`
options are organized in groups 2018-02-13 20:20:06 +01:00			`filters.add_argument('--random', default=False,`
feed --min-len 2018-01-30 21:20:06 +01:00			`action='store_true', help='Pick randomly')`
			`filters.add_argument('--min-len', default=0, type=int,`
cosmetic changes 2018-02-13 21:49:30 +01:00			`help='Exclude any audio that is shorter '`
			`'than MIN_LEN seconds')`

Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`filters.add_argument('--min-age', default=datetime.timedelta(),`
			`type=TimeDeltaType,`
			`help='Exclude audio more recent than MIN_AGE')`
			`filters.add_argument('--max-age', default=datetime.timedelta(),`
			`type=TimeDeltaType,`
			`help='Exclude audio older than MAX_AGE')`

feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`p.add_argument('--start', default=0, type=int,`
			`help='0-indexed start number. '`
			`'By default, play from most recent')`
			`p.add_argument('--howmany', default=1, type=int,`
			`help='If not specified, only 1 will be played')`
cosmetic changes 2018-02-13 21:49:30 +01:00			`p.add_argument('--slotsize', type=int,`
			`help='Seconds between each audio. Still unsupported')`

			`general = p.add_argument_group('general', 'General options')`
			`general.add_argument('--copy', help='Copy files to $TMPDIR', default=False,`
			`action='store_true')`
			`general.add_argument('--debug', help='Debug messages', default=False,`
			`action='store_true')`

feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`p.add_argument('urls', metavar='URL', nargs='+')`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`return p`

feed --copy: sanitize url 2017-05-25 19:22:00 +02:00
add --copy 2017-05-25 16:52:57 +02:00			`def put(audio, copy=False):`
			`if not copy:`
fix group 2017-06-12 23:26:36 +02:00			`for url in audio.urls:`
			`print(url)`
add --copy 2017-05-25 16:52:57 +02:00			`else:`
fix group 2017-06-12 23:26:36 +02:00			`for url in audio.urls:`
feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`if url.split(':')[0] in ('http', 'https'):`
			`destdir = (os.environ.get('TMPDIR', '.'))`
			`fname = posixpath.basename(urlparse(url).path)`
			`# sanitize`
			`fname = "".join(c for c in fname`
			`if c.isalnum() or c in list('._-')).rstrip()`
			`dest = os.path.join(destdir, fname)`
			`os.makedirs(destdir, exist_ok=True)`
			`fname, headers = urllib.request.urlretrieve(url, dest)`
			`print('file://%s' % os.path.realpath(fname))`
			`else:`
			`# FIXME: file:// urls are just copied`
			`print(url)`
add --copy 2017-05-25 16:52:57 +02:00
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00
			`def main():`
FIX #1: --source-weights source weights are meant to first of all pick a source, with the defined weights. This option is unrelated to --random. In fact, --random will switch from chronological order to random one. --source-weights will, instead, pick only one of the sources given in command line. A typical example is for jingles: if you have a big number of jingles that should run relatively rarely, and a small number of jingles that should run often, then putting them all in the same directory isn't a good idea. You'd better put them in two different dirs and use --source-weights 1:2 2017-12-09 17:51:56 +01:00			`parser = get_parser()`
			`args = parser.parse_args()`
feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`if not args.debug:`
			`logging.basicConfig(level=logging.WARNING)`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`else:`
feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`logging.basicConfig(level=logging.DEBUG)`
FIX #1: --source-weights source weights are meant to first of all pick a source, with the defined weights. This option is unrelated to --random. In fact, --random will switch from chronological order to random one. --source-weights will, instead, pick only one of the sources given in command line. A typical example is for jingles: if you have a big number of jingles that should run relatively rarely, and a small number of jingles that should run often, then putting them all in the same directory isn't a good idea. You'd better put them in two different dirs and use --source-weights 1:2 2017-12-09 17:51:56 +01:00			`sources = args.urls`

			`if args.source_weights:`
			`weights = tuple(map(int, args.source_weights.split(':')))`
			`if len(weights) != len(sources):`
			`parser.exit(status=2, message='Weight must be in the'`
			`' same number as sources\n')`
			`sources = [weighted_choice(sources, weights)]`

feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`audios = []`
FIX #1: --source-weights source weights are meant to first of all pick a source, with the defined weights. This option is unrelated to --random. In fact, --random will switch from chronological order to random one. --source-weights will, instead, pick only one of the sources given in command line. A typical example is for jingles: if you have a big number of jingles that should run relatively rarely, and a small number of jingles that should run often, then putting them all in the same directory isn't a good idea. You'd better put them in two different dirs and use --source-weights 1:2 2017-12-09 17:51:56 +01:00			`for url in sources:`
feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`if url.startswith('http:') or url.startswith('https:') \`
			`or os.path.isfile(url):`
			`# download the feed`
			`tree = get_tree(url)`
			`if not args.group:`
			`# get audio urls, removing those that are too long`
feed --min-len 2018-01-30 21:20:06 +01:00			`audios += [audio for audio in get_urls(tree) if`
cosmetic changes 2018-02-13 21:49:30 +01:00			`(args.max_len == 0 or`
			`audio.duration <= args.max_len) and`
			`(args.min_len == 0 or`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`audio.duration >= args.min_len) and`
			`(args.min_age.total_seconds() == 0 or`
			`audio.age >= args.min_age) and`
			`(args.max_age.total_seconds() == 0 or`
			`audio.age <= args.max_age)`
feed --min-len 2018-01-30 21:20:06 +01:00			`]`
feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`else:`
			`groups = get_grouped_urls(tree)`
			`audios += [groups[g] for g in groups.keys()`
Support audio date (and filter by date) 2018-02-13 22:47:35 +01:00			`if`
			`(args.max_len == 0 or`
			`groups[g].duration <= args.max_len) and`
			`(args.min_len == 0 or`
			`groups[g].duration >= args.max_len) and`
			`(args.min_age.total_seconds() == 0 or`
			`groups[g].age >= args.min_age) and`
			`(args.max_age.total_seconds() == 0 or`
			`groups[g].age <= args.max_age)`
feed: supports directories, too Now feeds and directories can easily be combined 2017-12-09 12:51:36 +01:00			`]`
			`elif os.path.isdir(url):`
			`audiodir = get_audio_from_dir(url)`
			`if not args.group:`
			`audios += audiodir`
			`else:`
			`for a in audiodir:`
			`ag = AudioGroup(os.path.basename(a.url))`
			`ag.append(a)`
			`audios.append(ag)`
			`else:`
			logging.info('unsupported url `%s`', url)

--random 2017-06-12 23:26:43 +02:00			`audios = audios[args.start:]`
			`if args.random:`
			`random.shuffle(audios)`
			`audios = audios[:args.howmany]`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00
			`# the for loop excludes the last one`
			`# this is to support the --slotsize option`
			`if not audios:`
			`return`
			`for audio in audios[:-1]:`
			`if args.debug:`
			`print(repr(audio))`
			`else:`
add --copy 2017-05-25 16:52:57 +02:00			`put(audio, args.copy)`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`if args.slotsize is not None:`
minor: english variable names 2018-02-13 20:28:01 +01:00			`duration = audio.duration`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`if duration < args.slotsize:`
			`print('## musica per {} secondi'`
			`.format(args.slotsize - duration))`
			`# finally, the last one`
			`if args.debug:`
			`print(repr(audios[-1]))`
			`else:`
add --copy 2017-05-25 16:52:57 +02:00			`put(audios[-1], args.copy)`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`# else: # grouping; TODO: support slotsize`
			`# for item in groups:`
			`# if args.debug:`
minor: english variable names 2018-02-13 20:28:01 +01:00			`# print('#', item, groups[item].duration)`
feed: pesca audio dai feed con molte magie 2017-04-19 11:18:45 +02:00			`# print(groups[item])`


			`if __name__ == '__main__':`
			`main()`