#!/usr/bin/env python3
'''
Feed parser with many features.

From a feed, it supports filtering, subslicing and random picking.
Besides feeds, it supports picking files from directories.
'''
import os
import logging
from argparse import ArgumentParser, ArgumentTypeError
from subprocess import check_output
from collections import OrderedDict
import re
import urllib.request
from urllib.parse import urlparse, unquote
import posixpath
import random
from bisect import bisect
import datetime
from itertools import accumulate

from lxml import html
import requests
from pytimeparse.timeparse import timeparse


def _parse_seconds(arg, description):
    '''
    Convert a command-line string into a number of seconds.

    A purely decimal string is taken as a count of seconds; anything else is
    handed to pytimeparse's timeparse().

    :param arg: the raw command-line string
    :param description: noun used in the error message (e.g. 'duration')
    :returns: the number of seconds, as returned by int() or timeparse()
    :raises ArgumentTypeError: if timeparse() cannot interpret arg
    '''
    if arg.isdecimal():
        return int(arg)
    secs = timeparse(arg)
    if secs is None:
        raise ArgumentTypeError('%r is not a valid %s' % (arg, description))
    return secs


def DurationType(arg):
    '''
    argparse-style converter: parse arg into a number of seconds.

    :raises ArgumentTypeError: if arg is not a valid duration
    '''
    return _parse_seconds(arg, 'duration')


def TimeDeltaType(arg):
    '''
    argparse-style converter: parse arg into a datetime.timedelta.

    :raises ArgumentTypeError: if arg is not a valid time range
    '''
    return datetime.timedelta(seconds=_parse_seconds(arg, 'time range'))


def weighted_choice(values, weights):
    '''
    random.choice with weights

    weights must be integers greater than 0.

    Their meaning is "relative", that is [1,2,3] is the same as [2,4,6]

    :param values: sequence of candidates
    :param weights: sequence of weights, one per candidate
    :returns: one element of values
    :raises ValueError: if values and weights have different lengths
    :raises IndexError: if values is empty (same as random.choice)
    '''
    # Explicit exception rather than assert: asserts vanish under `python -O`
    if len(values) != len(weights):
        raise ValueError('values and weights must have the same length')
    if not values:
        raise IndexError('cannot choose from an empty sequence')
    cum_weights = list(accumulate(weights))
    total = cum_weights[-1]
    # random.random() is in [0, 1), so x < total and bisect() (bisect_right)
    # always lands on a valid index when the weights are positive
    x = random.random() * total
    return values[bisect(cum_weights, x)]


def delta_humanreadable(tdelta):
    '''
    Format a timedelta compactly, as "<days>d<hours>h" or just "<hours>h".

    :param tdelta: a datetime.timedelta, or None
    :returns: '' for None, otherwise the formatted string; minutes and
              seconds are discarded
    '''
    if tdelta is None:
        return ''
    days = tdelta.days
    # timedelta is normalized so that .seconds only holds the part of the
    # delta that does not fit in whole days
    hours = tdelta.seconds // 3600
    if days:
        return '{}d{}h'.format(days, hours)
    return '{}h'.format(hours)


class Audio(object):
    '''An audio resource identified by its URL, with a duration and a date.'''

    def __init__(self, url, duration=None, date=None):
        '''
        :param url: location of the audio, as a string
        :param duration: duration of the audio; when None it is computed by
                         get_duration()
        :param date: optional date associated with the audio
        '''
        self.url = url
        if duration is None:
            # get_duration is not defined in the reviewed excerpt (presumably
            # further down in this module); it receives the UTF-8 encoded URL
            duration = get_duration(url.encode('utf-8'))
        self.duration = duration
        self.date = date

    def __str__(self):
        return self.url

    # NOTE(review): the original __repr__ is cut off in the reviewed excerpt
    # (only "def __repr__(self): return '" is visible); restore its body from
    # the full file rather than guessing it here.