#!/usr/bin/env python3 """ Feed parser with many features from a feed, it supports filtering, subslicing, random picking Beside feeds, it supports picking files from directories """ import datetime import logging import os import glob import posixpath import random import re import sys import urllib.request from argparse import ArgumentParser, ArgumentTypeError from bisect import bisect from collections import OrderedDict from subprocess import CalledProcessError, check_output from urllib.parse import unquote, urlparse import requests from lxml import html from pytimeparse.timeparse import timeparse def debug(*args, **kwargs): kwargs.setdefault("file", sys.stderr) print(*args, **kwargs) def get_int(s): return int(re.findall(r"\d+", s)[0]) def DurationType(arg): if arg.isdecimal(): secs = int(arg) else: secs = timeparse(arg) if secs is None: raise ArgumentTypeError("%r is not a valid duration" % arg) return secs def TimeDeltaType(arg): if arg.isdecimal(): secs = int(arg) else: secs = timeparse(arg) if secs is None: raise ArgumentTypeError("%r is not a valid time range" % arg) return datetime.timedelta(seconds=secs) def weighted_choice(values, weights): """ random.choice with weights weights must be integers greater than 0. Their meaning is "relative", that is [1,2,3] is the same as [2,4,6] """ assert len(values) == len(weights) if not values: raise IndexError("Cannot do weighted choice from an empty sequence") if sum(weights) == 0: raise IndexError("Cannot do weighted choice where weight=0") total = 0 cum_weights = [] for w in weights: total += w cum_weights.append(total) x = random.random() * total i = bisect(cum_weights, x) return values[i] def delta_humanreadable(tdelta): if tdelta is None: return "" days = tdelta.days hours = (tdelta - datetime.timedelta(days=days)).seconds // 3600 if days: return "{}d{}h".format(days, hours) return "{}h".format(hours) def duration_humanreadable(seconds): hours = seconds // 3600 minutes = (seconds - hours * 3600) // 60 seconds = seconds % 60 if hours > 0: return "{}h{}m{}s".format(hours, minutes, seconds) return "{}m{}s".format(minutes, seconds) class Audio(object): def __init__(self, url, duration=None, date=None): self.url = url self._duration = duration self.date = date self.end_date = datetime.datetime(9999, 12, 31, tzinfo=datetime.timezone.utc) def __str__(self): return self.url def __repr__(self): return "