#!/usr/bin/env python3
"""
Feed parser with many features.

From a feed, it supports filtering, subslicing and random picking.
Besides feeds, it supports picking files from directories.
"""
import datetime
import logging
import os
import posixpath
import random
import re
import urllib.request
from argparse import ArgumentParser, ArgumentTypeError
from bisect import bisect
from collections import OrderedDict
from itertools import accumulate
from subprocess import CalledProcessError, check_output
from urllib.parse import unquote, urlparse

import requests
from lxml import html
from pytimeparse.timeparse import timeparse


def get_int(s):
    """Return the first run of digits found in *s*, converted to int.

    Raises IndexError when *s* contains no digit at all.
    """
    return int(re.findall(r"\d+", s)[0])


def _parse_seconds(arg, description):
    """Convert *arg* to a number of seconds.

    A purely decimal string is taken as a count of seconds; anything else is
    handed to ``timeparse``. When ``timeparse`` returns None an
    ArgumentTypeError is raised whose message ends with *description*.
    """
    if arg.isdecimal():
        return int(arg)
    secs = timeparse(arg)
    if secs is None:
        raise ArgumentTypeError("%r is not a valid %s" % (arg, description))
    return secs


def DurationType(arg):
    """Parse *arg* into a duration expressed in seconds.

    Raises ArgumentTypeError on unparsable input (so it is presumably meant
    to be used as an argparse ``type=`` callable -- confirm against callers).
    """
    return _parse_seconds(arg, "duration")


def TimeDeltaType(arg):
    """Parse *arg* into a ``datetime.timedelta``.

    Accepts the same syntax as DurationType and raises ArgumentTypeError on
    unparsable input.
    """
    return datetime.timedelta(seconds=_parse_seconds(arg, "time range"))


def weighted_choice(values, weights):
    """
    random.choice with weights

    weights must be integers greater than 0.

    Their meaning is "relative", that is [1,2,3] is the same as [2,4,6]

    Raises ValueError when *values* and *weights* differ in length, when they
    are empty, or when the weights do not add up to a positive total.
    """
    # Explicit checks instead of ``assert``: asserts vanish under ``python -O``.
    if len(values) != len(weights):
        raise ValueError("values and weights must have the same length")
    cum_weights = list(accumulate(weights))
    if not cum_weights:
        raise ValueError("cannot choose from an empty sequence")
    total = cum_weights[-1]
    if total <= 0:
        raise ValueError("total of weights must be greater than zero")
    x = random.random() * total
    # Capping ``hi`` at the last index guarantees a valid position even if
    # floating-point rounding ever made ``x`` equal to ``total``.
    i = bisect(cum_weights, x, 0, len(cum_weights) - 1)
    return values[i]


def delta_humanreadable(tdelta):
    """Format a timedelta as ``"<days>d<hours>h"``, or ``"<hours>h"`` when the
    day count is zero.

    Returns an empty string when *tdelta* is None. Minutes and seconds are
    dropped.
    """
    if tdelta is None:
        return ""
    days = tdelta.days
    # ``.seconds`` only holds the part of the delta below one day.
    hours = tdelta.seconds // 3600
    if days:
        return "{}d{}h".format(days, hours)
    return "{}h".format(hours)


def duration_humanreadable(seconds):
    """Format a number of seconds as ``"<h>h<m>m<s>s"``.

    The hours part is omitted when it is not positive.
    """
    minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    if hours > 0:
        return "{}h{}m{}s".format(hours, minutes, secs)
    return "{}m{}s".format(minutes, secs)


class Audio(object):
    """An audio item identified by its URL, with a duration and optional date."""

    def __init__(self, url, duration=None, date=None):
        """Store the item's data.

        When *duration* is None it is obtained by calling ``get_duration``
        (defined elsewhere in this file) on the UTF-8 encoded URL.
        """
        self.url = url
        if duration is None:
            duration = get_duration(url.encode("utf-8"))
        self.duration = duration
        self.date = date
        # Far-future default; presumably means "never expires" -- confirm
        # against the code that reads end_date.
        self.end_date = datetime.datetime(9999, 12, 31, tzinfo=datetime.timezone.utc)

    def __str__(self):
        return self.url

    # NOTE(review): the reviewed text is cut off inside the string literal
    # returned by __repr__, so the method cannot be reproduced here without
    # guessing. The original fragment reads:
    #     def __repr__(self): return "
    # Restore the full method from the upstream source.