Browse Source

Support audio date (and filter by date)

boyska 6 years ago
parent
commit
0740fb5f84
1 changed file with 89 additions and 12 deletions
  1. 89 12
      feed

+ 89 - 12
feed

@@ -8,7 +8,7 @@ Beside feeds, it supports picking files from directories
 '''
 import os
 import logging
-from argparse import ArgumentParser
+from argparse import ArgumentParser, ArgumentTypeError
 from subprocess import check_output
 from collections import OrderedDict
 import re
@@ -17,9 +17,18 @@ from urllib.parse import urlparse, unquote
 import posixpath
 import random
 from bisect import bisect
+import datetime
 
 from lxml import html
 import requests
+from pytimeparse.timeparse import timeparse
+
+
+def TimeDeltaType(arg):
+    secs = timeparse(arg)
+    if secs is None:
+        raise ArgumentTypeError('%r is not a valid time range' % arg)
+    return datetime.timedelta(seconds=timeparse(arg))
 
 
 def weighted_choice(values, weights):
@@ -41,23 +50,43 @@ def weighted_choice(values, weights):
     return values[i]
 
 
+def delta_humanreadable(tdelta):
+    if tdelta is None:
+        return ''
+    days = tdelta.days
+    hours = (tdelta - datetime.timedelta(days=days)).seconds // 3600
+    if days:
+        return '{}d{}h'.format(days, hours)
+    return '{}h'.format(hours)
+
+
 class Audio(object):
-    def __init__(self, url, duration=None):
+    def __init__(self, url, duration=None, date=None):
         self.url = url
         if duration is None:
             duration = get_duration(url.encode('utf-8'))
         self.duration = duration
+        self.date = date
 
     def __str__(self):
         return self.url
 
     def __repr__(self):
-        return '<Audio {} ({})>'.format(self.url, self.duration)
+        return '<Audio {} ({} {})>'.format(self.url, self.duration,
+                                           delta_humanreadable(self.age))
 
     @property
     def urls(self):
         return [self.url]
 
+    @property
+    def age(self):
+        if self.date is None:
+            return None
+        now = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
+
+        return now - self.date
+
 
 class AudioGroup(list):
     def __init__(self, description=None):
@@ -74,8 +103,9 @@ class AudioGroup(list):
         return '\n'.join(str(a) for a in self.audios)
 
     def __repr__(self):
-        return '<AudioGroup "{}" ({})\n{} >'.\
+        return '<AudioGroup "{}" ({} {})\n{} >'.\
                 format(self.description, self.duration,
+                       delta_humanreadable(self.age),
                        '\n'.join('   ' + repr(a) for a in self.audios))
 
     @property
@@ -86,6 +116,22 @@ class AudioGroup(list):
     def urls(self):
         return [a.url for a in self.audios]
 
+    @property
+    def date(self):
+        for a in self.audios:
+            if hasattr(a, 'date'):
+                return a.date
+        return None
+
+    @property
+    def age(self):
+        if self.date is None:
+            return None
+        now = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
+
+        return now - self.date
+
+
 
 def get_tree(feed_url):
     if feed_url.startswith('http:') or feed_url.startswith('https:'):
@@ -122,10 +168,22 @@ def get_audio_from_dir(dirpath):
     return [Audio('file://' + os.path.realpath(u)) for u in fpaths]
 
 
+def get_item_date(el):
+    el_date = el.find('pubdate')
+    if el_date is not None:
+        return datetime.datetime.strptime(
+            el_date.text, '%Y-%m-%dT%H:%M:%S%z')
+    return None
+
+
 def get_urls(tree):
-    urls = tree.xpath('//item/description')
-    for url_elem in urls:
-        yield get_audio_from_description(url_elem.text)
+    items = tree.xpath('//item')
+    for it in items:
+        el_body = it.find('description')
+        if el_body is not None:
+            audio = get_audio_from_description(el_body.text)
+            audio.date = get_item_date(it)
+            yield audio
 
 
 def get_grouped_urls(tree):
@@ -135,8 +193,9 @@ def get_grouped_urls(tree):
         guid = item.xpath('guid')[0].text.strip()
         if guid not in groups:
             groups[guid] = AudioGroup(guid)
-        groups[guid].append(get_audio_from_description(
-            item.xpath('description')[0].text))
+        audio = get_audio_from_description(item.xpath('description')[0].text)
+        audio.date = get_item_date(item)
+        groups[guid].append(audio)
     return groups
 
 
@@ -179,6 +238,13 @@ def get_parser():
                          help='Exclude any audio that is shorter '
                          'than MIN_LEN seconds')
 
+    filters.add_argument('--min-age', default=datetime.timedelta(),
+                         type=TimeDeltaType,
+                         help='Exclude audio more recent than MIN_AGE')
+    filters.add_argument('--max-age', default=datetime.timedelta(),
+                         type=TimeDeltaType,
+                         help='Exclude audio older than MAX_AGE')
+
     p.add_argument('--start', default=0, type=int,
                    help='0-indexed start number. '
                    'By default, play from most recent')
@@ -246,13 +312,24 @@ def main():
                            (args.max_len == 0 or
                             audio.duration <= args.max_len) and
                            (args.min_len == 0 or
-                            audio.duration >= args.min_len)
+                            audio.duration >= args.min_len) and
+                           (args.min_age.total_seconds() == 0 or
+                            audio.age >= args.min_age) and
+                           (args.max_age.total_seconds() == 0 or
+                            audio.age <= args.max_age)
                            ]
             else:
                 groups = get_grouped_urls(tree)
                 audios += [groups[g] for g in groups.keys()
-                           if args.max_len == 0 or
-                           groups[g].duration <= args.max_len
+                           if
+                           (args.max_len == 0 or
+                            groups[g].duration <= args.max_len) and
+                           (args.min_len == 0 or
+                            groups[g].duration >= args.max_len) and
+                           (args.min_age.total_seconds() == 0 or
+                            groups[g].age >= args.min_age) and
+                           (args.max_age.total_seconds() == 0 or
+                            groups[g].age <= args.max_age)
                            ]
         elif os.path.isdir(url):
             audiodir = get_audio_from_dir(url)