Browse Source

feed: supports directories, too

Now feeds and directories can easily be combined
boyska 6 years ago
parent
commit
a8f59b75a3
1 changed files with 70 additions and 26 deletions
  1. 70 26
      feed

+ 70 - 26
feed

@@ -1,6 +1,13 @@
 #!/usr/bin/env python3
-# pip install lxml requests
+'''
+Feed parser with many features
+
+from a feed, it supports filtering, subslicing, random picking
+
+Besides feeds, it supports picking files from directories
+'''
 import os
+import logging
 from argparse import ArgumentParser
 from subprocess import check_output
 from collections import OrderedDict
@@ -47,9 +54,9 @@ class AudioGroup(list):
         return '\n'.join(str(a) for a in self.audios)
 
     def __repr__(self):
-        return '<AudioGroup "{}" ({})\n{}>'.format(self.description,
-                                                   self.durata,
-                                                   '\n'.join(repr(a) for a in self.audios))
+        return '<AudioGroup "{}" ({})\n{} >'.\
+                format(self.description, self.durata,
+                       '\n'.join('   ' + repr(a) for a in self.audios))
 
     @property
     def durata(self):
@@ -82,6 +89,19 @@ def get_audio_from_description(text):
     return Audio(unquote(url), durata)
 
 
+# copied from larigira.fsutils
+def scan_dir_audio(dirname, extensions=('mp3', 'oga', 'wav', 'ogg')):
+    for root, dirnames, filenames in os.walk(dirname):
+        for fname in filenames:
+            if fname.split('.')[-1].lower() in extensions:
+                yield os.path.join(root, fname)
+
+
+def get_audio_from_dir(dirpath):
+    fpaths = scan_dir_audio(dirpath)
+    return [Audio('file://' + os.path.realpath(u)) for u in fpaths]
+
+
 def get_urls(tree):
     urls = tree.xpath('//item/description')
     for url_elem in urls:
@@ -127,7 +147,7 @@ def get_parser():
                    action='store_true')
     p.add_argument('--debug', help='Debug messages', default=False,
                    action='store_true')
-    p.add_argument('url')
+    p.add_argument('urls', metavar='URL', nargs='+')
     return p
 
 
@@ -137,32 +157,56 @@ def put(audio, copy=False):
             print(url)
     else:
         for url in audio.urls:
-            destdir = (os.environ.get('TMPDIR', '.'))
-            fname = posixpath.basename(urlparse(url).path)
-            # sanitize
-            fname = "".join(c for c in fname
-                            if c.isalnum() or c in list('._-')).rstrip()
-            dest = os.path.join(destdir, fname)
-            os.makedirs(destdir, exist_ok=True)
-            fname, headers = urllib.request.urlretrieve(url, dest)
-            print('file://%s' % os.path.realpath(fname))
+            if url.split(':')[0] in ('http', 'https'):
+                destdir = (os.environ.get('TMPDIR', '.'))
+                fname = posixpath.basename(urlparse(url).path)
+                # sanitize
+                fname = "".join(c for c in fname
+                                if c.isalnum() or c in list('._-')).rstrip()
+                dest = os.path.join(destdir, fname)
+                os.makedirs(destdir, exist_ok=True)
+                fname, headers = urllib.request.urlretrieve(url, dest)
+                print('file://%s' % os.path.realpath(fname))
+            else:
+                # FIXME: file:// urls are just copied
+                print(url)
 
 
 def main():
     args = get_parser().parse_args()
-    # download the feed
-    tree = get_tree(args.url)
-    if not args.group:
-        # get audio urls, removing those that are too long
-        audios = [audio for audio in get_urls(tree)
-                  if args.max_len == 0 or
-                  audio.durata <= args.max_len]
+    if not args.debug:
+        logging.basicConfig(level=logging.WARNING)
     else:
-        groups = get_grouped_urls(tree)
-        audios = [groups[g] for g in groups.keys()
-                  if args.max_len == 0 or
-                  groups[g].durata <= args.max_len
-                  ]
+        logging.basicConfig(level=logging.DEBUG)
+    audios = []
+    for url in args.urls:
+        if url.startswith('http:') or url.startswith('https:') \
+           or os.path.isfile(url):
+            # download the feed
+            tree = get_tree(url)
+            if not args.group:
+                # get audio urls, removing those that are too long
+                audios += [audio for audio in get_urls(tree)
+                           if args.max_len == 0 or
+                           audio.durata <= args.max_len]
+            else:
+                groups = get_grouped_urls(tree)
+                audios += [groups[g] for g in groups.keys()
+                           if args.max_len == 0 or
+                           groups[g].durata <= args.max_len
+                           ]
+        elif os.path.isdir(url):
+            audiodir = get_audio_from_dir(url)
+            if not args.group:
+                audios += audiodir
+            else:
+                for a in audiodir:
+                    ag = AudioGroup(os.path.basename(a.url))
+                    ag.append(a)
+                    audios.append(ag)
+        else:
+            logging.info('unsupported url `%s`', url)
+
     audios = audios[args.start:]
     if args.random:
         random.shuffle(audios)