|
@@ -1,6 +1,13 @@
|
|
|
#!/usr/bin/env python3
|
|
|
-# pip install lxml requests
|
|
|
+'''
|
|
|
+Feed parser with many features
|
|
|
+
|
|
|
+From a feed, it supports filtering, subslicing and random picking
|
|
|
+
|
|
|
+Besides feeds, it supports picking files from directories
|
|
|
+'''
|
|
|
import os
|
|
|
+import logging
|
|
|
from argparse import ArgumentParser
|
|
|
from subprocess import check_output
|
|
|
from collections import OrderedDict
|
|
@@ -47,9 +54,9 @@ class AudioGroup(list):
|
|
|
return '\n'.join(str(a) for a in self.audios)
|
|
|
|
|
|
def __repr__(self):
|
|
|
- return '<AudioGroup "{}" ({})\n{}>'.format(self.description,
|
|
|
- self.durata,
|
|
|
- '\n'.join(repr(a) for a in self.audios))
|
|
|
+ return '<AudioGroup "{}" ({})\n{} >'.\
|
|
|
+ format(self.description, self.durata,
|
|
|
+ '\n'.join(' ' + repr(a) for a in self.audios))
|
|
|
|
|
|
@property
|
|
|
def durata(self):
|
|
@@ -82,6 +89,19 @@ def get_audio_from_description(text):
|
|
|
return Audio(unquote(url), durata)
|
|
|
|
|
|
|
|
|
+# copied from larigira.fsutils
|
|
|
+def scan_dir_audio(dirname, extensions=('mp3', 'oga', 'wav', 'ogg')):
|
|
|
+ for root, dirnames, filenames in os.walk(dirname):
|
|
|
+ for fname in filenames:
|
|
|
+ if fname.split('.')[-1].lower() in extensions:
|
|
|
+ yield os.path.join(root, fname)
|
|
|
+
|
|
|
+
|
|
|
+def get_audio_from_dir(dirpath):
|
|
|
+ fpaths = scan_dir_audio(dirpath)
|
|
|
+ return [Audio('file://' + os.path.realpath(u)) for u in fpaths]
|
|
|
+
|
|
|
+
|
|
|
def get_urls(tree):
|
|
|
urls = tree.xpath('//item/description')
|
|
|
for url_elem in urls:
|
|
@@ -127,7 +147,7 @@ def get_parser():
|
|
|
action='store_true')
|
|
|
p.add_argument('--debug', help='Debug messages', default=False,
|
|
|
action='store_true')
|
|
|
- p.add_argument('url')
|
|
|
+ p.add_argument('urls', metavar='URL', nargs='+')
|
|
|
return p
|
|
|
|
|
|
|
|
@@ -137,32 +157,56 @@ def put(audio, copy=False):
|
|
|
print(url)
|
|
|
else:
|
|
|
for url in audio.urls:
|
|
|
- destdir = (os.environ.get('TMPDIR', '.'))
|
|
|
- fname = posixpath.basename(urlparse(url).path)
|
|
|
- # sanitize
|
|
|
- fname = "".join(c for c in fname
|
|
|
- if c.isalnum() or c in list('._-')).rstrip()
|
|
|
- dest = os.path.join(destdir, fname)
|
|
|
- os.makedirs(destdir, exist_ok=True)
|
|
|
- fname, headers = urllib.request.urlretrieve(url, dest)
|
|
|
- print('file://%s' % os.path.realpath(fname))
|
|
|
+ if url.split(':')[0] in ('http', 'https'):
|
|
|
+ destdir = (os.environ.get('TMPDIR', '.'))
|
|
|
+ fname = posixpath.basename(urlparse(url).path)
|
|
|
+ # sanitize
|
|
|
+ fname = "".join(c for c in fname
|
|
|
+ if c.isalnum() or c in list('._-')).rstrip()
|
|
|
+ dest = os.path.join(destdir, fname)
|
|
|
+ os.makedirs(destdir, exist_ok=True)
|
|
|
+ fname, headers = urllib.request.urlretrieve(url, dest)
|
|
|
+ print('file://%s' % os.path.realpath(fname))
|
|
|
+ else:
|
|
|
+ # FIXME: non-http(s) urls (e.g. file://) are printed as-is; the file itself is not copied
|
|
|
+ print(url)
|
|
|
|
|
|
|
|
|
def main():
|
|
|
args = get_parser().parse_args()
|
|
|
- # download the feed
|
|
|
- tree = get_tree(args.url)
|
|
|
- if not args.group:
|
|
|
- # get audio urls, removing those that are too long
|
|
|
- audios = [audio for audio in get_urls(tree)
|
|
|
- if args.max_len == 0 or
|
|
|
- audio.durata <= args.max_len]
|
|
|
+ if not args.debug:
|
|
|
+ logging.basicConfig(level=logging.WARNING)
|
|
|
else:
|
|
|
- groups = get_grouped_urls(tree)
|
|
|
- audios = [groups[g] for g in groups.keys()
|
|
|
- if args.max_len == 0 or
|
|
|
- groups[g].durata <= args.max_len
|
|
|
- ]
|
|
|
+ logging.basicConfig(level=logging.DEBUG)
|
|
|
+ audios = []
|
|
|
+ for url in args.urls:
|
|
|
+ if url.startswith('http:') or url.startswith('https:') \
|
|
|
+ or os.path.isfile(url):
|
|
|
+ # load the feed (from a remote http(s) URL or a local file)
|
|
|
+ tree = get_tree(url)
|
|
|
+ if not args.group:
|
|
|
+ # get audio urls, removing those that are too long
|
|
|
+ audios += [audio for audio in get_urls(tree)
|
|
|
+ if args.max_len == 0 or
|
|
|
+ audio.durata <= args.max_len]
|
|
|
+ else:
|
|
|
+ groups = get_grouped_urls(tree)
|
|
|
+ audios += [groups[g] for g in groups.keys()
|
|
|
+ if args.max_len == 0 or
|
|
|
+ groups[g].durata <= args.max_len
|
|
|
+ ]
|
|
|
+ elif os.path.isdir(url):
|
|
|
+ audiodir = get_audio_from_dir(url)
|
|
|
+ if not args.group:
|
|
|
+ audios += audiodir
|
|
|
+ else:
|
|
|
+ for a in audiodir:
|
|
|
+ ag = AudioGroup(os.path.basename(a.url))
|
|
|
+ ag.append(a)
|
|
|
+ audios.append(ag)
|
|
|
+ else:
|
|
|
+ logging.info('unsupported url `%s`', url)
|
|
|
+
|
|
|
audios = audios[args.start:]
|
|
|
if args.random:
|
|
|
random.shuffle(audios)
|