|
@@ -9,6 +9,7 @@ Beside feeds, it supports picking files from directories
|
|
|
import datetime
|
|
|
import logging
|
|
|
import os
|
|
|
+import glob
|
|
|
import posixpath
|
|
|
import random
|
|
|
import re
|
|
@@ -228,14 +229,28 @@ def get_audio_from_description(text):
|
|
|
return a
|
|
|
|
|
|
|
|
|
def is_audio_file(fpath, extensions=("mp3", "oga", "wav", "ogg")):
    """Tell whether *fpath* has one of the given audio file extensions.

    Only the name is inspected; the file is never opened and does not need
    to exist.

    :param fpath: file name or path to check
    :param extensions: accepted extensions, lowercase and without the
        leading dot
    :returns: True if the extension of *fpath*, compared case-insensitively,
        is listed in *extensions*; False otherwise
    """
    # splitext() only looks at the last path component and yields an empty
    # extension when there is no dot, so a file literally named "mp3" is not
    # mistaken for an audio file.
    ext = os.path.splitext(fpath)[1]
    return ext[1:].lower() in extensions
|
|
|
+
|
|
|
# copied from larigira.fsutils
|
|
|
def scan_dir_audio(dirname, extensions=("mp3", "oga", "wav", "ogg")):
    """Recursively yield the paths of the audio files found under *dirname*.

    :param dirname: directory to walk
    :param extensions: accepted extensions, lowercase and without the
        leading dot; kept as a parameter so callers written against the
        earlier signature keep working
    :returns: a generator of paths, each built by joining the directory
        being walked with the file name
    """
    for root, _dirnames, filenames in os.walk(dirname):
        for fname in filenames:
            if is_audio_file(fname, extensions):
                yield os.path.join(root, fname)
|
|
|
|
|
|
|
|
|
def get_audio_from_file(fpath):
    """Build the Audio entry describing a single local file.

    The audio URL is ``file://`` followed by the resolved real path of
    *fpath*; its date is the file's modification time.

    :param fpath: path of an existing file
    :returns: a single-element list holding the Audio object
    :raises OSError: if the modification time of *fpath* cannot be read
    """
    mtime = os.path.getmtime(fpath)
    a = Audio(
        "file://" + os.path.realpath(fpath),
        # Convert the POSIX timestamp straight to an aware UTC datetime.
        # Calling fromtimestamp() without tz gives *local* wall-clock time,
        # and tagging that as UTC afterwards shifts the date by the local
        # UTC offset.
        date=datetime.datetime.fromtimestamp(mtime, tz=datetime.timezone.utc),
    )
    return [a]
|
|
|
def get_audio_from_dir(dirpath):
|
|
|
fpaths = scan_dir_audio(dirpath)
|
|
|
ret = []
|
|
@@ -357,6 +372,12 @@ def get_parser():
|
|
|
action="store_true",
|
|
|
help="Group audios that belong to the same article",
|
|
|
)
|
|
|
+ src.add_argument(
|
|
|
+ "--glob",
|
|
|
+ default=False,
|
|
|
+ action="store_true",
|
|
|
+ help="Wildcards in filenames are interpreted",
|
|
|
+ )
|
|
|
|
|
|
filters = p.add_argument_group(
|
|
|
"filters", "Select only items that match " "these conditions"
|
|
@@ -487,6 +508,8 @@ def retrieve(url, args):
|
|
|
if os.path.isdir(url):
|
|
|
audiodir = get_audio_from_dir(url)
|
|
|
return audiodir
|
|
|
+ elif os.path.isfile(url) and is_audio_file(url):
|
|
|
+ return get_audio_from_file(url)
|
|
|
elif url.startswith("http:") or url.startswith("https:") or os.path.isfile(url):
|
|
|
getter = get_urls_factory(url, args)
|
|
|
tree = get_tree(url)
|
|
@@ -503,6 +526,11 @@ def retrieve(url, args):
|
|
|
ag.append(a)
|
|
|
agroups.append(ag)
|
|
|
return agroups
|
|
|
+ elif os.path.isfile(url) and is_audio_file(url):
|
|
|
+ audio = get_audio_from_file(url)[0]
|
|
|
+ ag = AudioGroup(url)
|
|
|
+ ag.append(audio)
|
|
|
+ return [ag]
|
|
|
elif url.startswith("http:") or url.startswith("https:") or os.path.isfile(url):
|
|
|
groups = get_grouped_urls(get_tree(url))
|
|
|
return groups.values()
|
|
@@ -528,6 +556,26 @@ def audio_passes_filters(audio, args):
|
|
|
return True
|
|
|
|
|
|
|
|
|
def expand_glob(sources: list, weights: list) -> tuple:
    """Expand wildcards in *sources*, repeating each weight for every match.

    Let's say that sources=["foo", "bar*"] and weights=["2", "3"], and that
    on the filesystem there are foo, bar1 and bar2.

    Result: ["foo", "bar1", "bar2"], ["2", "3", "3"]

    Sources starting with ``http:`` or ``https:`` are not filesystem paths,
    so they are passed through unchanged together with their weight.
    A local source that matches nothing on the filesystem is dropped.

    :param sources: local paths (possibly containing wildcards) or URLs
    :param weights: one weight per source, in the same order
    :returns: a ``(new_sources, new_weights)`` pair of lists of equal length
    """
    new_sources = []
    new_weights = []

    for src, weight in zip(sources, weights):
        if src.startswith(("http:", "https:")):
            # glob.glob() would return [] for a URL and silently discard it.
            matches = [src]
        else:
            # glob.glob() returns matches in arbitrary order; sort them so
            # the result is reproducible.
            matches = sorted(glob.glob(src))

        new_sources.extend(matches)
        new_weights.extend([weight] * len(matches))

    return new_sources, new_weights
|
|
|
+
|
|
|
+
|
|
|
def get_audio_by_source(args, parser):
|
|
|
sources = args.urls
|
|
|
if args.source_weights:
|
|
@@ -540,6 +588,9 @@ def get_audio_by_source(args, parser):
|
|
|
else:
|
|
|
weights = [1] * len(sources)
|
|
|
|
|
|
+ if args.glob:
|
|
|
+ sources, weights = expand_glob(sources, weights)
|
|
|
+
|
|
|
audio_by_source = OrderedDict()
|
|
|
for i, url in enumerate(sources):
|
|
|
url_audios = list(retrieve(url, args))
|