|
@@ -255,17 +255,23 @@ def get_audio_from_dir(dirpath):
|
|
|
|
|
|
def get_item_date(el):
    """Return the date stored in the ``pubdate`` child of *el*, if parseable.

    The child's text is tried against each known format in turn: an
    RFC 822-style stamp (``Wed, 15 Jan 2020 22:45:33 +0000``) and an
    ISO 8601-style stamp (``2020-01-15T22:45:33+0000``).

    Args:
        el: an element exposing ``find``.

    Returns:
        A ``datetime.datetime`` for the first format that matches, or
        ``None`` when there is no ``pubdate`` child, the child has no text,
        or no format matches.
    """
    # Wed, 15 Jan 2020 22:45:33 +0000
    formats = ("%a, %d %b %Y %H:%M:%S %z", "%Y-%m-%dT%H:%M:%S%z")
    el_date = el.find("pubdate")
    # An empty <pubdate/> has text None; strptime would raise TypeError on it.
    if el_date is None or el_date.text is None:
        return None
    # Feeds often pad the value with whitespace/newlines, which strptime rejects.
    text = el_date.text.strip()
    for fmt in formats:
        try:
            return datetime.datetime.strptime(text, fmt)
        except ValueError:
            # Not this format; fall through to the next candidate.
            continue
    return None
|
|
|
|
|
|
|
|
|
-def get_urls(tree):
|
|
|
+def get_urls_generic(tree, url_selector="description[text()]", metadata_in_body=True):
|
|
|
items = tree.xpath("//item")
|
|
|
for it in items:
|
|
|
title = it.find("title").text
|
|
|
el_body = it.find("description")
|
|
|
- if el_body is not None:
|
|
|
+ if metadata_in_body and el_body is not None:
|
|
|
url = el_body.text
|
|
|
try:
|
|
|
audio = get_audio_from_description(url)
|
|
@@ -275,6 +281,27 @@ def get_urls(tree):
|
|
|
if audio.date is None:
|
|
|
audio.date = get_item_date(it)
|
|
|
yield audio
|
|
|
+ else:
|
|
|
+ url = it.xpath(url_selector)[0]
|
|
|
+ audio = Audio(url)
|
|
|
+ audio.date = get_item_date(it)
|
|
|
+ yield audio
|
|
|
+
|
|
|
+
|
|
|
def get_urls_from_podcast(tree):
    """Return the audios of a podcast-style feed.

    Delegates to ``get_urls_generic``, taking each item's URL from the
    ``url`` attribute of its ``enclosure`` child and not reading metadata
    from the item body.
    """
    podcast_options = {
        "url_selector": "enclosure/@url",
        "metadata_in_body": False,
    }
    return get_urls_generic(tree, **podcast_options)
|
|
|
+
|
|
|
+
|
|
|
def get_urls_from_custom_feed(tree):
    """Return the audios of a custom RSS feed.

    Delegates to ``get_urls_generic`` with its default URL selector and
    with metadata read from the item body.
    """
    audios = get_urls_generic(tree, metadata_in_body=True)
    return audios
|
|
|
+
|
|
|
+
|
|
|
def get_urls_factory(url, args):
    """Pick the feed-parsing function matching ``args.feed_type``.

    Args:
        url: the feed location; currently not used to make the choice.
        args: object with a ``feed_type`` attribute, either ``"customrss"``
            or ``"podcast"``.

    Returns:
        ``get_urls_from_custom_feed`` or ``get_urls_from_podcast``; each
        takes a parsed tree.

    Raises:
        ValueError: if ``args.feed_type`` is not a supported value.
    """
    if args.feed_type == "customrss":
        return get_urls_from_custom_feed
    if args.feed_type == "podcast":
        return get_urls_from_podcast
    raise ValueError("unsupported feed type %s" % args.feed_type)
|
|
|
|
|
|
|
|
|
def get_grouped_urls(tree):
|
|
@@ -316,6 +343,10 @@ Usage: """
|
|
|
|
|
|
def get_parser():
|
|
|
p = ArgumentParser(HELP)
|
|
|
+ parsing = p.add_argument_group("parsing", "Feed parsing")
|
|
|
+ parsing.add_argument(
|
|
|
+ "--feed-type", type=str, choices=["customrss", "podcast"], default="customrss"
|
|
|
+ )
|
|
|
src = p.add_argument_group("sources", "How to deal with sources")
|
|
|
src.add_argument(
|
|
|
"--source-weights", help='Select only one "source" based on this weights'
|
|
@@ -455,7 +486,9 @@ def retrieve(url, args):
|
|
|
audiodir = get_audio_from_dir(url)
|
|
|
return audiodir
|
|
|
elif url.startswith("http:") or url.startswith("https:") or os.path.isfile(url):
|
|
|
- return get_urls(get_tree(url))
|
|
|
+ getter = get_urls_factory(url, args)
|
|
|
+ tree = get_tree(url)
|
|
|
+ return getter(tree)
|
|
|
else:
|
|
|
logging.info("unsupported url `%s`", url)
|
|
|
return []
|