Browse Source

feedtype: supports standard podcast, too

boyska 3 years ago
parent
commit
7af0cfbe18
1 changed file with 37 additions and 4 deletions
  1. 37 4
      feed

+ 37 - 4
feed

@@ -255,17 +255,23 @@ def get_audio_from_dir(dirpath):
 
 def get_item_date(el):
     el_date = el.find("pubdate")
+    # Wed, 15 Jan 2020 22:45:33 +0000
+    formats = ["%a, %d %b %Y %H:%M:%S %z", "%Y-%m-%dT%H:%M:%S%z"]
     if el_date is not None:
-        return datetime.datetime.strptime(el_date.text, "%Y-%m-%dT%H:%M:%S%z")
+        for fmt in formats:
+            try:
+                return datetime.datetime.strptime(el_date.text, fmt)
+            except ValueError:
+                pass
     return None
 
 
-def get_urls(tree):
+def get_urls_generic(tree, url_selector="description[text()]", metadata_in_body=True):
     items = tree.xpath("//item")
     for it in items:
         title = it.find("title").text
         el_body = it.find("description")
-        if el_body is not None:
+        if metadata_in_body and el_body is not None:
             url = el_body.text
             try:
                 audio = get_audio_from_description(url)
@@ -275,6 +281,27 @@ def get_urls(tree):
             if audio.date is None:
                 audio.date = get_item_date(it)
             yield audio
+        else:
+            url = it.xpath(url_selector)[0]
+            audio = Audio(url)
+            audio.date = get_item_date(it)
+            yield audio
+
+
+def get_urls_from_podcast(tree):
+    return get_urls_generic(tree, url_selector="enclosure/@url", metadata_in_body=False)
+
+
+def get_urls_from_custom_feed(tree):
+    return get_urls_generic(tree, metadata_in_body=True)
+
+
+def get_urls_factory(url, args):
+    if args.feed_type == "customrss":
+        return get_urls_from_custom_feed
+    if args.feed_type == "podcast":
+        return get_urls_from_podcast
+    raise ValueError("unsupported feeedtype %s" % args.feed_type)
 
 
 def get_grouped_urls(tree):
@@ -316,6 +343,10 @@ Usage: """
 
 def get_parser():
     p = ArgumentParser(HELP)
+    parsing = p.add_argument_group("parsing", "Feed parsing")
+    parsing.add_argument(
+        "--feed-type", type=str, choices=["customrss", "podcast"], default="customrss"
+    )
     src = p.add_argument_group("sources", "How to deal with sources")
     src.add_argument(
         "--source-weights", help='Select only one "source" based on this weights'
@@ -455,7 +486,9 @@ def retrieve(url, args):
             audiodir = get_audio_from_dir(url)
             return audiodir
         elif url.startswith("http:") or url.startswith("https:") or os.path.isfile(url):
-            return get_urls(get_tree(url))
+            getter = get_urls_factory(url, args)
+            tree = get_tree(url)
+            return getter(tree)
         else:
             logging.info("unsupported url `%s`", url)
             return []