Browse Source

FIX: invalid URLs do not block the script

Before this change, a single invalid URL in the feed would block the
whole feed. Specifically, when an item had no duration and ffprobe failed,
everything failed. This commit makes the script resilient to such errors.
boyska 6 years ago
parent
commit
ce11addbcd
1 changed file with 17 additions and 6 deletions
  1. 17 6
      feed

+ 17 - 6
feed

@@ -9,7 +9,7 @@ Beside feeds, it supports picking files from directories
 import os
 import logging
 from argparse import ArgumentParser, ArgumentTypeError
-from subprocess import check_output
+from subprocess import check_output, CalledProcessError
 from collections import OrderedDict
 import re
 import urllib.request
@@ -163,7 +163,9 @@ def get_audio_from_description(text):
     url = lines[0]
     duration = None
     if len(lines) > 1:
-        duration = int(re.findall(r'\d+', lines[1].split('=')[1].strip())[0])
+        parts = lines[1].split('=')
+        if len(parts) > 1 and parts[1]:
+            duration = int(re.findall(r'\d+', parts[1].strip())[0])
     return Audio(unquote(url), duration)
 
 
@@ -191,9 +193,15 @@ def get_item_date(el):
 def get_urls(tree):
     items = tree.xpath('//item')
     for it in items:
+        title = it.find('title').text
         el_body = it.find('description')
         if el_body is not None:
-            audio = get_audio_from_description(el_body.text)
+            url = el_body.text
+            try:
+                audio = get_audio_from_description(url)
+            except Exception as exc:
+                logging.info('error getting duration for `%s`' % title)
+                continue
             audio.date = get_item_date(it)
             yield audio
 
@@ -212,9 +220,12 @@ def get_grouped_urls(tree):
 
 
 def get_duration(url):
-    lineout = check_output(['ffprobe', '-v', 'error',
-                            '-show_entries', 'format=duration',
-                            '-i', url]).split(b'\n')
+    try:
+        lineout = check_output(['ffprobe', '-v', 'error',
+                                '-show_entries', 'format=duration',
+                                '-i', url]).split(b'\n')
+    except CalledProcessError as exc:
+        raise ValueError('error probing `%s`' % url) from exc
     duration = next(l for l in lineout if l.startswith(b'duration='))
     value = duration.split(b'=')[1]
     return int(float(value))