Compare commits

..

2 commits

Author SHA1 Message Date
dfc59e94f9 FIX exception reporting causing another exception 2024-05-30 10:06:26 +02:00
3e609581cf podcast: ignores broken items 2022-05-06 13:31:07 +02:00
4 changed files with 15 additions and 68 deletions

View file

@@ -142,16 +142,21 @@ def get_audio_from_item(item):
 def get_urls(tree):
     items = tree.xpath("//item")
-    for it in items:
-        # title = it.find("title").text
-        audio = get_audio_from_item(it)
+    for i, it in enumerate(items):
+        try:
+            audio = get_audio_from_item(it)
+        except Exception:
+            logging.error("Could not parse item #%d, skipping", i)
+            continue
         if audio is None:
             continue
         if audio.date is None:
-            audio.date = get_item_date(it)
+            try:
+                audio.date = get_item_date(it)
+            except Exception:
+                logging.warn("Could not find date for item #%d", i)
         yield audio


 def parse_duration(arg):
     if arg.isdecimal():
         secs = int(arg)
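
Taken together, the hunk above means one malformed <item> no longer aborts the whole feed. A standalone sketch of the same skip-and-log pattern (hypothetical two-good-one-broken feed; assumes lxml, which the tree.xpath() call implies):

import logging
from lxml import etree

# Hypothetical feed: item #1 has no enclosure and stands in for whatever
# input makes get_audio_from_item() raise.
FEED = b"""<rss><channel>
<item><enclosure url="http://example.com/a.mp3"/></item>
<item><title>broken</title></item>
<item><enclosure url="http://example.com/b.mp3"/></item>
</channel></rss>"""

for i, it in enumerate(etree.fromstring(FEED).xpath("//item")):
    try:
        # find() returns None for the missing tag, so .attrib raises here
        url = it.find("enclosure").attrib["url"]
    except Exception:
        logging.error("Could not parse item #%d, skipping", i)
        continue
    print(url)  # prints a.mp3 and b.mp3; item #1 is only logged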

View file

@@ -68,7 +68,7 @@ def percentwait(songs, context, conf, getdur=get_duration):
         # must be an error! mutagen support is not always perfect
         return (
             True,
-            ("mutagen could not calculate length of %s" % ",".songs["uris"]),
+            ("mutagen could not calculate length of %s" % ",".join(songs["uris"])),
         )
     wait = eventduration * (percentwait / 100.0)
     if remaining > wait:
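
This is the one-line fix named in commit dfc59e94f9: on the old line, ",".songs is attribute access on a string literal, so formatting the error message raised AttributeError and masked the original mutagen failure. The failure mode in isolation, with a made-up songs dict:

# Made-up songs dict, only to demonstrate the old line's failure mode.
songs = {"uris": ["a.ogg", "b.ogg"]}
try:
    "mutagen could not calculate length of %s" % ",".songs["uris"]
except AttributeError as exc:
    print(exc)  # 'str' object has no attribute 'songs'

# The corrected line produces the intended message:
print("mutagen could not calculate length of %s" % ",".join(songs["uris"]))
# -> mutagen could not calculate length of a.ogg,b.ogg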

View file

@@ -6,10 +6,6 @@ import posixpath
 import urllib.request
 from tempfile import mkstemp
 from urllib.parse import urlparse
-from pathlib import Path
-
-import hashlib
-import requests

 log = logging.getLogger(__name__)
@@ -50,14 +46,6 @@ def shortname(path):
     return name


-def http_expected_length(url):
-    resp = requests.head(url, allow_redirects=True)
-    resp.raise_for_status()
-    header_value = resp.headers.get('content-length')
-    expected_length = int(header_value)
-    return expected_length
-
-
 def download_http(url, destdir=None, copy=False, prefix="httpdl"):
     if url.split(":")[0] not in ("http", "https"):
         log.warning("Not a valid URL: %s", url)
@@ -68,43 +56,15 @@ def download_http(url, destdir=None, copy=False, prefix="httpdl"):
         return None
     if not copy:
         return url
-    if destdir is None:
-        destdir = os.getenv('TMPDIR', '/tmp/')
     fname = posixpath.basename(urlparse(url).path)
     # sanitize
     fname = "".join(
-        c for c in fname if c.isalnum() or c in list("_-")
+        c for c in fname if c.isalnum() or c in list("._-")
     ).rstrip()
-    url_hash = hashlib.sha1(url.encode('utf8')).hexdigest()
-    final_path = Path(destdir) / ('%s-%s-%s.%s' % (prefix, fname[:20], url_hash, ext))
-    # it might be already fully downloaded, let's check
-    if final_path.exists():
-        # this "touch" helps avoiding a race condition in which the
-        # UnusedCleaner could delete this
-        final_path.touch()
-        actual_size = final_path.stat().st_size
-        try:
-            expected_size = http_expected_length(url)
-        except Exception as exc:
-            log.debug("Could not determine expected length for %s: %s", url, exc)
-        else:
-            if expected_size == actual_size:
-                log.debug("File %s already present and complete, download not needed", final_path)
-                return final_path.as_uri()
-            else:
-                log.debug("File %s is already present, but has the wrong length: %d but expected %d", final_path, actual_size, expected_size)
-    else:
-        log.debug("File %s does not exist", final_path)
     tmp = mkstemp(
-        suffix="." + ext, prefix="%s-%s-%s-" % (prefix, fname, url_hash), dir=destdir
+        suffix="." + ext, prefix="%s-%s-" % (prefix, fname), dir=destdir
     )
     os.close(tmp[0])
-    log.info("downloading %s -> %s -> %s", url, tmp[1], final_path)
+    log.info("downloading %s -> %s", url, tmp[1])
     fname, headers = urllib.request.urlretrieve(url, tmp[1])
-    Path(fname).rename(final_path)
-    return final_path.as_uri()
-    # "file://%s" % os.path.realpath(final_path)
+    return "file://%s" % os.path.realpath(tmp[1])

View file

@@ -7,8 +7,6 @@ This component will look for files to be removed. There are some assumptions:
 import logging
 import os
 from os.path import normpath
-from pathlib import Path
-import time

 import mpd
@@ -32,10 +30,6 @@ except ImportError:
 class UnusedCleaner:
-    # ONLY_DELETE_OLDER_THAN is expressed in seconds.
-    # It configures the maximum age a file can have before being removed.
-    # Set it to "None" if you want to disable this feature.
-    ONLY_DELETE_OLDER_THAN = 30
     def __init__(self, conf):
         self.conf = conf
         self.waiting_removal_files = set()
@@ -75,19 +69,7 @@
                 for song in mpdc.playlistid()
                 if song["file"].startswith("/")
             }
-            now = time.time()
             for fpath in self.waiting_removal_files - files_in_playlist:
-                # audio files are sometimes reused, as in download_http. To avoid
-                # referencing a file that UnusedCleaner is going to remove, users
-                # are invited to touch the file, so that UnusedCleaner doesn't
-                # consider it for removal. While this doesn't conceptually solve
-                # the race condition, it should now be extremely rare.
-                if ONLY_DELETE_OLDER_THAN is not None:
-                    mtime = Path(fpath).stat().st_mtime
-                    if now - mtime < ONLY_DELETE_OLDER_THAN:
-                        continue
                 # we can remove it!
                 self.log.debug("removing unused: %s", fpath)
                 self.waiting_removal_files.remove(fpath)
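
The block removed here was a grace period: files whose mtime was fresher than ONLY_DELETE_OLDER_THAN seconds survived one sweep, and the touch() that download_http used to apply on reuse kept live files out of the reaper. With both halves gone, cleanup is again a plain set difference against the playlist. The age guard in isolation (standalone sketch, not the project's API):

import time
from pathlib import Path

ONLY_DELETE_OLDER_THAN = 30  # seconds; None disables the guard

def old_enough(fpath):
    # True when fpath was last touched at least ONLY_DELETE_OLDER_THAN
    # seconds ago and is therefore deletable under the removed policy.
    if ONLY_DELETE_OLDER_THAN is None:
        return True
    return time.time() - Path(fpath).stat().st_mtime >= ONLY_DELETE_OLDER_THAN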