Compare commits

...

2 commits

c0cc90a2f3  very new files are not removed
    this makes download_http more reliable when reusing a file that could otherwise have been removed by UnusedCleaner
    2022-01-03 00:53:18 +01:00

b35da0a8d0  download_http doesn't download again
    refs #17
    2022-01-02 20:16:55 +01:00
2 changed files with 62 additions and 4 deletions
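
Taken together, the two commits make download_http reuse an already-downloaded copy of a URL instead of fetching it again, and rely on touching the reused file so that UnusedCleaner leaves it alone. The following is a minimal standalone sketch of that caching idea, not the project's code; the name cached_download and the paths are made up for illustration:

# sketch: map a URL to a stable file name, reuse it when present,
# touch it so an age-based cleaner will not remove it right away
import hashlib
import os
import urllib.request
from pathlib import Path


def cached_download(url, destdir=None, prefix="httpdl"):
    destdir = Path(destdir or os.getenv("TMPDIR", "/tmp")).resolve()
    # the same URL always maps to the same target file
    name = "%s-%s" % (prefix, hashlib.sha1(url.encode("utf8")).hexdigest())
    target = destdir / name
    if target.exists():
        target.touch()          # refresh mtime so an age-based cleaner skips it
        return target.as_uri()  # reuse the cached copy, no second download
    part = target.with_name(name + ".part")
    urllib.request.urlretrieve(url, str(part))  # download under a temporary name
    part.rename(target)                         # publish within the same directory
    return target.as_uri()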

View file

@@ -6,6 +6,10 @@ import posixpath
 import urllib.request
 from tempfile import mkstemp
 from urllib.parse import urlparse
+from pathlib import Path
+import hashlib
+import requests
 log = logging.getLogger(__name__)
@@ -46,6 +50,14 @@ def shortname(path):
     return name
+def http_expected_length(url):
+    resp = requests.head(url, allow_redirects=True)
+    resp.raise_for_status()
+    header_value = resp.headers.get('content-length')
+    expected_length = int(header_value)
+    return expected_length
 def download_http(url, destdir=None, copy=False, prefix="httpdl"):
     if url.split(":")[0] not in ("http", "https"):
         log.warning("Not a valid URL: %s", url)
@@ -56,15 +68,43 @@ def download_http(url, destdir=None, copy=False, prefix="httpdl"):
         return None
     if not copy:
         return url
+    if destdir is None:
+        destdir = os.getenv('TMPDIR', '/tmp/')
     fname = posixpath.basename(urlparse(url).path)
     # sanitize
     fname = "".join(
-        c for c in fname if c.isalnum() or c in list("._-")
+        c for c in fname if c.isalnum() or c in list("_-")
     ).rstrip()
+    url_hash = hashlib.sha1(url.encode('utf8')).hexdigest()
+    final_path = Path(destdir) / ('%s-%s-%s.%s' % (prefix, fname[:20], url_hash, ext))
+    # it might already be fully downloaded, let's check
+    if final_path.exists():
+        # this "touch" helps avoid a race condition in which
+        # UnusedCleaner could delete this file
+        final_path.touch()
+        actual_size = final_path.stat().st_size
+        try:
+            expected_size = http_expected_length(url)
+        except Exception as exc:
+            log.debug("Could not determine expected length for %s: %s", url, exc)
+        else:
+            if expected_size == actual_size:
+                log.debug("File %s already present and complete, download not needed", final_path)
+                return final_path.as_uri()
+            else:
+                log.debug("File %s is already present, but has the wrong length: %d but expected %d", final_path, actual_size, expected_size)
+    else:
+        log.debug("File %s does not exist", final_path)
     tmp = mkstemp(
-        suffix="." + ext, prefix="%s-%s-" % (prefix, fname), dir=destdir
+        suffix="." + ext, prefix="%s-%s-%s-" % (prefix, fname, url_hash), dir=destdir
     )
     os.close(tmp[0])
-    log.info("downloading %s -> %s", url, tmp[1])
+    log.info("downloading %s -> %s -> %s", url, tmp[1], final_path)
     fname, headers = urllib.request.urlretrieve(url, tmp[1])
-    return "file://%s" % os.path.realpath(tmp[1])
+    Path(fname).rename(final_path)
+    return final_path.as_uri()
+    # "file://%s" % os.path.realpath(final_path)
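
The new http_expected_length helper above lets download_http compare the size of the file on disk with the Content-Length reported by a HEAD request before deciding to skip the download. A hedged, self-contained example of that kind of check; the name looks_complete and the example URL/path are placeholders, not part of the project:

# compare local size with the server-reported Content-Length
import requests
from pathlib import Path


def looks_complete(path, url):
    resp = requests.head(url, allow_redirects=True)
    resp.raise_for_status()
    length = resp.headers.get("content-length")
    if length is None:
        return False  # the server did not say; assume a re-download is needed
    return Path(path).stat().st_size == int(length)


# usage sketch:
# if looks_complete("/tmp/httpdl-foo.mp3", "https://example.org/foo.mp3"):
#     print("reuse the cached copy")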

View file

@@ -7,6 +7,8 @@ This component will look for files to be removed. There are some assumptions:
 import logging
 import os
 from os.path import normpath
+from pathlib import Path
+import time
 import mpd
@@ -30,6 +32,10 @@ except ImportError:
 class UnusedCleaner:
+    # ONLY_DELETE_OLDER_THAN is expressed in seconds.
+    # Only files older than this are considered for removal.
+    # Set it to None if you want to disable this feature.
+    ONLY_DELETE_OLDER_THAN = 30
     def __init__(self, conf):
         self.conf = conf
         self.waiting_removal_files = set()
@@ -69,7 +75,19 @@
             for song in mpdc.playlistid()
             if song["file"].startswith("/")
         }
+        now = time.time()
         for fpath in self.waiting_removal_files - files_in_playlist:
+            # audio files are sometimes reused, as in download_http. To avoid
+            # referencing a file that UnusedCleaner is going to remove, users
+            # are invited to touch the file, so that UnusedCleaner doesn't
+            # consider it for removal. While this doesn't conceptually solve
+            # the race condition, it should now be extremely rare.
+            if self.ONLY_DELETE_OLDER_THAN is not None:
+                mtime = Path(fpath).stat().st_mtime
+                if now - mtime < self.ONLY_DELETE_OLDER_THAN:
+                    continue
            # we can remove it!
            self.log.debug("removing unused: %s", fpath)
            self.waiting_removal_files.remove(fpath)
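
The guard added above only deletes files whose mtime is older than ONLY_DELETE_OLDER_THAN seconds, which is what makes the "touch before reuse" trick in download_http effective. A small self-contained sketch of the same age check; removable and the example path are made-up names, not the project's API:

# a file is only eligible for removal once its mtime is older than a threshold
import time
from pathlib import Path

ONLY_DELETE_OLDER_THAN = 30  # seconds; set to None to disable the guard


def removable(path, now=None):
    """Return True only when the file's mtime is older than the threshold."""
    if ONLY_DELETE_OLDER_THAN is None:
        return True
    now = time.time() if now is None else now
    return (now - Path(path).stat().st_mtime) >= ONLY_DELETE_OLDER_THAN


# usage sketch: a freshly touched file stays protected for another window
# p = Path("/tmp/example-unused.ogg"); p.touch(); assert not removable(p)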