audiogen http: download factored in fsutils

This commit is contained in:
boyska 2020-06-21 12:56:53 +02:00
parent 9e3c2c5194
commit bf5eca28c3
2 changed files with 35 additions and 31 deletions

View file

@ -1,31 +1,4 @@
import os
import logging
import posixpath
from tempfile import mkstemp
import urllib.request
from urllib.parse import urlparse
log = logging.getLogger(__name__)
def put(url, destdir=None, copy=False):
    """Check an HTTP(S) audio URL and, if requested, fetch it locally.

    A URL is accepted when the text before its first ``:`` is ``http`` or
    ``https`` and the text after its last ``.`` (compared
    case-insensitively) is one of the known audio extensions.

    Args:
        url: URL to check and possibly download.
        destdir: directory for the temporary file, handed to
            ``tempfile.mkstemp`` as ``dir``.
        copy: if false, an accepted URL is returned as-is and nothing is
            downloaded; if true, the resource is saved to a temporary file.

    Returns:
        ``None`` when the URL is rejected, the unchanged URL when ``copy``
        is false, otherwise a ``file://`` URI of the downloaded file.

    Raises:
        Any exception raised by ``urllib.request.urlretrieve``; the
        temporary file is deleted before the exception propagates.
    """
    if url.split(":")[0] not in ("http", "https"):
        log.warning("Not a valid URL: %s", url)
        return None
    ext = url.split(".")[-1]
    if ext.lower() not in ("mp3", "ogg", "oga", "wma", "m4a"):
        log.warning('Invalid format (%s) for "%s"', ext, url)
        return None
    if not copy:
        return url

    fname = posixpath.basename(urlparse(url).path)
    # sanitize: keep only alphanumerics and a few harmless punctuation chars
    fname = "".join(c for c in fname if c.isalnum() or c in "._-")
    fd, path = mkstemp(
        suffix="." + ext, prefix="http-%s-" % fname, dir=destdir
    )
    # mkstemp returns an open descriptor; urlretrieve reopens by path.
    os.close(fd)
    log.info("downloading %s -> %s", url, path)
    try:
        urllib.request.urlretrieve(url, path)
    except BaseException:
        # A failed download must not leave an empty/partial file behind.
        try:
            os.unlink(path)
        except OSError:
            pass
        raise
    return "file://%s" % os.path.realpath(path)
from larigira.fsutils import download_http
def generate(spec):
@ -35,10 +8,10 @@ def generate(spec):
Recognized argument is "urls" (list of http/https URLs)
"""
if "urls" not in spec:
raise ValueError("Malformed audiospec: missing 'paths'")
raise ValueError("Malformed audiospec: missing 'urls'")
for url in spec["urls"]:
ret = put(url, copy=True)
ret = download_http(url, copy=True, prefix="http")
if ret is None:
continue
yield ret

View file

@ -1,6 +1,13 @@
import os
import fnmatch
import logging
import mimetypes
import os
import posixpath
import urllib.request
from tempfile import mkstemp
from urllib.parse import urlparse
log = logging.getLogger(__name__)
def scan_dir(dirname, extension=None):
@ -37,3 +44,27 @@ def shortname(path):
name = name.rsplit(".", 1)[0] # no extension
name = "".join(c for c in name if c.isalnum()) # no strange chars
return name
def download_http(url, destdir=None, copy=False, prefix="httpdl"):
    """Validate an HTTP(S) audio URL and optionally download it.

    The URL is accepted only if the part before its first ``:`` is
    ``http`` or ``https`` and the part after its last ``.`` is, ignoring
    case, one of ``mp3``, ``ogg``, ``oga``, ``wma``, ``m4a``. The extension
    is taken from the whole URL string, so anything following it (e.g. a
    query string) makes the URL be rejected.

    Args:
        url: the URL to validate and, when ``copy`` is true, download.
        destdir: directory where the temporary file is created; passed to
            ``tempfile.mkstemp`` as ``dir``.
        copy: when false, a valid URL is returned untouched and no
            download happens; when true, the resource is fetched to a
            temporary file.
        prefix: first component of the temporary file name.

    Returns:
        ``None`` if the URL is rejected, ``url`` itself if ``copy`` is
        false, otherwise a ``file://`` URI pointing at the downloaded file.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises on failure; the
        temporary file is removed before the exception is re-raised.
    """
    if url.split(":")[0] not in ("http", "https"):
        log.warning("Not a valid URL: %s", url)
        return None
    ext = url.split(".")[-1]
    if ext.lower() not in ("mp3", "ogg", "oga", "wma", "m4a"):
        log.warning('Invalid format (%s) for "%s"', ext, url)
        return None
    if not copy:
        return url

    fname = posixpath.basename(urlparse(url).path)
    # sanitize: only alphanumerics and "._-" survive in the file name
    fname = "".join(c for c in fname if c.isalnum() or c in "._-")
    fd, path = mkstemp(
        suffix="." + ext, prefix="%s-%s-" % (prefix, fname), dir=destdir
    )
    # Only the reserved path is needed; urlretrieve opens it on its own.
    os.close(fd)
    log.info("downloading %s -> %s", url, path)
    try:
        urllib.request.urlretrieve(url, path)
    except BaseException:
        # Do not leak the (empty or partial) temp file if the fetch fails.
        try:
            os.unlink(path)
        except OSError:
            pass
        raise
    return "file://%s" % os.path.realpath(path)