audiogen http: download factored in fsutils
This commit is contained in:
parent
9e3c2c5194
commit
bf5eca28c3
2 changed files with 35 additions and 31 deletions
|
@ -1,31 +1,4 @@
|
||||||
import os
|
from larigira.fsutils import download_http
|
||||||
import logging
|
|
||||||
import posixpath
|
|
||||||
from tempfile import mkstemp
|
|
||||||
import urllib.request
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def put(url, destdir=None, copy=False):
    """
    Check that `url` points to an HTTP(S) audio file and optionally fetch it.

    The scheme (text before the first ":") must be "http" or "https", and the
    text after the last "." must be one of the known audio extensions.

    With `copy` false the URL itself is returned. With `copy` true the
    resource is downloaded into a temporary file created inside `destdir`
    and a "file://" URI of that file is returned.

    Returns None when the URL is rejected.
    """
    scheme = url.split(":")[0]
    if scheme not in ("http", "https"):
        log.warning("Not a valid URL: %s", url)
        return None

    ext = url.split(".")[-1]
    known_formats = ("mp3", "ogg", "oga", "wma", "m4a")
    if ext.lower() not in known_formats:
        log.warning('Invalid format (%s) for "%s"', ext, url)
        return None

    if not copy:
        return url

    # Keep only harmless characters of the remote file name
    remote_name = posixpath.basename(urlparse(url).path)
    kept = [ch for ch in remote_name if ch.isalnum() or ch in ("." , "_", "-")]
    safe_name = "".join(kept).rstrip()

    handle, local_path = mkstemp(
        suffix="." + ext, prefix="http-%s-" % safe_name, dir=destdir
    )
    # mkstemp opens the file; only its path is needed by urlretrieve
    os.close(handle)
    log.info("downloading %s -> %s", url, local_path)
    urllib.request.urlretrieve(url, local_path)
    return "file://%s" % os.path.realpath(local_path)
|
|
||||||
|
|
||||||
|
|
||||||
def generate(spec):
|
def generate(spec):
|
||||||
|
@ -35,10 +8,10 @@ def generate(spec):
|
||||||
Recognized argument is "paths" (list of static paths)
|
Recognized argument is "urls" (list of http/https URLs)
|
||||||
"""
|
"""
|
||||||
if "urls" not in spec:
|
if "urls" not in spec:
|
||||||
raise ValueError("Malformed audiospec: missing 'paths'")
|
raise ValueError("Malformed audiospec: missing 'urls'")
|
||||||
|
|
||||||
for url in spec["urls"]:
|
for url in spec["urls"]:
|
||||||
ret = put(url, copy=True)
|
ret = download_http(url, copy=True, prefix="http")
|
||||||
if ret is None:
|
if ret is None:
|
||||||
continue
|
continue
|
||||||
yield ret
|
yield ret
|
||||||
|
|
|
@ -1,6 +1,13 @@
|
||||||
import os
|
|
||||||
import fnmatch
|
import fnmatch
|
||||||
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
import os
|
||||||
|
import posixpath
|
||||||
|
import urllib.request
|
||||||
|
from tempfile import mkstemp
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def scan_dir(dirname, extension=None):
|
def scan_dir(dirname, extension=None):
|
||||||
|
@ -37,3 +44,27 @@ def shortname(path):
|
||||||
name = name.rsplit(".", 1)[0] # no extension
|
name = name.rsplit(".", 1)[0] # no extension
|
||||||
name = "".join(c for c in name if c.isalnum()) # no strange chars
|
name = "".join(c for c in name if c.isalnum()) # no strange chars
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
def download_http(url, destdir=None, copy=False, prefix="httpdl"):
    """
    Validate an HTTP(S) audio URL and, if requested, download it.

    :param url: URL to check; only the "http" and "https" schemes are
        accepted (compared case-insensitively)
    :param destdir: directory in which the temporary file is created
        (passed to ``mkstemp`` as ``dir``)
    :param copy: when false, the validated URL is returned untouched; when
        true, the resource is downloaded to a temporary file
    :param prefix: first component of the temporary file name
    :returns: ``None`` if the URL is rejected, ``url`` itself if ``copy`` is
        false, otherwise a ``file://`` URI of the downloaded file
    :raises: whatever ``urllib.request.urlretrieve`` raises; the temporary
        file is removed before the exception propagates
    """
    audio_exts = ("mp3", "ogg", "oga", "wma", "m4a")

    if url.split(":", 1)[0].lower() not in ("http", "https"):
        log.warning("Not a valid URL: %s", url)
        return None
    try:
        urlpath = urlparse(url).path
    except ValueError:
        # e.g. unbalanced brackets in the host part
        log.warning("Not a valid URL: %s", url)
        return None

    # Look at the path first, so that a query string or a fragment
    # ("a.mp3?token=x", "a.mp3#t=10") does not hide the real extension.
    ext = posixpath.splitext(urlpath)[1][1:]
    if ext.lower() not in audio_exts:
        # Fall back to the text after the last dot of the whole URL, so that
        # URLs such as "get?file=song.mp3" keep being accepted.
        ext = url.split(".")[-1]
    if ext.lower() not in audio_exts:
        log.warning('Invalid format (%s) for "%s"', ext, url)
        return None

    if not copy:
        return url

    fname = posixpath.basename(urlpath)
    # sanitize
    fname = "".join(c for c in fname if c.isalnum() or c in "._-").rstrip()
    fd, tmppath = mkstemp(
        suffix="." + ext, prefix="%s-%s-" % (prefix, fname), dir=destdir
    )
    # mkstemp opens the file; urlretrieve only needs the path
    os.close(fd)
    log.info("downloading %s -> %s", url, tmppath)
    try:
        urllib.request.urlretrieve(url, tmppath)
    except Exception:
        # Do not leave an empty or partial file behind on failure
        try:
            os.unlink(tmppath)
        except OSError:
            pass
        raise
    return "file://%s" % os.path.realpath(tmppath)
|
||||||
|
|
Loading…
Reference in a new issue