12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- import fnmatch
- import logging
- import mimetypes
- import os
- import posixpath
- import urllib.request
- from tempfile import mkstemp
- from urllib.parse import urlparse
- log = logging.getLogger(__name__)
def scan_dir(dirname, extension=None):
    """Recursively yield the paths of files below *dirname*.

    Despite its name, *extension* is handed to ``fnmatch.filter`` as a
    complete glob pattern that is tested against each bare file name
    (for example ``"*.mp3"``).  ``None`` selects every file (``"*"``).

    Paths are produced lazily, in ``os.walk`` order, as
    ``os.path.join(<directory>, <file name>)``.
    """
    pattern = "*" if extension is None else extension
    for current_dir, _subdirs, files in os.walk(dirname):
        yield from (
            os.path.join(current_dir, name)
            for name in fnmatch.filter(files, pattern)
        )
def multi_fnmatch(fname, extensions):
    """Tell whether *fname* ends in one of the given extensions.

    Each entry of *extensions* is given without the leading dot and is
    turned into the glob ``"*.<ext>"`` before being checked with
    ``fnmatch.fnmatch``.  Returns ``True`` on the first match and
    ``False`` when nothing matches (including an empty *extensions*).
    """
    return any(
        fnmatch.fnmatch(fname, "*." + suffix) for suffix in extensions
    )
def is_audio(fname):
    """Return True when the MIME type guessed for *fname* is ``audio/*``.

    The guess comes from ``mimetypes.guess_type``, so it is based on the
    file name only; the file is never opened.  Names whose type cannot be
    guessed are reported as not audio.
    """
    guessed, _encoding = mimetypes.guess_type(fname)
    # The major type is everything before the first "/".
    return guessed is not None and guessed.partition("/")[0] == "audio"
def scan_dir_audio(dirname, extensions=("mp3", "oga", "wav", "ogg")):
    """Recursively yield the paths of audio files below *dirname*.

    A file counts as audio when ``is_audio`` accepts its name.

    NOTE: *extensions* is accepted for interface compatibility but is
    not consulted by this function; selection relies on ``is_audio``
    alone.
    """
    for current_dir, _subdirs, files in os.walk(dirname):
        yield from (
            os.path.join(current_dir, name)
            for name in files
            if is_audio(name)
        )
def shortname(path):
    """Build a compact alphanumeric identifier from a file path.

    Takes the final path component, drops everything from its last dot
    onwards (when there is a dot) and removes every character for which
    ``str.isalnum`` is false.
    """
    base = os.path.basename(path)
    stem, dot, _ext = base.rpartition(".")
    if not dot:
        # No dot in the name: there is no extension to strip.
        stem = base
    return "".join(filter(str.isalnum, stem))
def download_http(url, destdir=None, copy=False, prefix="httpdl"):
    """Validate an HTTP(S) audio URL and optionally download it.

    Args:
        url: The URL to check.  Its scheme must be ``http`` or ``https``
            (compared case-insensitively) and it must point at a file
            with one of the extensions mp3, ogg, oga, wma or m4a.
        destdir: Directory for the downloaded copy; ``None`` lets
            ``mkstemp`` choose the default temporary directory.  Only
            used when *copy* is true.
        copy: When false the URL is only validated and returned as-is.
            When true the resource is downloaded to a new temporary file.
        prefix: Leading part of the temporary file name.

    Returns:
        ``None`` if the URL is rejected (a warning is logged), the
        unchanged *url* when *copy* is false, or a ``file://`` URL of the
        downloaded copy when *copy* is true.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises when the download
        fails; the temporary file is removed before re-raising.
    """
    allowed = ("mp3", "ogg", "oga", "wma", "m4a")

    scheme = url.split(":", 1)[0].lower()
    if scheme not in ("http", "https"):
        log.warning("Not a valid URL: %s", url)
        return None

    try:
        path = urlparse(url).path
    except ValueError:
        # Malformed netloc (e.g. broken IPv6 literal): no usable path.
        path = ""

    # Prefer the extension of the URL *path* so that a query string or
    # fragment ("a.mp3?token=1") does not hide the real file type.
    ext = posixpath.splitext(path)[1][1:]
    if ext.lower() not in allowed:
        # Fall back to the suffix of the whole URL, which keeps accepting
        # URLs that carry the file name outside the path.
        ext = url.split(".")[-1]
    if ext.lower() not in allowed:
        log.warning('Invalid format (%s) for "%s"', ext, url)
        return None

    if not copy:
        return url

    # Keep only characters that are safe inside a file name.
    safe_name = "".join(
        c for c in posixpath.basename(path) if c.isalnum() or c in "._-"
    )
    fd, tmp_path = mkstemp(
        suffix="." + ext, prefix="%s-%s-" % (prefix, safe_name), dir=destdir
    )
    # urlretrieve reopens the path itself; only the reserved name is needed.
    os.close(fd)
    log.info("downloading %s -> %s", url, tmp_path)
    try:
        urllib.request.urlretrieve(url, tmp_path)
    except Exception:
        # Do not leave an empty/partial temp file behind on failure.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
    return "file://%s" % os.path.realpath(tmp_path)
|