Browse Source

feed --copy: sanitize url

boyska 7 years ago
parent
commit
1033871b60
1 changed file with 9 additions and 2 deletions
  1. 9 2
      feed

+ 9 - 2
feed

@@ -6,6 +6,8 @@ from subprocess import check_output
 from collections import OrderedDict
 import re
 import urllib.request
+from urllib.parse import urlparse, unquote
+import posixpath
 
 from lxml import html
 import requests
@@ -68,7 +70,7 @@ def get_audio_from_description(text):
     durata = None
     if len(lines) > 1:
         durata = int(re.findall(r'\d+', lines[1].split('=')[1].strip())[0])
-    return Audio(url, durata)
+    return Audio(unquote(url), durata)
 
 
 def get_urls(tree):
@@ -117,12 +119,17 @@ def get_parser():
     p.add_argument('url')
     return p
 
+
 def put(audio, copy=False):
     if not copy:
         print(audio.url)
     else:
         destdir = (os.environ.get('TMPDIR', '.'))
-        dest = os.path.join(destdir, audio.url.split('/')[-1])
+        fname = posixpath.basename(urlparse(audio.url).path)
+        # sanitize
+        fname = "".join(c for c in fname
+                        if c.isalnum() or c in list('._-')).rstrip()
+        dest = os.path.join(destdir, fname)
         os.makedirs(destdir, exist_ok=True)
         fname, headers = urllib.request.urlretrieve(audio.url, dest)
         print(fname)