Browse Source

serious packaging

boyska 7 years ago
parent
commit
9e77197ec4
6 changed files with 225 additions and 176 deletions
  1. 0 0
      bandcavall/__init__.py
  2. 35 0
      bandcavall/download.py
  3. 98 0
      bandcavall/main.py
  4. 54 0
      bandcavall/visit.py
  5. 0 176
      fetchcose.py
  6. 38 0
      setup.py

+ 0 - 0
bandcavall/__init__.py


+ 35 - 0
bandcavall/download.py

@@ -0,0 +1,35 @@
+import os
+from os.path import join, exists
+import requests
+import mutagen.mp3
+import logging
+log = logging.getLogger(__name__)
+
+
+def download_track(track, args):
+    log.debug('Downloading "%s"' % track['title'])
+    if 'album' in track:
+        fdir = join(track['artist'], track['album']).replace(' ', '_')
+    else:
+        fdir = join(track['artist'])
+    fname = track['title'].replace(' ', '_') + '.mp3'
+    fpath = join(args.download_base, fdir, fname)
+    os.makedirs(join(args.download_base, fdir), exist_ok=True)
+    if exists(fpath):
+        log.info('file %s exists, skipping' % fpath)
+    log.debug('download in ' + fpath)
+    resp = requests.get(track['url'], stream=True)
+    if not resp.ok:
+        log.error('Error downloading track ', track['title'])
+        return
+    with open(fpath, 'wb') as buf:
+        log.debug('writing ' + fname)
+        for block in resp.iter_content():
+            buf.write(block)
+    audio = mutagen.mp3.EasyMP3(fpath)  # TODO: is it always mp3?
+    audio['title'] = track['title']
+    audio['artist'] = track['artist']
+    if 'album' in track:
+        audio['album'] = track['album']
+    audio.save()
+    log.info('track %(title)s downloaded' % track)

+ 98 - 0
bandcavall/main.py

@@ -0,0 +1,98 @@
+from os.path import join
+import argparse
+from urllib.parse import urlparse
+import logging
+import json
+import multiprocessing
+import functools
+
+from splinter import Browser
+
+from .download import download_track
+from .visit import visit_track, visit_album, visit_artist
+
+log = logging.getLogger(__name__)
+
+
+def get_parser():
+    p = argparse.ArgumentParser(
+        description='Download tracks from bandcamp, sharing is caring!',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    p.add_argument('url')
+    p.add_argument('--loglevel', default='INFO', help='Log level',
+                   choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])
+    info = p.add_argument_group('fetch track info')
+    info.add_argument('--driver', default='firefox',
+                      help='Internal splinter option',
+                      choices=['firefox', 'chrome'])
+    info.add_argument('--mode', default=None,
+                      choices=['album', 'track', 'artist'])
+    do = p.add_argument_group('what to do with fetched urls')
+    do.add_argument('--do', default='print',
+                    help='What to do with retrieved informations',
+                    choices=['print', 'wgetprint', 'download'])
+    do.add_argument('--download-base', default='.',
+                    help='Only applies to wget')
+    do.add_argument('-j', '--jobs', default=3, type=int,
+                    help='Parallel downloads')
+
+    return p
+
+
+def get_mode(url):
+    path = urlparse(url).path
+    if path.startswith('/track/'):
+        return 'track'
+    if path.startswith('/album/'):
+        return 'album'
+    raise ValueError('unsupported url: "{}"'.format(path))
+
+
+def main():
+    args = get_parser().parse_args()
+    logging.basicConfig(level=args.loglevel)
+    logging.getLogger('selenium.webdriver').setLevel(logging.WARNING)
+    logging.getLogger('requests').setLevel(logging.WARNING)
+
+    if args.mode is None:
+        args.mode = get_mode(args.url)
+        log.info('Going in %s mode' % args.mode)
+
+    prefs = {
+        'dom.audiochannel.mutedByDefault': True
+    }
+    with Browser(args.driver, profile_preferences=prefs) as br:
+        # get track info
+        if args.mode == 'track':
+            tracks = visit_track(br, args.url)
+        elif args.mode == 'album':
+            tracks = visit_album(br, args.url)
+        else:
+            tracks = visit_artist(br, args.url)
+
+        # do something
+        if args.do == 'print':
+            for track in tracks:
+                print(json.dumps(track))
+        elif args.do == 'wgetprint':
+            for track in tracks:
+                cmd = ['wget', '-nv', "'%s'" % track['url'],
+                       '-O', "'%s'" %
+                       join(args.download_base,
+                            '-'.join((track['artist'], track['album'],
+                                      track['title']))).replace(' ', '_')
+                       ]
+                print(' '.join(cmd))
+        elif args.do == 'download':
+            pool = multiprocessing.Pool(processes=args.jobs)
+            for res in pool.imap_unordered(functools.partial(download_track,
+                                                             args=args),
+                                           tracks):
+                print('R', res)
+            pool.close()
+            pool.join()
+        else:
+            raise NotImplementedError('action ' + args.do)
+
+if __name__ == '__main__':
+    main()

+ 54 - 0
bandcavall/visit.py

@@ -0,0 +1,54 @@
+import time
+import logging
+
+from splinter.exceptions import ElementDoesNotExist
+
+log = logging.getLogger(__name__)
+
+
+def visit_track(b, url):
+    """Open a track page and collect its metadata and audio URL.
+
+    ``b`` is the browser object used for navigation and element lookup;
+    ``url`` is the address of the track page.
+
+    Returns a dict with ``artist``, ``title`` and ``url`` keys, plus an
+    ``album`` key when the page contains an ``inAlbum`` element.
+    """
+    log.debug('Visiting track %s' % url)
+    b.visit(url)
+    # NOTE(review): presumably starting playback is what makes the page
+    # fill in the <audio> element's src that is read below -- confirm.
+    b.find_by_css('.playbutton').first.click()
+    info = {}
+    info['artist'] = b.find_by_xpath('//span[@itemprop="byArtist"]') \
+        .first.value
+    try:
+        info['album'] = b.find_by_xpath('//span[@itemprop="inAlbum"]') \
+            .first.value
+    except ElementDoesNotExist:  # non-album tracks
+        pass
+    info['title'] = b.find_by_css('#name-section h2.trackTitle').first.value
+    time.sleep(0.2)  # how nice!
+    # Second click on the same button (presumably stops playback) before
+    # the audio source is read.
+    b.find_by_css('.playbutton').first.click()
+    info['url'] = b.find_by_css('audio').first['src']
+    return info
+
+
+def visit_album(b, url):
+    log.debug('Visiting album %s' % url)
+    b.visit(url)
+    track_urls = [link['href'] for link in
+                  b.find_by_css('.track_row_view.linked .title a')
+                  if link['href'] and '/track/' in link['href']]
+    for track_url in track_urls:
+        yield visit_track(b, track_url)
+
+
+def visit_artist(b, url):
+    log.debug('Visiting artist %s' % url)
+    b.visit(url)
+    album_urls = [link['href'] for link in
+                  b.find_by_css('.square a')
+                  if link['href'] and '/album/' in link['href']]
+    nonalbumtrack_urls = [link['href'] for link in
+                          b.find_by_css('.square a')
+                          if link['href'] and '/track/' in link['href']]
+    log.debug('Found %d album + %d non-album tracks' % (len(album_urls),
+                                                        len(nonalbumtrack_urls)
+                                                        ))
+    for track_url in nonalbumtrack_urls:
+        yield visit_track(b, track_url)
+    for album_url in album_urls:
+        for track in visit_album(b, album_url):
+            yield track

+ 0 - 176
fetchcose.py

@@ -1,176 +0,0 @@
-import time
-import os
-from os.path import join, exists
-import argparse
-from urllib.parse import urlparse
-import logging
-import json
-import multiprocessing
-
-from splinter import Browser
-from splinter.exceptions import ElementDoesNotExist
-
-log = logging.getLogger(__name__)
-
-
-def visit_track(b, url):
-    log.debug('Visiting track %s' % url)
-    b.visit(url)
-    b.find_by_css('.playbutton').first.click()
-    info = {}
-    info['artist'] = b.find_by_xpath('//span[@itemprop="byArtist"]') \
-        .first.value
-    try:
-        info['album'] = b.find_by_xpath('//span[@itemprop="inAlbum"]') \
-            .first.value
-    except ElementDoesNotExist:  # non-album tracks
-        pass
-    info['title'] = b.find_by_css('#name-section h2.trackTitle').first.value
-    time.sleep(0.2)  # how nice!
-    b.find_by_css('.playbutton').first.click()
-    info['url'] = b.find_by_css('audio').first['src']
-    return info
-
-
-def visit_album(b, url):
-    log.debug('Visiting album %s' % url)
-    b.visit(url)
-    track_urls = [link['href'] for link in
-                  b.find_by_css('.track_row_view.linked .title a')
-                  if link['href'] and '/track/' in link['href']]
-    for track_url in track_urls:
-        yield visit_track(b, track_url)
-
-
-def visit_artist(b, url):
-    log.debug('Visiting artist %s' % url)
-    b.visit(url)
-    album_urls = [link['href'] for link in
-                  b.find_by_css('.square a')
-                  if link['href'] and '/album/' in link['href']]
-    nonalbumtrack_urls = [link['href'] for link in
-                          b.find_by_css('.square a')
-                          if link['href'] and '/track/' in link['href']]
-    log.debug('Found %d album + %d non-album tracks' % (len(album_urls),
-                                                        len(nonalbumtrack_urls)
-                                                        ))
-    for track_url in nonalbumtrack_urls:
-        yield visit_track(b, track_url)
-    for album_url in album_urls:
-        for track in visit_album(b, album_url):
-            yield track
-
-
-def download_track(track):
-    log.debug('Downloading "%s"' % track['title'])
-    if 'album' in track:
-        fdir = join(track['artist'], track['album']).replace(' ', '_')
-    else:
-        fdir = join(track['artist'])
-    fname = track['title'].replace(' ', '_') + '.mp3'
-    fpath = join(args.download_base, fdir, fname)
-    os.makedirs(join(args.download_base, fdir), exist_ok=True)
-    if exists(fpath):
-        log.info('file %s exists, skipping' % fpath)
-    log.debug('download in ' + fpath)
-    resp = requests.get(track['url'], stream=True)
-    if not resp.ok:
-        log.error('Error downloading track ', track['title'])
-        return
-    with open(fpath, 'wb') as buf:
-        log.debug('writing ' + fname)
-        for block in resp.iter_content():
-            buf.write(block)
-    audio = mutagen.mp3.EasyMP3(fpath)  # TODO: is it always mp3?
-    audio['title'] = track['title']
-    audio['artist'] = track['artist']
-    if 'album' in track:
-        audio['album'] = track['album']
-    audio.save()
-    log.info('track %(title)s downloaded' % track)
-    return fpath
-
-
-def get_parser():
-    p = argparse.ArgumentParser(
-        description='Download tracks from bandcamp, sharing is caring!',
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    p.add_argument('url')
-    p.add_argument('--loglevel', default='INFO', help='Log level',
-                   choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])
-    info = p.add_argument_group('fetch track info')
-    info.add_argument('--driver', default='firefox',
-                      help='Internal splinter option',
-                      choices=['firefox', 'chrome'])
-    info.add_argument('--mode', default=None,
-                      choices=['album', 'track', 'artist'])
-    do = p.add_argument_group('what to do with fetched urls')
-    do.add_argument('--do', default='print',
-                    help='What to do with retrieved informations',
-                    choices=['print', 'wgetprint', 'download'])
-    do.add_argument('--download-base', default='.',
-                    help='Only applies to wget')
-    do.add_argument('-j', '--jobs', default=3, type=int,
-                    help='Parallel downloads')
-
-    return p
-
-
-def get_mode(url):
-    path = urlparse(url).path
-    if path.startswith('/track/'):
-        return 'track'
-    if path.startswith('/album/'):
-        return 'album'
-    raise ValueError('unsupported url')
-
-
-args = get_parser().parse_args()
-logging.basicConfig(level=args.loglevel)
-logging.getLogger('selenium.webdriver').setLevel(logging.WARNING)
-logging.getLogger('requests').setLevel(logging.WARNING)
-if args.do == 'download':
-    try:
-        import requests
-        import mutagen.mp3
-    except ImportError:
-        log.critical('--do=download requires requests and download libraries')
-        raise
-
-if args.mode is None:
-    args.mode = get_mode(args.url)
-    log.info('Going in %s mode' % args.mode)
-
-prefs = {
-    'dom.audiochannel.mutedByDefault': True
-}
-with Browser(args.driver, profile_preferences=prefs) as br:
-    # get track info
-    if args.mode == 'track':
-        tracks = visit_track(br, args.url)
-    elif args.mode == 'album':
-        tracks = visit_album(br, args.url)
-    else:
-        tracks = visit_artist(br, args.url)
-
-    # do something
-    if args.do == 'print':
-        for track in tracks:
-            print(json.dumps(track))
-    elif args.do == 'wgetprint':
-        for track in tracks:
-            cmd = ['wget', '-nv', "'%s'" % track['url'],
-                   '-O', "'%s'" %
-                   join(args.download_base,
-                        '-'.join((track['artist'], track['album'],
-                                  track['title']))).replace(' ', '_')
-                   ]
-            print(' '.join(cmd))
-    elif args.do == 'download':
-        pool = multiprocessing.Pool(processes=args.jobs)
-        for res in pool.imap_unordered(download_track, tracks):
-            print('R', res)
-        pool.close()
-        pool.join()
-    else:
-        raise NotImplementedError('action ' + args.do)

+ 38 - 0
setup.py

@@ -0,0 +1,38 @@
+import os
+
+from setuptools import setup
+
+
+def read(fname):
+    with open(os.path.join(os.path.dirname(__file__), fname)) as buf:
+        return buf.read()
+
+
+conf = dict(
+        name='bandcavall',
+        version='0.1',
+        description='Download music from bandcamp',
+        long_description=read('README.md'),
+        author='boyska',
+        author_email='piuttosto@logorroici.org',
+        url='https://git.lattuga.net/boyska/bandcavall',
+        license='AGPL',
+        packages=['bandcavall'],
+        install_requires=[
+            'splinter',
+            'requests',
+            'mutagen',
+        ],
+        zip_safe=False,
+        entry_points={'console_scripts': [
+            'bandcavall=bandcavall.main:main',
+        ]},
+        classifiers=[
+          "License :: OSI Approved :: GNU Affero General Public License v3",
+          "Operating System :: POSIX :: Linux",
+          "Programming Language :: Python :: 3",
+        ])
+
+
+if __name__ == '__main__':
+    setup(**conf)