|
@@ -1,176 +0,0 @@
|
|
|
-import time
|
|
|
-import os
|
|
|
-from os.path import join, exists
|
|
|
-import argparse
|
|
|
-from urllib.parse import urlparse
|
|
|
-import logging
|
|
|
-import json
|
|
|
-import multiprocessing
|
|
|
-
|
|
|
-from splinter import Browser
|
|
|
-from splinter.exceptions import ElementDoesNotExist
|
|
|
-
|
|
|
-log = logging.getLogger(__name__)
|
|
|
-
|
|
|
-
|
|
|
def visit_track(b, url):
    """Load a track page and scrape its metadata and audio URL.

    The play button is clicked once before the metadata is read and once
    more after a short pause; the ``src`` of the page's ``audio`` element
    is read only after the second click (presumably the player fills it
    in once playback has been started -- not verifiable from this file).

    :param b: browser object exposing ``visit``, ``find_by_css`` and
        ``find_by_xpath``
    :param url: address of the track page
    :returns: dict with ``artist``, ``title`` and ``url`` keys, plus
        ``album`` when the page has an ``inAlbum`` element
    """
    log.debug('Visiting track %s' % url)
    b.visit(url)

    def toggle_playback():
        b.find_by_css('.playbutton').first.click()

    toggle_playback()
    artist_node = b.find_by_xpath('//span[@itemprop="byArtist"]').first
    info = {'artist': artist_node.value}
    try:
        album_node = b.find_by_xpath('//span[@itemprop="inAlbum"]').first
        info['album'] = album_node.value
    except ElementDoesNotExist:
        # Tracks published outside of an album have no album element.
        pass
    title_node = b.find_by_css('#name-section h2.trackTitle').first
    info['title'] = title_node.value
    time.sleep(0.2)  # brief pause between the two clicks
    toggle_playback()
    audio_node = b.find_by_css('audio').first
    info['url'] = audio_node['src']
    return info
|
|
|
-
|
|
|
-
|
|
|
def visit_album(b, url):
    """Yield the info dict of every track linked from an album page.

    :param b: browser object exposing ``visit`` and ``find_by_css``
    :param url: address of the album page
    :returns: generator of the values returned by ``visit_track``, one
        per track link found on the page, in page order
    """
    log.debug('Visiting album %s', url)
    b.visit(url)
    # Collect every URL up front: visit_track() navigates the browser
    # away from this page, so the link elements cannot be read lazily.
    track_urls = []
    for link in b.find_by_css('.track_row_view.linked .title a'):
        # Read the attribute once per link instead of once per test.
        href = link['href']
        if href and '/track/' in href:
            track_urls.append(href)
    for track_url in track_urls:
        yield visit_track(b, track_url)
|
|
|
-
|
|
|
-
|
|
|
def visit_artist(b, url):
    """Yield the info dict of every track reachable from an artist page.

    Links containing ``/track/`` are visited first (stand-alone tracks),
    then every link containing ``/album/`` is expanded through
    ``visit_album``.

    :param b: browser object exposing ``visit`` and ``find_by_css``
    :param url: address of the artist page
    :returns: generator of the values returned by ``visit_track``
    """
    log.debug('Visiting artist %s', url)
    b.visit(url)
    # One pass over the grid links, reading each href a single time.
    # Everything is collected before any navigation happens because
    # visiting a track or album leaves this page.
    album_urls = []
    nonalbumtrack_urls = []
    for link in b.find_by_css('.square a'):
        href = link['href']
        if not href:
            continue
        if '/album/' in href:
            album_urls.append(href)
        # Deliberately not ``elif``: a link matching both patterns is
        # recorded in both lists, as before.
        if '/track/' in href:
            nonalbumtrack_urls.append(href)
    log.debug('Found %d album + %d non-album tracks',
              len(album_urls), len(nonalbumtrack_urls))
    for track_url in nonalbumtrack_urls:
        yield visit_track(b, track_url)
    for album_url in album_urls:
        for track in visit_album(b, album_url):
            yield track
|
|
|
-
|
|
|
-
|
|
|
def download_track(track):
    """Download one track to disk and tag it.

    The file is stored under ``args.download_base`` in a directory built
    from the artist (and album, when present) and named after the title
    with a ``.mp3`` suffix.

    :param track: dict with ``title``, ``artist`` and ``url`` keys and an
        optional ``album`` key
    :returns: path of the file on disk (also when it already existed and
        the download was skipped), or ``None`` when the HTTP request
        failed
    """
    title = track['title']
    log.debug('Downloading "%s"', title)
    if 'album' in track:
        fdir = join(track['artist'], track['album']).replace(' ', '_')
    else:
        # NOTE: spaces in the artist name are kept here, unlike in the
        # album branch; left as is so existing download trees still match.
        fdir = join(track['artist'])
    fname = title.replace(' ', '_')
    # A path separator inside the title would point into a directory
    # that was never created.
    for sep in (os.sep, os.altsep):
        if sep:
            fname = fname.replace(sep, '_')
    fname += '.mp3'
    target_dir = join(args.download_base, fdir)
    fpath = join(target_dir, fname)
    os.makedirs(target_dir, exist_ok=True)
    if exists(fpath):
        log.info('file %s exists, skipping', fpath)
        return fpath  # actually skip instead of downloading again
    log.debug('download in ' + fpath)
    # Write to a temporary name first so an interrupted download never
    # leaves a truncated file that a later run would skip as "existing".
    partial = fpath + '.part'
    resp = requests.get(track['url'], stream=True)
    try:
        if not resp.ok:
            log.error('Error downloading track %s', title)
            return None
        with open(partial, 'wb') as buf:
            log.debug('writing ' + fname)
            # The default chunk size is a single byte; use real blocks.
            for block in resp.iter_content(chunk_size=64 * 1024):
                buf.write(block)
    finally:
        # A streamed response keeps its connection until closed.
        resp.close()
    os.replace(partial, fpath)
    audio = mutagen.mp3.EasyMP3(fpath)  # TODO: is it always mp3?
    audio['title'] = title
    audio['artist'] = track['artist']
    if 'album' in track:
        audio['album'] = track['album']
    audio.save()
    log.info('track %(title)s downloaded' % track)
    return fpath
|
|
|
-
|
|
|
-
|
|
|
def get_parser():
    """Build the command-line parser of the script.

    :returns: an ``argparse.ArgumentParser`` accepting the positional
        ``url`` and the ``--loglevel``, ``--driver``, ``--mode``,
        ``--do``, ``--download-base`` and ``-j/--jobs`` options
    """
    p = argparse.ArgumentParser(
        description='Download tracks from bandcamp, sharing is caring!',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    p.add_argument('url', help='Address of the track, album or artist page')
    p.add_argument('--loglevel', default='INFO', help='Log level',
                   choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])

    info = p.add_argument_group('fetch track info')
    info.add_argument('--driver', default='firefox',
                      help='Internal splinter option',
                      choices=['firefox', 'chrome'])
    info.add_argument('--mode', default=None,
                      help='Kind of page the url points to; '
                           'guessed from the url when omitted',
                      choices=['album', 'track', 'artist'])

    do = p.add_argument_group('what to do with fetched urls')
    do.add_argument('--do', default='print',
                    help='What to do with retrieved information',
                    choices=['print', 'wgetprint', 'download'])
    # The base directory is used by both the wgetprint and the download
    # actions, not only by wget.
    do.add_argument('--download-base', default='.',
                    help='Base directory for the files '
                         '(applies to wgetprint and download)')
    do.add_argument('-j', '--jobs', default=3, type=int,
                    help='Parallel downloads')

    return p
|
|
|
-
|
|
|
-
|
|
|
def get_mode(url):
    """Guess which kind of page *url* points to from its path.

    :param url: address given on the command line
    :returns: ``'track'`` for paths starting with ``/track/``,
        ``'album'`` for paths starting with ``/album/`` and ``'artist'``
        for the site root or its ``/music`` page
    :raises ValueError: for any other path
    """
    path = urlparse(url).path
    if path.startswith('/track/'):
        return 'track'
    if path.startswith('/album/'):
        return 'album'
    # The script has an artist mode, so auto-detection should be able to
    # pick it: an artist's landing page is the root (or /music).
    if path.rstrip('/') in ('', '/music'):
        return 'artist'
    raise ValueError('unsupported url: %s' % url)
|
|
|
-
|
|
|
-
|
|
|
# Parse the command line and configure logging at import time; the
# functions above read the module-level ``args``.
args = get_parser().parse_args()
logging.basicConfig(level=args.loglevel)
# Keep the third-party loggers quiet unless something goes wrong.
logging.getLogger('selenium.webdriver').setLevel(logging.WARNING)
logging.getLogger('requests').setLevel(logging.WARNING)
if args.do == 'download':
    # These libraries are needed only by the download action, so they
    # are imported lazily.
    try:
        import requests
        import mutagen.mp3
    except ImportError:
        log.critical('--do=download requires requests and mutagen libraries')
        raise

if args.mode is None:
    # No explicit --mode: derive it from the url.
    args.mode = get_mode(args.url)
    log.info('Going in %s mode', args.mode)
|
|
|
-
|
|
|
# Browser profile preferences handed to splinter.
# NOTE(review): the preference name looks Firefox-specific; confirm that
# passing ``profile_preferences`` is accepted with ``--driver chrome``.
prefs = {
    'dom.audiochannel.mutedByDefault': True
}

# Guard the browser work: with the "spawn" start method every pool
# worker re-imports this module, and without the guard each of them
# would open its own browser and scrape again.
if __name__ == '__main__':
    with Browser(args.driver, profile_preferences=prefs) as br:
        # get track info
        if args.mode == 'track':
            # visit_track returns a single dict; wrap it so that every
            # action below can iterate over tracks uniformly.
            tracks = [visit_track(br, args.url)]
        elif args.mode == 'album':
            tracks = visit_album(br, args.url)
        else:
            tracks = visit_artist(br, args.url)

        # do something
        if args.do == 'print':
            for track in tracks:
                print(json.dumps(track))
        elif args.do == 'wgetprint':
            import shlex

            for track in tracks:
                # Stand-alone tracks carry no 'album' key.
                name_parts = [track['artist']]
                if 'album' in track:
                    name_parts.append(track['album'])
                name_parts.append(track['title'])
                target = join(args.download_base,
                              '-'.join(name_parts)).replace(' ', '_')
                # Quote for the shell properly; scraped text may itself
                # contain quote characters.
                cmd = ['wget', '-nv', shlex.quote(track['url']),
                       '-O', shlex.quote(target)]
                print(' '.join(cmd))
        elif args.do == 'download':
            pool = multiprocessing.Pool(processes=args.jobs)
            # Results arrive in completion order, not in page order.
            for res in pool.imap_unordered(download_track, tracks):
                print('R', res)
            pool.close()
            pool.join()
        else:
            raise NotImplementedError('action ' + args.do)
|