"""Fetch track information from Bandcamp pages and print or download it.

A browser driven through splinter visits a track or album URL and collects,
for every track, its artist, album, title and audio stream URL.  The result
is then printed as JSON, printed as ready-to-run ``wget`` commands, or
downloaded (and tagged) in parallel.
"""
import argparse
import functools
import json
import logging
import multiprocessing
import os
import shlex
import time
from os.path import exists, join
from urllib.parse import urlparse

log = logging.getLogger(__name__)

# Block size used when streaming a download to disk.  requests'
# ``iter_content()`` defaults to 1 byte per iteration, which is extremely slow.
DOWNLOAD_CHUNK_SIZE = 64 * 1024


def _safe_name(name):
    """Return *name* made usable as a single path component.

    Spaces and path separators are replaced by underscores, so a scraped
    name such as ``AC/DC`` cannot create unexpected sub-directories.  Names
    that would resolve to the current/parent directory become ``_``.
    """
    for sep in (' ', '/', os.sep, os.altsep):
        if sep:
            name = name.replace(sep, '_')
    if name in ('', '.', '..'):
        return '_'
    return name


def visit_track(b, url):
    """Open a track page in browser *b* and collect the track metadata.

    The play button is clicked once before reading the page and once more
    after a short pause; the audio ``src`` attribute is read last.
    (Presumably playback has to start for the ``audio`` element to expose
    its source URL -- confirm against the live page.)

    Returns a dict with the keys ``artist``, ``album``, ``title``, ``url``.
    """
    log.debug('Visiting track %s', url)
    b.visit(url)
    b.find_by_css('.playbutton').first.click()
    artist = b.find_by_xpath('//span[@itemprop="byArtist"]').first.value
    album = b.find_by_xpath('//span[@itemprop="inAlbum"]').first.value
    title = b.find_by_css('#name-section h2.trackTitle').first.value
    time.sleep(0.2)  # how nice!
    b.find_by_css('.playbutton').first.click()
    return dict(
        artist=artist,
        album=album,
        title=title,
        url=b.find_by_css('audio').first['src'],
    )


def visit_album(b, url):
    """Open an album page in browser *b* and yield one dict per track.

    Track links are collected before any of them is visited, because
    visiting a track navigates the browser away from the album page.
    Each yielded value is the result of :func:`visit_track`.
    """
    log.debug('Visiting album %s', url)
    b.visit(url)
    track_urls = [
        link['href']
        for link in b.find_by_css('.track_row_view.linked .title a')
        if link['href'] and '/track/' in link['href']
    ]
    for track_url in track_urls:
        yield visit_track(b, track_url)


def download_track(track, download_base='.'):
    """Download *track* below *download_base* and write its tags.

    The file is stored as ``<download_base>/<artist>/<album>/<title>.mp3``
    (spaces and path separators in the names replaced by underscores).
    Data is written to a ``.part`` file first and renamed once complete, so
    an interrupted download is never mistaken for a finished one.

    *track* is a dict with the keys ``artist``, ``album``, ``title``, ``url``.

    Returns the path of the file (also when it already existed and the
    download was skipped), or ``None`` when the server answered with an
    error status.
    """
    log.debug('Downloading "%s"', track['title'])
    fdir = join(_safe_name(track['artist']), _safe_name(track['album']))
    fname = _safe_name(track['title']) + '.mp3'
    fpath = join(download_base, fdir, fname)
    os.makedirs(join(download_base, fdir), exist_ok=True)
    if exists(fpath):
        log.info('file %s exists, skipping', fpath)
        return fpath

    # Imported here rather than at module level: they are optional
    # dependencies, needed only for an actual download, and this function
    # also runs in worker processes that do not share the parent's globals.
    import requests
    import mutagen.mp3

    log.debug('download in %s', fpath)
    partial_path = fpath + '.part'
    with requests.get(track['url'], stream=True) as resp:
        if not resp.ok:
            log.error('Error downloading track %s', track['title'])
            return None
        with open(partial_path, 'wb') as buf:
            log.debug('writing %s', fname)
            for block in resp.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                buf.write(block)

    audio = mutagen.mp3.EasyMP3(partial_path)  # TODO: is it always mp3?
    audio['title'] = track['title']
    audio['artist'] = track['artist']
    audio['album'] = track['album']
    audio.save()
    # Publish the file under its final name only once it is complete.
    os.replace(partial_path, fpath)
    log.info('track %(title)s downloaded' % track)
    return fpath


def wget_command(track, download_base='.'):
    """Return a shell-safe ``wget`` command line that downloads *track*.

    The output file is ``<download_base>/<artist>-<album>-<title>`` with
    spaces and path separators in the file name replaced by underscores.
    URL and path are quoted with :func:`shlex.quote`, because both come from
    a scraped web page and may contain quotes or other shell metacharacters.
    """
    name = _safe_name(
        '-'.join((track['artist'], track['album'], track['title'])))
    cmd = ['wget', '-nv', shlex.quote(track['url']),
           '-O', shlex.quote(join(download_base, name))]
    return ' '.join(cmd)


def get_parser():
    """Build the command line parser of the script."""
    p = argparse.ArgumentParser(
        description='Download tracks from bandcamp, sharing is caring!',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    p.add_argument('url')
    p.add_argument('--loglevel', default='INFO', help='Log level',
                   choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])

    info = p.add_argument_group('fetch track info')
    info.add_argument('--driver', default='firefox',
                      help='Internal splinter option',
                      choices=['firefox', 'chrome'])
    info.add_argument('--mode', default=None,
                      choices=['album', 'track', 'artist'])

    do = p.add_argument_group('what to do with fetched urls')
    do.add_argument('--do', default='print',
                    help='What to do with retrieved informations',
                    choices=['print', 'wgetprint', 'download'])
    do.add_argument('--download-base', default='.',
                    help='Base directory, applies to wgetprint and download')
    do.add_argument('-j', '--jobs', default=3, type=int,
                    help='Parallel downloads')
    return p


def get_mode(url):
    """Guess the mode (``'track'`` or ``'album'``) from the path of *url*.

    Raises ``ValueError`` when the path starts with neither ``/track/`` nor
    ``/album/``.
    """
    path = urlparse(url).path
    if path.startswith('/track/'):
        return 'track'
    if path.startswith('/album/'):
        return 'album'
    raise ValueError('unsupported url')


def _init_worker(loglevel):
    """Configure logging inside a download worker process.

    Needed with the "spawn" start method, where workers do not inherit the
    parent's logging setup; it is a no-op when handlers already exist.
    """
    logging.basicConfig(level=loglevel)
    logging.getLogger('requests').setLevel(logging.WARNING)


def main(argv=None):
    """Run the script; *argv* defaults to ``sys.argv[1:]``."""
    args = get_parser().parse_args(argv)
    logging.basicConfig(level=args.loglevel)
    logging.getLogger('selenium.webdriver').setLevel(logging.WARNING)
    logging.getLogger('requests').setLevel(logging.WARNING)

    if args.do == 'download':
        # Fail early, before a browser is started, if the optional
        # dependencies of the download action are missing.
        try:
            import requests  # noqa: F401
            import mutagen.mp3  # noqa: F401
        except ImportError:
            log.critical(
                '--do=download requires requests and mutagen libraries')
            raise

    if args.mode is None:
        args.mode = get_mode(args.url)
    log.info('Going in %s mode', args.mode)
    if args.mode not in ('track', 'album'):
        # Checked up front so no browser is launched just to fail.
        raise NotImplementedError('info retrieval ' + args.mode)

    # Imported lazily, like requests/mutagen, so that the module can be
    # imported and the arguments validated without a browser stack.
    from splinter import Browser

    prefs = {
        'dom.audiochannel.mutedByDefault': True
    }
    with Browser(args.driver, profile_preferences=prefs) as br:
        # get track info
        if args.mode == 'track':
            # A single track is wrapped in a list so that every action below
            # can iterate over track dicts (iterating the dict itself would
            # yield its keys).
            tracks = [visit_track(br, args.url)]
        else:
            tracks = visit_album(br, args.url)

        # do something
        if args.do == 'print':
            for track in tracks:
                print(json.dumps(track))
        elif args.do == 'wgetprint':
            for track in tracks:
                print(wget_command(track, args.download_base))
        elif args.do == 'download':
            worker = functools.partial(download_track,
                                       download_base=args.download_base)
            with multiprocessing.Pool(processes=args.jobs,
                                      initializer=_init_worker,
                                      initargs=(args.loglevel,)) as pool:
                # ``tracks`` may be a generator: pages are visited while
                # earlier tracks are already downloading.
                for res in pool.imap_unordered(worker, tracks):
                    print('R', res)
                pool.close()
                pool.join()
        else:
            raise NotImplementedError('action ' + args.do)


# The guard keeps worker processes started with the "spawn" method from
# re-running the whole script when they import this module.
if __name__ == '__main__':
    main()