serious packaging

This commit is contained in:
boyska 2016-08-07 12:47:31 +02:00
parent fc0c73b0dd
commit 9e77197ec4
6 changed files with 225 additions and 176 deletions

0
bandcavall/__init__.py Normal file
View file

35
bandcavall/download.py Normal file
View file

@ -0,0 +1,35 @@
import os
from os.path import join, exists
import requests
import mutagen.mp3
import logging
log = logging.getLogger(__name__)
def download_track(track, args):
    """Download a single track to disk and write its ID3 tags.

    The destination is ``<download_base>/<artist>[/<album>]/<title>.mp3``.
    Spaces are replaced by underscores in the title and, for album tracks,
    in the artist/album directory.

    :param track: dict with ``title``, ``artist`` and ``url`` keys and an
        optional ``album`` key.
    :param args: object exposing a ``download_base`` attribute (the parsed
        command line arguments).
    :return: the path of the file (also when it already existed and the
        download was skipped), or ``None`` if the HTTP request failed.
    """
    log.debug('Downloading "%s"', track['title'])
    if 'album' in track:
        fdir = join(track['artist'], track['album']).replace(' ', '_')
    else:
        # NOTE: unlike the album branch, spaces in the artist name are kept.
        fdir = join(track['artist'])
    fname = track['title'].replace(' ', '_') + '.mp3'
    fpath = join(args.download_base, fdir, fname)
    os.makedirs(join(args.download_base, fdir), exist_ok=True)
    if exists(fpath):
        log.info('file %s exists, skipping', fpath)
        # Actually skip: without this return the file was fetched again.
        return fpath
    log.debug('download in %s', fpath)
    resp = requests.get(track['url'], stream=True)
    try:
        if not resp.ok:
            log.error('Error downloading track %s', track['title'])
            return None
        with open(fpath, 'wb') as buf:
            log.debug('writing %s', fname)
            # iter_content() defaults to 1-byte chunks; ask for bigger ones.
            for block in resp.iter_content(chunk_size=64 * 1024):
                buf.write(block)
    finally:
        # A streamed response keeps its connection until explicitly closed.
        resp.close()
    audio = mutagen.mp3.EasyMP3(fpath)  # TODO: is it always mp3?
    audio['title'] = track['title']
    audio['artist'] = track['artist']
    if 'album' in track:
        audio['album'] = track['album']
    audio.save()
    log.info('track %(title)s downloaded', track)
    return fpath

98
bandcavall/main.py Normal file
View file

@ -0,0 +1,98 @@
from os.path import join
import argparse
from urllib.parse import urlparse
import logging
import json
import multiprocessing
import functools
from splinter import Browser
from .download import download_track
from .visit import visit_track, visit_album, visit_artist
log = logging.getLogger(__name__)
def get_parser():
    """Build the command line parser.

    Options are split in two groups: how track information is fetched
    (``--driver``, ``--mode``) and what is done with it (``--do``,
    ``--download-base``, ``--jobs``).

    :return: a configured :class:`argparse.ArgumentParser`.
    """
    p = argparse.ArgumentParser(
        description='Download tracks from bandcamp, sharing is caring!',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    p.add_argument('url')
    p.add_argument('--loglevel', default='INFO', help='Log level',
                   choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])

    info = p.add_argument_group('fetch track info')
    info.add_argument('--driver', default='firefox',
                      help='Internal splinter option',
                      choices=['firefox', 'chrome'])
    info.add_argument('--mode', default=None,
                      choices=['album', 'track', 'artist'])

    do = p.add_argument_group('what to do with fetched urls')
    do.add_argument('--do', default='print',
                    help='What to do with retrieved informations',
                    choices=['print', 'wgetprint', 'download'])
    # The base directory is used by both 'wgetprint' and 'download'; the
    # previous help text wrongly claimed it only applied to wget.
    do.add_argument('--download-base', default='.',
                    help='Base directory for files '
                         '(wgetprint and download actions)')
    do.add_argument('-j', '--jobs', default=3, type=int,
                    help='Parallel downloads')
    return p
def get_mode(url):
    """Guess the visiting mode from the path component of *url*.

    :param url: URL given on the command line.
    :return: ``'track'`` or ``'album'``, depending on whether the path
        starts with ``/track/`` or ``/album/``.
    :raises ValueError: if the path starts with neither prefix.
    """
    path = urlparse(url).path
    for mode in ('track', 'album'):
        if path.startswith('/' + mode + '/'):
            return mode
    raise ValueError('unsupported url: "{}"'.format(path))
def main():
    """Command line entry point: fetch track info, then act on it.

    Parses the arguments, opens a browser through splinter, collects track
    information according to ``--mode`` (guessed from the URL when not
    given) and then prints it as JSON, prints ``wget`` commands, or
    downloads the tracks in parallel, according to ``--do``.

    :raises ValueError: from :func:`get_mode` when the mode cannot be
        guessed from the URL.
    :raises NotImplementedError: for an unknown ``--do`` action.
    """
    import shlex  # local import: only used to quote the wget commands

    args = get_parser().parse_args()
    logging.basicConfig(level=args.loglevel)
    logging.getLogger('selenium.webdriver').setLevel(logging.WARNING)
    logging.getLogger('requests').setLevel(logging.WARNING)
    if args.mode is None:
        args.mode = get_mode(args.url)
    log.info('Going in %s mode', args.mode)
    prefs = {
        'dom.audiochannel.mutedByDefault': True
    }
    with Browser(args.driver, profile_preferences=prefs) as br:
        # get track info
        if args.mode == 'track':
            # visit_track returns one dict; wrap it so that the loops below
            # iterate over tracks rather than over the dict's keys.
            tracks = [visit_track(br, args.url)]
        elif args.mode == 'album':
            tracks = visit_album(br, args.url)
        else:
            tracks = visit_artist(br, args.url)

        # do something; album/artist tracks are produced lazily, so this
        # must happen while the browser is still open
        if args.do == 'print':
            for track in tracks:
                print(json.dumps(track))
        elif args.do == 'wgetprint':
            for track in tracks:
                # 'album' is missing for non-album tracks: use what exists.
                parts = [track[key] for key in ('artist', 'album', 'title')
                         if key in track]
                # Only the generated file name gets its spaces replaced,
                # not the user-supplied base directory.
                dest = join(args.download_base,
                            '-'.join(parts).replace(' ', '_'))
                cmd = ['wget', '-nv', shlex.quote(track['url']),
                       '-O', shlex.quote(dest)]
                print(' '.join(cmd))
        elif args.do == 'download':
            pool = multiprocessing.Pool(processes=args.jobs)
            worker = functools.partial(download_track, args=args)
            for res in pool.imap_unordered(worker, tracks):
                print('R', res)
            pool.close()
            pool.join()
        else:
            raise NotImplementedError('action ' + args.do)


if __name__ == '__main__':
    main()

54
bandcavall/visit.py Normal file
View file

@ -0,0 +1,54 @@
import time
import logging
from splinter.exceptions import ElementDoesNotExist
log = logging.getLogger(__name__)
def visit_track(b, url):
    """Open a track page and collect its metadata and audio URL.

    The first ``.playbutton`` element is clicked once before the metadata
    is read and once more after a short pause; the audio URL is then taken
    from the ``src`` of the first ``audio`` element.
    NOTE(review): presumably the clicks start/stop playback so that the
    audio element gets its source -- confirm.

    :param b: splinter browser.
    :param url: URL of the track page.
    :return: dict with ``artist``, ``title`` and ``url`` keys, plus
        ``album`` when the page has an ``inAlbum`` element.
    """
    log.debug('Visiting track %s' % url)
    b.visit(url)
    play_selector = '.playbutton'
    b.find_by_css(play_selector).first.click()

    artist = b.find_by_xpath('//span[@itemprop="byArtist"]').first.value
    info = {'artist': artist}
    try:
        album = b.find_by_xpath('//span[@itemprop="inAlbum"]').first.value
    except ElementDoesNotExist:  # non-album tracks
        pass
    else:
        info['album'] = album
    title_node = b.find_by_css('#name-section h2.trackTitle').first
    info['title'] = title_node.value

    time.sleep(0.2)  # how nice!
    b.find_by_css(play_selector).first.click()
    audio_node = b.find_by_css('audio').first
    info['url'] = audio_node['src']
    return info
def visit_album(b, url):
    """Open an album page and yield the info dict of each of its tracks.

    All track links are collected from the album page first; each one is
    then visited in turn with :func:`visit_track`.

    :param b: splinter browser.
    :param url: URL of the album page.
    :return: generator of the dicts returned by :func:`visit_track`.
    """
    log.debug('Visiting album %s' % url)
    b.visit(url)
    track_urls = []
    for anchor in b.find_by_css('.track_row_view.linked .title a'):
        if anchor['href'] and '/track/' in anchor['href']:
            track_urls.append(anchor['href'])
    for track_url in track_urls:
        yield visit_track(b, track_url)
def visit_artist(b, url):
    """Open an artist page and yield the info dict of every track found.

    Links under ``.square a`` are split into album links (containing
    ``/album/``) and non-album track links (containing ``/track/``).
    Non-album tracks are yielded first, then the tracks of each album.

    :param b: splinter browser.
    :param url: URL of the artist page.
    :return: generator of the dicts returned by :func:`visit_track`.
    """
    log.debug('Visiting artist %s', url)
    b.visit(url)
    album_urls = []
    nonalbumtrack_urls = []
    # Single pass: query the page once and read each href once instead of
    # running the same CSS query twice and fetching every href repeatedly.
    for link in b.find_by_css('.square a'):
        href = link['href']
        if not href:
            continue
        if '/album/' in href:
            album_urls.append(href)
        if '/track/' in href:
            nonalbumtrack_urls.append(href)
    log.debug('Found %d album + %d non-album tracks',
              len(album_urls), len(nonalbumtrack_urls))
    # URLs were collected up front because visiting a track navigates the
    # browser away from the artist page.
    for track_url in nonalbumtrack_urls:
        yield visit_track(b, track_url)
    for album_url in album_urls:
        for track in visit_album(b, album_url):
            yield track

View file

@ -1,176 +0,0 @@
import time
import os
from os.path import join, exists
import argparse
from urllib.parse import urlparse
import logging
import json
import multiprocessing
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
log = logging.getLogger(__name__)
def visit_track(b, url):
    """Visit a track page; return its artist, album (if any), title and url.

    The first ``.playbutton`` element is clicked before reading the
    metadata and again after a 0.2 second pause; the url is the ``src``
    of the first ``audio`` element.
    NOTE(review): the clicks presumably toggle playback so the audio
    source becomes available -- confirm.

    :param b: splinter browser.
    :param url: URL of the track page.
    :return: dict with ``artist``, ``title``, ``url`` and, for album
        tracks, ``album``.
    """
    def click_play():
        # Click the first play button currently on the page.
        b.find_by_css('.playbutton').first.click()

    log.debug('Visiting track %s' % url)
    b.visit(url)
    click_play()

    info = dict()
    artist_nodes = b.find_by_xpath('//span[@itemprop="byArtist"]')
    info['artist'] = artist_nodes.first.value
    try:
        album_nodes = b.find_by_xpath('//span[@itemprop="inAlbum"]')
        info['album'] = album_nodes.first.value
    except ElementDoesNotExist:  # non-album tracks
        pass
    title_nodes = b.find_by_css('#name-section h2.trackTitle')
    info['title'] = title_nodes.first.value

    time.sleep(0.2)  # how nice!
    click_play()
    info['url'] = b.find_by_css('audio').first['src']
    return info
def visit_album(b, url):
    """Visit an album page and yield info for each linked track.

    Track URLs are gathered from the album page before any track page is
    visited with :func:`visit_track`.

    :param b: splinter browser.
    :param url: URL of the album page.
    :return: generator of track info dicts.
    """
    log.debug('Visiting album %s' % url)
    b.visit(url)
    title_links = b.find_by_css('.track_row_view.linked .title a')
    track_urls = [
        a['href'] for a in title_links
        if a['href'] and '/track/' in a['href']
    ]
    for target in track_urls:
        yield visit_track(b, target)
def visit_artist(b, url):
    """Visit an artist page and yield info for every track reachable from it.

    Links under ``.square a`` containing ``/album/`` are treated as albums
    and those containing ``/track/`` as non-album tracks. Non-album tracks
    are yielded first, followed by the tracks of each album.

    :param b: splinter browser.
    :param url: URL of the artist page.
    :return: generator of track info dicts.
    """
    log.debug('Visiting artist %s', url)
    b.visit(url)
    album_urls, nonalbumtrack_urls = [], []
    # One query and one href read per link, instead of two identical CSS
    # queries with up to three href reads per link.
    for link in b.find_by_css('.square a'):
        href = link['href']
        if not href:
            continue
        if '/album/' in href:
            album_urls.append(href)
        if '/track/' in href:
            nonalbumtrack_urls.append(href)
    log.debug('Found %d album + %d non-album tracks',
              len(album_urls), len(nonalbumtrack_urls))
    # The lists are built before visiting anything, since each visit
    # navigates the browser away from the artist page.
    for track_url in nonalbumtrack_urls:
        yield visit_track(b, track_url)
    for album_url in album_urls:
        for track in visit_album(b, album_url):
            yield track
def download_track(track):
    """Download a single track to disk and write its ID3 tags.

    Reads the module-level ``args`` for ``download_base`` and relies on
    ``requests`` and ``mutagen.mp3``, which this script imports at module
    level only when ``--do=download`` is selected.

    The destination is ``<download_base>/<artist>[/<album>]/<title>.mp3``.

    :param track: dict with ``title``, ``artist`` and ``url`` keys and an
        optional ``album`` key.
    :return: the path of the file (also when it already existed and the
        download was skipped), or ``None`` if the HTTP request failed.
    """
    log.debug('Downloading "%s"', track['title'])
    if 'album' in track:
        fdir = join(track['artist'], track['album']).replace(' ', '_')
    else:
        # NOTE: unlike the album branch, spaces in the artist name are kept.
        fdir = join(track['artist'])
    fname = track['title'].replace(' ', '_') + '.mp3'
    fpath = join(args.download_base, fdir, fname)
    os.makedirs(join(args.download_base, fdir), exist_ok=True)
    if exists(fpath):
        log.info('file %s exists, skipping', fpath)
        # Actually skip: without this return the file was fetched again.
        return fpath
    log.debug('download in %s', fpath)
    resp = requests.get(track['url'], stream=True)
    try:
        if not resp.ok:
            log.error('Error downloading track %s', track['title'])
            return
        with open(fpath, 'wb') as buf:
            log.debug('writing %s', fname)
            # iter_content() defaults to 1-byte chunks; ask for bigger ones.
            for block in resp.iter_content(chunk_size=64 * 1024):
                buf.write(block)
    finally:
        # A streamed response keeps its connection until explicitly closed.
        resp.close()
    audio = mutagen.mp3.EasyMP3(fpath)  # TODO: is it always mp3?
    audio['title'] = track['title']
    audio['artist'] = track['artist']
    if 'album' in track:
        audio['album'] = track['album']
    audio.save()
    log.info('track %(title)s downloaded', track)
    return fpath
def get_parser():
    """Build the command line parser for the script.

    Besides the positional ``url`` and ``--loglevel``, options are grouped
    into how track info is fetched (``--driver``, ``--mode``) and what is
    done with it (``--do``, ``--download-base``, ``--jobs``).

    :return: a configured :class:`argparse.ArgumentParser`.
    """
    p = argparse.ArgumentParser(
        description='Download tracks from bandcamp, sharing is caring!',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    p.add_argument('url')
    p.add_argument('--loglevel', default='INFO', help='Log level',
                   choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])

    info = p.add_argument_group('fetch track info')
    info.add_argument('--driver', default='firefox',
                      help='Internal splinter option',
                      choices=['firefox', 'chrome'])
    info.add_argument('--mode', default=None,
                      choices=['album', 'track', 'artist'])

    do = p.add_argument_group('what to do with fetched urls')
    do.add_argument('--do', default='print',
                    help='What to do with retrieved informations',
                    choices=['print', 'wgetprint', 'download'])
    # download_base is read by the 'download' action as well as by
    # 'wgetprint', so the help must not say it is wget-only.
    do.add_argument('--download-base', default='.',
                    help='Base directory for files '
                         '(wgetprint and download actions)')
    do.add_argument('-j', '--jobs', default=3, type=int,
                    help='Parallel downloads')
    return p
def get_mode(url):
    """Infer the visiting mode from the path of *url*.

    :param url: URL given on the command line.
    :return: ``'track'`` for paths starting with ``/track/``, ``'album'``
        for paths starting with ``/album/``.
    :raises ValueError: for any other path.
    """
    prefixes = (('/track/', 'track'), ('/album/', 'album'))
    path = urlparse(url).path
    mode = next((name for prefix, name in prefixes
                 if path.startswith(prefix)), None)
    if mode is None:
        raise ValueError('unsupported url')
    return mode
# Script body: parse the command line, collect track info through a
# browser, then print it, print wget commands, or download the tracks.
import shlex  # only used to quote the printed wget commands

args = get_parser().parse_args()
logging.basicConfig(level=args.loglevel)
logging.getLogger('selenium.webdriver').setLevel(logging.WARNING)
logging.getLogger('requests').setLevel(logging.WARNING)

if args.do == 'download':
    # These modules are only needed by download_track, so they are imported
    # only when the download action is requested.
    try:
        import requests
        import mutagen.mp3
    except ImportError:
        log.critical('--do=download requires requests and mutagen libraries')
        raise

if args.mode is None:
    args.mode = get_mode(args.url)
log.info('Going in %s mode', args.mode)

prefs = {
    'dom.audiochannel.mutedByDefault': True
}
with Browser(args.driver, profile_preferences=prefs) as br:
    # get track info
    if args.mode == 'track':
        # visit_track returns one dict; wrap it so that the loops below
        # iterate over tracks rather than over the dict's keys.
        tracks = [visit_track(br, args.url)]
    elif args.mode == 'album':
        tracks = visit_album(br, args.url)
    else:
        tracks = visit_artist(br, args.url)

    # do something; album/artist tracks are produced lazily, so this must
    # happen while the browser is still open
    if args.do == 'print':
        for track in tracks:
            print(json.dumps(track))
    elif args.do == 'wgetprint':
        for track in tracks:
            # 'album' is missing for non-album tracks: use what exists.
            parts = [track[key] for key in ('artist', 'album', 'title')
                     if key in track]
            # Only the generated file name gets its spaces replaced, not
            # the user-supplied base directory.
            dest = join(args.download_base,
                        '-'.join(parts).replace(' ', '_'))
            cmd = ['wget', '-nv', shlex.quote(track['url']),
                   '-O', shlex.quote(dest)]
            print(' '.join(cmd))
    elif args.do == 'download':
        # download_track reads the module-level ``args`` defined above.
        pool = multiprocessing.Pool(processes=args.jobs)
        for res in pool.imap_unordered(download_track, tracks):
            print('R', res)
        pool.close()
        pool.join()
    else:
        raise NotImplementedError('action ' + args.do)

38
setup.py Normal file
View file

@ -0,0 +1,38 @@
import os
from setuptools import setup
def read(fname):
    """Return the text content of *fname*, relative to this file's directory.

    The file is decoded as UTF-8 explicitly, so that installation does not
    depend on the locale's default encoding.

    :param fname: file name, joined to the directory containing this file.
    :return: the whole file content as a string.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path, encoding='utf-8') as buf:
        return buf.read()
# Package metadata handed to setuptools.setup() when this file is run as a
# script. The long description is loaded from README.md via read().
conf = {
    'name': 'bandcavall',
    'version': '0.1',
    'description': 'Download music from bandcamp',
    'long_description': read('README.md'),
    'author': 'boyska',
    'author_email': 'piuttosto@logorroici.org',
    'url': 'https://git.lattuga.net/boyska/bandcavall',
    'license': 'AGPL',
    'packages': ['bandcavall'],
    'install_requires': [
        'splinter',
        'requests',
        'mutagen',
    ],
    'zip_safe': False,
    # Installs a ``bandcavall`` command that calls bandcavall.main:main.
    'entry_points': {
        'console_scripts': [
            'bandcavall=bandcavall.main:main',
        ],
    },
    'classifiers': [
        "License :: OSI Approved :: GNU Affero General Public License v3",
        "Operating System :: POSIX :: Linux",
        "Programming Language :: Python :: 3",
    ],
}

if __name__ == '__main__':
    setup(**conf)