serious packaging
This commit is contained in:
parent
fc0c73b0dd
commit
9e77197ec4
6 changed files with 225 additions and 176 deletions
0
bandcavall/__init__.py
Normal file
0
bandcavall/__init__.py
Normal file
35
bandcavall/download.py
Normal file
35
bandcavall/download.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
import os
|
||||||
|
from os.path import join, exists
|
||||||
|
import requests
|
||||||
|
import mutagen.mp3
|
||||||
|
import logging
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def download_track(track, args):
    """Download one track to disk and tag it with its metadata.

    The file is stored below ``args.download_base`` in a directory named
    after the artist and, when the track has one, the album.

    Args:
        track: dict with the keys ``title``, ``artist`` and ``url`` and,
            optionally, ``album``.
        args: object exposing ``download_base``, the base directory for
            downloaded files.

    Returns:
        The path of the track file (freshly downloaded or already
        present), or ``None`` when the HTTP request did not succeed.
    """
    log.debug('Downloading "%s"' % track['title'])
    if 'album' in track:
        fdir = join(track['artist'], track['album']).replace(' ', '_')
    else:
        # NOTE(review): spaces are not replaced here, unlike the album
        # branch; left untouched so existing download trees keep matching.
        fdir = join(track['artist'])
    fname = track['title'].replace(' ', '_') + '.mp3'
    target_dir = join(args.download_base, fdir)
    fpath = join(target_dir, fname)
    os.makedirs(target_dir, exist_ok=True)
    if exists(fpath):
        # Actually skip: the message used to be logged and the download
        # performed anyway.
        log.info('file %s exists, skipping' % fpath)
        return fpath
    log.debug('download in ' + fpath)
    resp = requests.get(track['url'], stream=True)
    if not resp.ok:
        log.error('Error downloading track %s', track['title'])
        return None
    # Write to a temporary name first: an interrupted download must not
    # leave a truncated file at fpath, which the check above would then
    # treat as complete on the next run.
    part_path = fpath + '.part'
    with open(part_path, 'wb') as buf:
        log.debug('writing ' + fname)
        # The default chunk size of iter_content() is a single byte.
        for block in resp.iter_content(chunk_size=64 * 1024):
            buf.write(block)
    os.replace(part_path, fpath)
    audio = mutagen.mp3.EasyMP3(fpath)  # TODO: is it always mp3?
    audio['title'] = track['title']
    audio['artist'] = track['artist']
    if 'album' in track:
        audio['album'] = track['album']
    audio.save()
    log.info('track %(title)s downloaded' % track)
    return fpath
|
98
bandcavall/main.py
Normal file
98
bandcavall/main.py
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
from os.path import join
|
||||||
|
import argparse
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
import multiprocessing
|
||||||
|
import functools
|
||||||
|
|
||||||
|
from splinter import Browser
|
||||||
|
|
||||||
|
from .download import download_track
|
||||||
|
from .visit import visit_track, visit_album, visit_artist
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_parser():
    """Build the command-line argument parser.

    Returns:
        An ``argparse.ArgumentParser`` accepting the positional ``url``
        plus ``--loglevel``, ``--driver``, ``--mode``, ``--do``,
        ``--download-base`` and ``-j/--jobs``.
    """
    p = argparse.ArgumentParser(
        description='Download tracks from bandcamp, sharing is caring!',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    p.add_argument('url')
    p.add_argument('--loglevel', default='INFO', help='Log level',
                   choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])

    info = p.add_argument_group('fetch track info')
    info.add_argument('--driver', default='firefox',
                      help='Internal splinter option',
                      choices=['firefox', 'chrome'])
    # When --mode is left unset, the caller is expected to derive it
    # from the URL.
    info.add_argument('--mode', default=None,
                      choices=['album', 'track', 'artist'])

    do = p.add_argument_group('what to do with fetched urls')
    do.add_argument('--do', default='print',
                    help='What to do with retrieved information',
                    choices=['print', 'wgetprint', 'download'])
    # The base directory is used by both "wgetprint" and "download".
    do.add_argument('--download-base', default='.',
                    help='Base directory for files (wgetprint and download)')
    do.add_argument('-j', '--jobs', default=3, type=int,
                    help='Parallel downloads')

    return p
|
||||||
|
|
||||||
|
|
||||||
|
def get_mode(url):
    """Guess the visiting mode from the path of *url*.

    Args:
        url: address of the page to visit.

    Returns:
        ``'track'`` for paths starting with ``/track/``, ``'album'`` for
        paths starting with ``/album/``, and ``'artist'`` for the site
        root or its ``/music`` page.

    Raises:
        ValueError: if the path matches none of the known layouts.
    """
    path = urlparse(url).path
    if path.startswith('/track/'):
        return 'track'
    if path.startswith('/album/'):
        return 'album'
    # 'artist' is a supported mode but could never be auto-detected; the
    # root page and "/music" are treated as the artist page.
    if path.rstrip('/') in ('', '/music'):
        return 'artist'
    raise ValueError('unsupported url: "{}"'.format(path))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: fetch track information and act on it.

    Parses the arguments, opens a browser with the selected driver,
    collects track information for the given URL according to the mode
    (track, album or artist) and then, depending on ``--do``, prints the
    tracks as JSON, prints equivalent ``wget`` commands, or downloads
    them with a pool of worker processes.

    Raises:
        ValueError: from ``get_mode`` when no mode was given and the URL
            is not recognised.
        NotImplementedError: for an unknown ``--do`` action.
    """
    # Local import: only the wgetprint branch needs it.
    from shlex import quote

    args = get_parser().parse_args()
    logging.basicConfig(level=args.loglevel)
    logging.getLogger('selenium.webdriver').setLevel(logging.WARNING)
    logging.getLogger('requests').setLevel(logging.WARNING)

    if args.mode is None:
        args.mode = get_mode(args.url)
    log.info('Going in %s mode' % args.mode)

    prefs = {
        'dom.audiochannel.mutedByDefault': True
    }
    with Browser(args.driver, profile_preferences=prefs) as br:
        # get track info
        if args.mode == 'track':
            # visit_track returns a single info dict; wrap it so the
            # loops below iterate over tracks rather than the dict keys.
            tracks = [visit_track(br, args.url)]
        elif args.mode == 'album':
            tracks = visit_album(br, args.url)
        else:
            tracks = visit_artist(br, args.url)

        # do something
        if args.do == 'print':
            for track in tracks:
                print(json.dumps(track))
        elif args.do == 'wgetprint':
            for track in tracks:
                # 'album' is missing for non-album tracks.
                name_parts = [track['artist']]
                if 'album' in track:
                    name_parts.append(track['album'])
                name_parts.append(track['title'])
                output = join(args.download_base,
                              '-'.join(name_parts)).replace(' ', '_')
                # URL and names come from a web page: quote them properly
                # so the printed command is safe to paste into a shell.
                cmd = ['wget', '-nv', quote(track['url']),
                       '-O', quote(output)]
                print(' '.join(cmd))
        elif args.do == 'download':
            pool = multiprocessing.Pool(processes=args.jobs)
            for res in pool.imap_unordered(functools.partial(download_track,
                                                             args=args),
                                           tracks):
                print('R', res)
            pool.close()
            pool.join()
        else:
            raise NotImplementedError('action ' + args.do)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
54
bandcavall/visit.py
Normal file
54
bandcavall/visit.py
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from splinter.exceptions import ElementDoesNotExist
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def visit_track(b, url):
    """Open a track page and collect its metadata and audio URL.

    Args:
        b: browser object used to navigate and query the page.
        url: address of the track page.

    Returns:
        A dict with ``artist``, ``title`` and ``url`` (the ``src`` of the
        page's first ``audio`` element), plus ``album`` when the page has
        an ``inAlbum`` element.
    """
    log.debug('Visiting track %s' % url)
    b.visit(url)

    # NOTE(review): presumably the first click starts playback so that
    # the audio element receives its source -- confirm.
    b.find_by_css('.playbutton').first.click()

    artist_element = b.find_by_xpath('//span[@itemprop="byArtist"]').first
    info = {'artist': artist_element.value}
    try:
        album_element = b.find_by_xpath('//span[@itemprop="inAlbum"]').first
        info['album'] = album_element.value
    except ElementDoesNotExist:
        # tracks that do not belong to an album have no such element
        pass
    title_element = b.find_by_css('#name-section h2.trackTitle').first
    info['title'] = title_element.value

    time.sleep(0.2)  # how nice!
    b.find_by_css('.playbutton').first.click()

    audio_element = b.find_by_css('audio').first
    info['url'] = audio_element['src']
    return info
|
||||||
|
|
||||||
|
|
||||||
|
def visit_album(b, url):
    """Yield the info dict of every track linked from an album page.

    Args:
        b: browser object used to navigate and query the page.
        url: address of the album page.

    Yields:
        The result of ``visit_track`` for each track link found.
    """
    log.debug('Visiting album %s' % url)
    b.visit(url)

    # Collect every link before navigating away from the album page.
    track_urls = []
    for anchor in b.find_by_css('.track_row_view.linked .title a'):
        href = anchor['href']
        if href and '/track/' in href:
            track_urls.append(href)

    for track_url in track_urls:
        yield visit_track(b, track_url)
|
||||||
|
|
||||||
|
|
||||||
|
def visit_artist(b, url):
    """Yield the info dict of every track reachable from an artist page.

    Stand-alone tracks linked from the page are visited first, then each
    linked album is visited in turn.

    Args:
        b: browser object used to navigate and query the page.
        url: address of the artist page.

    Yields:
        Track info dicts as produced by ``visit_track``.
    """
    log.debug('Visiting artist %s' % url)
    b.visit(url)

    def linked_urls(fragment):
        # hrefs of the '.square a' links whose target contains *fragment*
        found = []
        for anchor in b.find_by_css('.square a'):
            href = anchor['href']
            if href and fragment in href:
                found.append(href)
        return found

    # Both lists are built before leaving the artist page.
    album_urls = linked_urls('/album/')
    nonalbumtrack_urls = linked_urls('/track/')
    counts = (len(album_urls), len(nonalbumtrack_urls))
    log.debug('Found %d album + %d non-album tracks' % counts)

    for track_url in nonalbumtrack_urls:
        yield visit_track(b, track_url)
    for album_url in album_urls:
        yield from visit_album(b, album_url)
|
176
fetchcose.py
176
fetchcose.py
|
@ -1,176 +0,0 @@
|
||||||
import time
|
|
||||||
import os
|
|
||||||
from os.path import join, exists
|
|
||||||
import argparse
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
import multiprocessing
|
|
||||||
|
|
||||||
from splinter import Browser
|
|
||||||
from splinter.exceptions import ElementDoesNotExist
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def visit_track(b, url):
|
|
||||||
log.debug('Visiting track %s' % url)
|
|
||||||
b.visit(url)
|
|
||||||
b.find_by_css('.playbutton').first.click()
|
|
||||||
info = {}
|
|
||||||
info['artist'] = b.find_by_xpath('//span[@itemprop="byArtist"]') \
|
|
||||||
.first.value
|
|
||||||
try:
|
|
||||||
info['album'] = b.find_by_xpath('//span[@itemprop="inAlbum"]') \
|
|
||||||
.first.value
|
|
||||||
except ElementDoesNotExist: # non-album tracks
|
|
||||||
pass
|
|
||||||
info['title'] = b.find_by_css('#name-section h2.trackTitle').first.value
|
|
||||||
time.sleep(0.2) # how nice!
|
|
||||||
b.find_by_css('.playbutton').first.click()
|
|
||||||
info['url'] = b.find_by_css('audio').first['src']
|
|
||||||
return info
|
|
||||||
|
|
||||||
|
|
||||||
def visit_album(b, url):
|
|
||||||
log.debug('Visiting album %s' % url)
|
|
||||||
b.visit(url)
|
|
||||||
track_urls = [link['href'] for link in
|
|
||||||
b.find_by_css('.track_row_view.linked .title a')
|
|
||||||
if link['href'] and '/track/' in link['href']]
|
|
||||||
for track_url in track_urls:
|
|
||||||
yield visit_track(b, track_url)
|
|
||||||
|
|
||||||
|
|
||||||
def visit_artist(b, url):
|
|
||||||
log.debug('Visiting artist %s' % url)
|
|
||||||
b.visit(url)
|
|
||||||
album_urls = [link['href'] for link in
|
|
||||||
b.find_by_css('.square a')
|
|
||||||
if link['href'] and '/album/' in link['href']]
|
|
||||||
nonalbumtrack_urls = [link['href'] for link in
|
|
||||||
b.find_by_css('.square a')
|
|
||||||
if link['href'] and '/track/' in link['href']]
|
|
||||||
log.debug('Found %d album + %d non-album tracks' % (len(album_urls),
|
|
||||||
len(nonalbumtrack_urls)
|
|
||||||
))
|
|
||||||
for track_url in nonalbumtrack_urls:
|
|
||||||
yield visit_track(b, track_url)
|
|
||||||
for album_url in album_urls:
|
|
||||||
for track in visit_album(b, album_url):
|
|
||||||
yield track
|
|
||||||
|
|
||||||
|
|
||||||
def download_track(track):
|
|
||||||
log.debug('Downloading "%s"' % track['title'])
|
|
||||||
if 'album' in track:
|
|
||||||
fdir = join(track['artist'], track['album']).replace(' ', '_')
|
|
||||||
else:
|
|
||||||
fdir = join(track['artist'])
|
|
||||||
fname = track['title'].replace(' ', '_') + '.mp3'
|
|
||||||
fpath = join(args.download_base, fdir, fname)
|
|
||||||
os.makedirs(join(args.download_base, fdir), exist_ok=True)
|
|
||||||
if exists(fpath):
|
|
||||||
log.info('file %s exists, skipping' % fpath)
|
|
||||||
log.debug('download in ' + fpath)
|
|
||||||
resp = requests.get(track['url'], stream=True)
|
|
||||||
if not resp.ok:
|
|
||||||
log.error('Error downloading track ', track['title'])
|
|
||||||
return
|
|
||||||
with open(fpath, 'wb') as buf:
|
|
||||||
log.debug('writing ' + fname)
|
|
||||||
for block in resp.iter_content():
|
|
||||||
buf.write(block)
|
|
||||||
audio = mutagen.mp3.EasyMP3(fpath) # TODO: is it always mp3?
|
|
||||||
audio['title'] = track['title']
|
|
||||||
audio['artist'] = track['artist']
|
|
||||||
if 'album' in track:
|
|
||||||
audio['album'] = track['album']
|
|
||||||
audio.save()
|
|
||||||
log.info('track %(title)s downloaded' % track)
|
|
||||||
return fpath
|
|
||||||
|
|
||||||
|
|
||||||
def get_parser():
|
|
||||||
p = argparse.ArgumentParser(
|
|
||||||
description='Download tracks from bandcamp, sharing is caring!',
|
|
||||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
|
||||||
p.add_argument('url')
|
|
||||||
p.add_argument('--loglevel', default='INFO', help='Log level',
|
|
||||||
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])
|
|
||||||
info = p.add_argument_group('fetch track info')
|
|
||||||
info.add_argument('--driver', default='firefox',
|
|
||||||
help='Internal splinter option',
|
|
||||||
choices=['firefox', 'chrome'])
|
|
||||||
info.add_argument('--mode', default=None,
|
|
||||||
choices=['album', 'track', 'artist'])
|
|
||||||
do = p.add_argument_group('what to do with fetched urls')
|
|
||||||
do.add_argument('--do', default='print',
|
|
||||||
help='What to do with retrieved informations',
|
|
||||||
choices=['print', 'wgetprint', 'download'])
|
|
||||||
do.add_argument('--download-base', default='.',
|
|
||||||
help='Only applies to wget')
|
|
||||||
do.add_argument('-j', '--jobs', default=3, type=int,
|
|
||||||
help='Parallel downloads')
|
|
||||||
|
|
||||||
return p
|
|
||||||
|
|
||||||
|
|
||||||
def get_mode(url):
|
|
||||||
path = urlparse(url).path
|
|
||||||
if path.startswith('/track/'):
|
|
||||||
return 'track'
|
|
||||||
if path.startswith('/album/'):
|
|
||||||
return 'album'
|
|
||||||
raise ValueError('unsupported url')
|
|
||||||
|
|
||||||
|
|
||||||
args = get_parser().parse_args()
|
|
||||||
logging.basicConfig(level=args.loglevel)
|
|
||||||
logging.getLogger('selenium.webdriver').setLevel(logging.WARNING)
|
|
||||||
logging.getLogger('requests').setLevel(logging.WARNING)
|
|
||||||
if args.do == 'download':
|
|
||||||
try:
|
|
||||||
import requests
|
|
||||||
import mutagen.mp3
|
|
||||||
except ImportError:
|
|
||||||
log.critical('--do=download requires requests and download libraries')
|
|
||||||
raise
|
|
||||||
|
|
||||||
if args.mode is None:
|
|
||||||
args.mode = get_mode(args.url)
|
|
||||||
log.info('Going in %s mode' % args.mode)
|
|
||||||
|
|
||||||
prefs = {
|
|
||||||
'dom.audiochannel.mutedByDefault': True
|
|
||||||
}
|
|
||||||
with Browser(args.driver, profile_preferences=prefs) as br:
|
|
||||||
# get track info
|
|
||||||
if args.mode == 'track':
|
|
||||||
tracks = visit_track(br, args.url)
|
|
||||||
elif args.mode == 'album':
|
|
||||||
tracks = visit_album(br, args.url)
|
|
||||||
else:
|
|
||||||
tracks = visit_artist(br, args.url)
|
|
||||||
|
|
||||||
# do something
|
|
||||||
if args.do == 'print':
|
|
||||||
for track in tracks:
|
|
||||||
print(json.dumps(track))
|
|
||||||
elif args.do == 'wgetprint':
|
|
||||||
for track in tracks:
|
|
||||||
cmd = ['wget', '-nv', "'%s'" % track['url'],
|
|
||||||
'-O', "'%s'" %
|
|
||||||
join(args.download_base,
|
|
||||||
'-'.join((track['artist'], track['album'],
|
|
||||||
track['title']))).replace(' ', '_')
|
|
||||||
]
|
|
||||||
print(' '.join(cmd))
|
|
||||||
elif args.do == 'download':
|
|
||||||
pool = multiprocessing.Pool(processes=args.jobs)
|
|
||||||
for res in pool.imap_unordered(download_track, tracks):
|
|
||||||
print('R', res)
|
|
||||||
pool.close()
|
|
||||||
pool.join()
|
|
||||||
else:
|
|
||||||
raise NotImplementedError('action ' + args.do)
|
|
38
setup.py
Normal file
38
setup.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
from setuptools import setup
|
||||||
|
|
||||||
|
|
||||||
|
def read(fname):
    """Return the text of *fname*, looked up next to this file.

    The file is decoded as UTF-8 explicitly so that the result does not
    depend on the locale of the machine running the build.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path, encoding='utf-8') as buf:
        return buf.read()
|
||||||
|
|
||||||
|
|
||||||
|
# Distribution metadata passed to setuptools.setup().
conf = dict(
    name='bandcavall',
    version='0.1',
    description='Download music from bandcamp',
    long_description=read('README.md'),
    # The README is Markdown; without this it is rendered as
    # reStructuredText by package indexes.
    long_description_content_type='text/markdown',
    author='boyska',
    author_email='piuttosto@logorroici.org',
    url='https://git.lattuga.net/boyska/bandcavall',
    license='AGPL',
    packages=['bandcavall'],
    install_requires=[
        'splinter',
        'requests',
        'mutagen',
    ],
    zip_safe=False,
    # Installs the "bandcavall" command, bound to bandcavall.main:main.
    entry_points={'console_scripts': [
        'bandcavall=bandcavall.main:main',
    ]},
    classifiers=[
        "License :: OSI Approved :: GNU Affero General Public License v3",
        "Operating System :: POSIX :: Linux",
        "Programming Language :: Python :: 3",
    ])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
setup(**conf)
|
Loading…
Reference in a new issue