123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- import time
- import logging
- from splinter.exceptions import ElementDoesNotExist
- log = logging.getLogger(__name__)
def visit_track(b, url):
    """Open a track page and collect its metadata and audio source URL.

    Args:
        b: Browser object providing ``visit``, ``find_by_css`` and
            ``find_by_xpath`` (presumably a splinter browser, given the
            ``ElementDoesNotExist`` import -- confirm against the caller).
        url: Address of the track page to open.

    Returns:
        A dict with the keys ``artist``, ``title`` and ``url`` (the ``src``
        of the page's ``audio`` element), plus ``album`` when the page has
        an ``inAlbum`` element.

    Only the album lookup is guarded against ``ElementDoesNotExist``; a
    failure in any other lookup is not handled here.
    """
    # Hand the URL to the logger as an argument: the message is only
    # formatted when DEBUG is enabled, and a non-string URL (e.g. a tuple)
    # cannot break the ``%`` operator before the call is even made.
    log.debug('Visiting track %s', url)
    b.visit(url)
    b.find_by_css('.playbutton').first.click()

    info = {
        'artist': b.find_by_xpath('//span[@itemprop="byArtist"]').first.value,
    }
    try:
        info['album'] = b.find_by_xpath(
            '//span[@itemprop="inAlbum"]').first.value
    except ElementDoesNotExist:  # non-album tracks
        pass
    info['title'] = b.find_by_css('#name-section h2.trackTitle').first.value

    # NOTE(review): brief pause before the second click on the play button;
    # presumably gives the player time to react -- confirm.
    time.sleep(0.2)
    b.find_by_css('.playbutton').first.click()
    info['url'] = b.find_by_css('audio').first['src']
    return info
def visit_album(b, url):
    """Open an album page and yield the info dict of each linked track.

    Args:
        b: Browser object providing ``visit`` and ``find_by_css``
            (presumably a splinter browser -- confirm against the caller).
        url: Address of the album page to open.

    Yields:
        The result of ``visit_track`` for every link matched by
        ``.track_row_view.linked .title a`` whose ``href`` is non-empty and
        contains ``/track/``, in page order.

    This is a generator: nothing is visited until it is iterated.
    """
    log.debug('Visiting album %s', url)
    b.visit(url)

    # Read each element's href exactly once instead of up to three times
    # (twice in the filter, once for the value).
    hrefs = [link['href']
             for link in b.find_by_css('.track_row_view.linked .title a')]
    # The URL list is complete before the loop starts; visit_track() then
    # navigates the same browser to other pages.
    track_urls = [href for href in hrefs if href and '/track/' in href]

    for track_url in track_urls:
        yield visit_track(b, track_url)
def visit_artist(b, url):
    """Open an artist page and yield the info dict of every track found.

    Args:
        b: Browser object providing ``visit`` and ``find_by_css``
            (presumably a splinter browser -- confirm against the caller).
        url: Address of the artist page to open.

    Yields:
        First the ``visit_track`` result for each non-album track link
        (``.track_row_view a`` with ``/track/`` in its ``href``), then every
        item produced by ``visit_album`` for each album link (``.square a``
        with ``/album/`` in its ``href``).

    This is a generator: nothing is visited until it is iterated.
    """
    log.debug('Visiting artist %s', url)
    b.visit(url)

    # Read each element's href exactly once instead of up to three times
    # (twice in the filter, once for the value). Both URL lists are complete
    # before any further navigation happens on the same browser.
    album_hrefs = [link['href'] for link in b.find_by_css('.square a')]
    album_urls = [href for href in album_hrefs
                  if href and '/album/' in href]

    track_hrefs = [link['href'] for link in b.find_by_css('.track_row_view a')]
    nonalbumtrack_urls = [href for href in track_hrefs
                          if href and '/track/' in href]

    # Lazy logging arguments: formatted only when DEBUG is enabled.
    log.debug('Found %d album + %d non-album tracks',
              len(album_urls), len(nonalbumtrack_urls))

    for track_url in nonalbumtrack_urls:
        yield visit_track(b, track_url)

    for album_url in album_urls:
        for track in visit_album(b, album_url):
            yield track
|