visit.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. import time
  2. import logging
  3. from splinter.exceptions import ElementDoesNotExist
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
  5. def visit_track(b, url):
  6. log.debug('Visiting track %s' % url)
  7. b.visit(url)
  8. b.find_by_css('.playbutton').first.click()
  9. info = {}
  10. info['artist'] = b.find_by_xpath('//span[@itemprop="byArtist"]') \
  11. .first.value
  12. try:
  13. info['album'] = b.find_by_xpath('//span[@itemprop="inAlbum"]') \
  14. .first.value
  15. except ElementDoesNotExist: # non-album tracks
  16. pass
  17. try:
  18. info['lyrics'] = b.find_by_css('.lyricsText').first.value
  19. except ElementDoesNotExist:
  20. pass
  21. info['title'] = b.find_by_css('#name-section h2.trackTitle').first.value
  22. time.sleep(0.2) # how nice!
  23. b.find_by_css('.playbutton').first.click()
  24. info['url'] = b.find_by_css('audio').first['src']
  25. b.visit('data:,')
  26. return info
  27. def visit_album(b, url):
  28. log.debug('Visiting album %s' % url)
  29. b.visit(url)
  30. track_urls = [link['href'] for link in
  31. b.find_by_css('.track_row_view.linked .title a')
  32. if link['href'] and '/track/' in link['href']]
  33. for track_url in track_urls:
  34. yield visit_track(b, track_url)
  35. def visit_artist(b, url):
  36. log.debug('Visiting artist %s' % url)
  37. b.visit(url)
  38. album_urls = [link['href'] for link in
  39. b.find_by_css('.square a')
  40. if link['href'] and '/album/' in link['href']]
  41. nonalbumtrack_urls = [link['href'] for link in
  42. b.find_by_css('.track_row_view a')
  43. if link['href'] and '/track/' in link['href']]
  44. log.debug('Found %d album + %d non-album tracks' % (len(album_urls),
  45. len(nonalbumtrack_urls)
  46. ))
  47. for track_url in nonalbumtrack_urls:
  48. yield visit_track(b, track_url)
  49. for album_url in album_urls:
  50. for track in visit_album(b, album_url):
  51. yield track