visit.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. import time
  2. import logging
  3. from splinter.exceptions import ElementDoesNotExist
  4. log = logging.getLogger(__name__)
  5. def visit_track(b, url):
  6. log.debug('Visiting track %s' % url)
  7. b.visit(url)
  8. b.find_by_css('.playbutton').first.click()
  9. info = {}
  10. info['artist'] = b.find_by_xpath('//span[@itemprop="byArtist"]') \
  11. .first.value
  12. try:
  13. info['album'] = b.find_by_xpath('//span[@itemprop="inAlbum"]') \
  14. .first.value
  15. except ElementDoesNotExist: # non-album tracks
  16. pass
  17. info['title'] = b.find_by_css('#name-section h2.trackTitle').first.value
  18. time.sleep(0.2) # how nice!
  19. b.find_by_css('.playbutton').first.click()
  20. info['url'] = b.find_by_css('audio').first['src']
  21. b.visit('data:,')
  22. return info
  23. def visit_album(b, url):
  24. log.debug('Visiting album %s' % url)
  25. b.visit(url)
  26. track_urls = [link['href'] for link in
  27. b.find_by_css('.track_row_view.linked .title a')
  28. if link['href'] and '/track/' in link['href']]
  29. for track_url in track_urls:
  30. yield visit_track(b, track_url)
  31. def visit_artist(b, url):
  32. log.debug('Visiting artist %s' % url)
  33. b.visit(url)
  34. album_urls = [link['href'] for link in
  35. b.find_by_css('.square a')
  36. if link['href'] and '/album/' in link['href']]
  37. nonalbumtrack_urls = [link['href'] for link in
  38. b.find_by_css('.track_row_view a')
  39. if link['href'] and '/track/' in link['href']]
  40. log.debug('Found %d album + %d non-album tracks' % (len(album_urls),
  41. len(nonalbumtrack_urls)
  42. ))
  43. for track_url in nonalbumtrack_urls:
  44. yield visit_track(b, track_url)
  45. for album_url in album_urls:
  46. for track in visit_album(b, album_url):
  47. yield track