# visit.py
  1. import time
  2. import logging
  3. from splinter.exceptions import ElementDoesNotExist
  4. log = logging.getLogger(__name__)
  5. def visit_track(b, url):
  6. log.debug('Visiting track %s' % url)
  7. b.visit(url)
  8. b.find_by_css('.playbutton').first.click()
  9. info = {}
  10. info['artist'] = b.find_by_xpath('//span[@itemprop="byArtist"]') \
  11. .first.value
  12. try:
  13. info['album'] = b.find_by_xpath('//span[@itemprop="inAlbum"]') \
  14. .first.value
  15. except ElementDoesNotExist: # non-album tracks
  16. pass
  17. info['title'] = b.find_by_css('#name-section h2.trackTitle').first.value
  18. time.sleep(0.2) # how nice!
  19. b.find_by_css('.playbutton').first.click()
  20. info['url'] = b.find_by_css('audio').first['src']
  21. return info
  22. def visit_album(b, url):
  23. log.debug('Visiting album %s' % url)
  24. b.visit(url)
  25. track_urls = [link['href'] for link in
  26. b.find_by_css('.track_row_view.linked .title a')
  27. if link['href'] and '/track/' in link['href']]
  28. for track_url in track_urls:
  29. yield visit_track(b, track_url)
  30. def visit_artist(b, url):
  31. log.debug('Visiting artist %s' % url)
  32. b.visit(url)
  33. album_urls = [link['href'] for link in
  34. b.find_by_css('.square a')
  35. if link['href'] and '/album/' in link['href']]
  36. nonalbumtrack_urls = [link['href'] for link in
  37. b.find_by_css('.track_row_view a')
  38. if link['href'] and '/track/' in link['href']]
  39. log.debug('Found %d album + %d non-album tracks' % (len(album_urls),
  40. len(nonalbumtrack_urls)
  41. ))
  42. for track_url in nonalbumtrack_urls:
  43. yield visit_track(b, track_url)
  44. for album_url in album_urls:
  45. for track in visit_album(b, album_url):
  46. yield track