get_rep_text.py 629 B

12345678910111213141516171819202122
  1. #!/usr/bin/env python
  2. #from: https://paste.debian.net/hidden/55e325f5/
  3. import lxml.html
  4. import requests
  5. import re
  6. import sys
  7. import subprocess
  8. import tempfile
  def get_rep_text(url, timeout=30):
      """Fetch a page and return the text of its ``paywall`` divs.

      Every ``/pwa/`` substring in *url* is replaced with ``/ws/detail/``,
      the resulting URL is downloaded, and the text content of each
      ``<div class="paywall">`` element is concatenated.

      Args:
          url: Page URL; only ``/pwa/`` substrings are rewritten, so a URL
              without one is fetched unchanged.
          timeout: Seconds to wait for the server before giving up.

      Returns:
          The concatenated text of all matching divs, or ``''`` when the
          page contains none.

      Raises:
          requests.RequestException: On connection failure, timeout, or an
              HTTP error status (4xx/5xx).
      """
      # A literal substring swap needs no regex; str.replace also replaces
      # every occurrence, exactly as re.sub did.
      text_url = url.replace('/pwa/', '/ws/detail/')

      # Without a timeout, requests can block forever on a stalled server.
      page = requests.get(text_url, timeout=timeout)
      # Fail loudly on an HTTP error instead of silently scraping an error page.
      page.raise_for_status()

      root = lxml.html.fromstring(page.text)
      # NOTE(review): dumps the raw page to stdout; looks like leftover debug
      # output -- confirm nobody relies on it before removing.
      print(page.text)
      return ''.join(
          div.text_content() for div in root.xpath('//div[@class="paywall"]'))
  if __name__ == '__main__':
      # Exit with a usage message instead of an IndexError traceback when the
      # URL argument is missing.
      if len(sys.argv) < 2:
          sys.exit('usage: {} URL'.format(sys.argv[0]))

      # delete=False keeps the file on disk after the handle is closed, so the
      # browser can still read it; the file is intentionally not removed here.
      with tempfile.NamedTemporaryFile(delete=False) as tmpf:
          tmpf.write(get_rep_text(sys.argv[1]).encode('utf-8'))

      # Launch the browser only after the with-block has closed (and therefore
      # flushed) the file, so it never sees a partially written file.
      subprocess.call(['firefox', tmpf.name])