12345678910111213141516171819202122 |
- #!/usr/bin/env python
- #from: https://paste.debian.net/hidden/55e325f5/
- import lxml.html
- import requests
- import re
- import sys
- import subprocess
- import tempfile
def get_rep_text(url, timeout=30):
    """Fetch the page behind *url* and return the text of its paywall divs.

    Every ``/pwa/`` in *url* is rewritten to ``/ws/detail/`` before the
    request is made. The response body is printed to stdout, parsed as
    HTML, and the text content of every ``div`` whose ``class`` attribute
    is exactly ``paywall`` is concatenated.

    Args:
        url: Address of the article page.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout ``requests.get`` can block indefinitely.

    Returns:
        The concatenated text of all matching divs; an empty string when
        no div matches.
    """
    text_url = re.sub(r'/pwa/', '/ws/detail/', url)
    page = requests.get(text_url, timeout=timeout)
    root = lxml.html.fromstring(page.text)
    # The raw response is echoed to stdout, as the script has always done.
    print(page.text)
    return ''.join(
        div.text_content() for div in root.xpath('//div[@class="paywall"]'))
if __name__ == '__main__':
    # Exit with a usage hint rather than an IndexError traceback when the
    # URL argument is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: {0} URL'.format(sys.argv[0]))

    # Fetch first, so a failed download does not leave an empty temp file.
    article = get_rep_text(sys.argv[1]).encode('utf-8')

    # delete=False keeps the file on disk after the handle is closed, so
    # the browser can still read it.
    with tempfile.NamedTemporaryFile(delete=False) as tmpf:
        tmpf.write(article)

    # Started after the with-block: the content is flushed and the file is
    # closed before Firefox opens it.
    subprocess.call(['firefox', tmpf.name])
|