alepposcraper.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. #!/usr/bin/env python3
  2. from bs4 import BeautifulSoup
  3. # import cloudscraper
  4. import requests
  5. def AleppoScraper():
  6. url = "https://www.justeat.it/restaurants-saporedialeppo/menu"
  7. # scraper = cloudscraper.create_scraper(browser={'browser': 'firefox','platform': 'windows','mobile': False})
  8. # page = scraper.get(url).content
  9. headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
  10. page = requests.get(url, headers=headers).content
  11. # with open('aleppo.html', 'wb') as f:
  12. # f.write(page)
  13. # with open('aleppo.html', 'rb') as f:
  14. # page = f.read()
  15. soup = BeautifulSoup(page, "html.parser")
  16. menu = soup.find(attrs={"data-test-id": "menu-tab"})
  17. for category in menu.find_all(attrs={"data-test-id": "menu-category-item"}):
  18. catname = category.find(attrs={"data-test-id": "category-header"}).text.strip()
  19. print ("- " + catname)
  20. for product in category.find_all(attrs={"data-js-test": "menu-item"}):
  21. prodname = product.find(attrs={"data-js-test": "menu-item-name"}).text.strip()
  22. prodid = product.attrs["data-product-id"]
  23. description = [i.text.strip() for i in product.find_all(attrs={"data-js-test": "menu-item-description"})]
  24. label = [i.text.strip() for i in product.find_all(attrs={"data-js-test": "menu-item-labels"})]
  25. price = product.find(attrs={"data-js-test": "menu-item-price"}).text.replace("€", "").replace(",", ".").strip()
  26. l = [prodname, description[0] if 0 < len(description) else "" , description[1] if 1 < len(description) else "", " ".join(label), price]
  27. print("\t".join(l))
  28. if __name__ == "__main__":
  29. AleppoScraper()