12345678910111213141516171819202122232425262728293031323334353637383940414243 |
- #!/usr/bin/env python3
- from bs4 import BeautifulSoup
- # import cloudscraper
- import requests
def _stripped_text(node):
    """Return the whitespace-stripped text of *node*, or "" if *node* is None."""
    return node.text.strip() if node is not None else ""


def AleppoScraper():
    """Download the hard-coded Just Eat menu page and print its contents.

    For every menu category a ``- <category name>`` line is printed,
    followed by one tab-separated line per product with these fields:
    name, first description, second description, space-joined labels and
    price (euro sign removed, decimal comma replaced by a dot).

    Elements missing from a category or product are printed as empty
    fields instead of aborting the whole run.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        RuntimeError: if the downloaded page has no menu container.
    """
    url = "https://www.justeat.it/restaurants-saporedialeppo/menu"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    # Without a timeout requests waits forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a menu.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")
    menu = soup.find(attrs={"data-test-id": "menu-tab"})
    if menu is None:
        # find() returns None when nothing matches; report that explicitly
        # rather than failing later with an opaque AttributeError.
        raise RuntimeError("menu container (data-test-id='menu-tab') not found in " + url)

    for category in menu.find_all(attrs={"data-test-id": "menu-category-item"}):
        category_name = _stripped_text(
            category.find(attrs={"data-test-id": "category-header"})
        )
        print("- " + category_name)

        for product in category.find_all(attrs={"data-js-test": "menu-item"}):
            name = _stripped_text(
                product.find(attrs={"data-js-test": "menu-item-name"})
            )
            descriptions = [
                node.text.strip()
                for node in product.find_all(attrs={"data-js-test": "menu-item-description"})
            ]
            labels = [
                node.text.strip()
                for node in product.find_all(attrs={"data-js-test": "menu-item-labels"})
            ]
            price = (
                _stripped_text(product.find(attrs={"data-js-test": "menu-item-price"}))
                .replace("€", "")
                .replace(",", ".")
                .strip()
            )

            # Only the first two descriptions are reported; pad so that a
            # product with fewer still yields two (possibly empty) columns.
            first_description, second_description = (descriptions + ["", ""])[:2]

            row = [name, first_description, second_description, " ".join(labels), price]
            print("\t".join(row))
# Run the scraper only when this file is executed as a script, so that
# importing the module does not trigger a network request.
if __name__ == "__main__":
    AleppoScraper()
|