|
@@ -0,0 +1,42 @@
|
|
|
+#!/usr/bin/env python3
|
|
|
+
|
|
|
+from bs4 import BeautifulSoup
|
|
|
+import cloudscraper
|
|
|
+
|
|
|
def _node_text(node):
    """Return the stripped text of a parsed HTML node, or "" when it is None."""
    return node.text.strip() if node is not None else ""


def _product_row(product):
    """Build the output columns for one menu-item element.

    Returns a list of five strings: name, first description, second
    description, space-joined labels, and the price with the euro sign
    removed and the decimal comma replaced by a dot.
    """
    name = _node_text(product.find(attrs={"data-js-test": "menu-item-name"}))

    descriptions = [
        node.text.strip()
        for node in product.find_all(attrs={"data-js-test": "menu-item-description"})
    ]
    # Always emit exactly two description columns so every row has the
    # same number of tab-separated fields.
    first_description, second_description = (descriptions + ["", ""])[:2]

    labels = " ".join(
        node.text.strip()
        for node in product.find_all(attrs={"data-js-test": "menu-item-labels"})
    )

    price_text = _node_text(product.find(attrs={"data-js-test": "menu-item-price"}))
    price = price_text.replace("€", "").replace(",", ".").strip()

    return [name, first_description, second_description, labels, price]


def AleppoScraper(url="https://www.justeat.it/restaurants-saporedialeppo/menu"):
    """Download a restaurant menu page and print its categories and products.

    For every element marked ``data-test-id="menu-category-item"`` inside the
    ``data-test-id="menu-tab"`` container, one line ``- <category name>`` is
    printed, followed by one tab-separated line per product (name, two
    description columns, labels, price).

    Args:
        url: Address of the menu page to fetch. Defaults to the page this
            script was originally written for.

    Raises:
        RuntimeError: If the fetched page contains no ``menu-tab`` container,
            e.g. because the request was blocked or the page layout changed.
    """
    scraper = cloudscraper.create_scraper(
        browser={"browser": "firefox", "platform": "windows", "mobile": False}
    )
    page = scraper.get(url).content

    soup = BeautifulSoup(page, "html.parser")
    menu = soup.find(attrs={"data-test-id": "menu-tab"})
    if menu is None:
        # find() returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on the next line.
        raise RuntimeError(
            'no element with data-test-id="menu-tab" found in ' + url
        )

    for category in menu.find_all(attrs={"data-test-id": "menu-category-item"}):
        header = category.find(attrs={"data-test-id": "category-header"})
        print("- " + _node_text(header))

        for product in category.find_all(attrs={"data-js-test": "menu-item"}):
            print("\t".join(_product_row(product)))
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Script entry point: scrape and print the menu only when this file is
    # executed directly, not when it is imported as a module.
    AleppoScraper()
|
|
|
+
|