#!/usr/bin/env python3
"""Scrape a justeat.it restaurant menu page and print it as tab-separated rows."""

from bs4 import BeautifulSoup
# import cloudscraper
import requests

MENU_URL = "https://www.justeat.it/restaurants-saporedialeppo/menu"

# Browser-like User-Agent sent with the request.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30.0


def _text_of(node) -> str:
    """Return the stripped text of *node*, or "" when the element is missing."""
    return node.text.strip() if node is not None else ""


def _product_row(product) -> list:
    """Build the output columns for one menu item element.

    Columns are: name, first description, second description,
    space-joined labels, price (without the euro sign, "." as decimal mark).
    """
    name = _text_of(product.find(attrs={"data-js-test": "menu-item-name"}))
    descriptions = [
        node.text.strip()
        for node in product.find_all(attrs={"data-js-test": "menu-item-description"})
    ]
    # Always emit exactly two description columns, padding with "" so that
    # every row has the same number of tab-separated fields.
    first_description, second_description = (descriptions + ["", ""])[:2]
    labels = " ".join(
        node.text.strip()
        for node in product.find_all(attrs={"data-js-test": "menu-item-labels"})
    )
    price_text = _text_of(product.find(attrs={"data-js-test": "menu-item-price"}))
    price = price_text.replace("€", "").replace(",", ".").strip()
    return [name, first_description, second_description, labels, price]


def AleppoScraper(url: str = MENU_URL, *, timeout: float = REQUEST_TIMEOUT) -> None:
    """Download the menu page at *url* and print its categories and items.

    Each category is printed as ``- <name>``, followed by one tab-separated
    line per item (see ``_product_row`` for the columns).

    Args:
        url: Menu page to fetch; defaults to ``MENU_URL``.
        timeout: Seconds to wait for the HTTP response.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        RuntimeError: If the page does not contain the menu container.
    """
    # NOTE: an alternative fetch through cloudscraper was sketched here:
    #   scraper = cloudscraper.create_scraper(browser={'browser': 'firefox','platform': 'windows','mobile': False})
    #   page = scraper.get(url).content
    response = requests.get(url, headers=REQUEST_HEADERS, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a menu.
    response.raise_for_status()
    page = response.content

    # NOTE: for offline debugging the page can be cached to / read from disk:
    #   with open('aleppo.html', 'wb') as f:
    #       f.write(page)
    #   with open('aleppo.html', 'rb') as f:
    #       page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    menu = soup.find(attrs={"data-test-id": "menu-tab"})
    if menu is None:
        # find() returns None when nothing matches; report that explicitly
        # rather than failing later with an AttributeError on None.
        raise RuntimeError(
            "menu container (data-test-id='menu-tab') not found in " + url
        )

    for category in menu.find_all(attrs={"data-test-id": "menu-category-item"}):
        category_name = _text_of(
            category.find(attrs={"data-test-id": "category-header"})
        )
        print("- " + category_name)
        for product in category.find_all(attrs={"data-js-test": "menu-item"}):
            print("\t".join(_product_row(product)))


if __name__ == "__main__":
    AleppoScraper()