From 7f1a701abc969fc374c838c7407b747826aec8ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Sabinsky=20T=C3=B8gern?= Date: Tue, 8 May 2018 15:39:08 +0200 Subject: [PATCH] Updated to follow Wikipedia's change of h3 to h4 Apparently Wikipedia has changed its HTML, introducing another header level. As a result, what was previously tagged h3 is now tagged h4, thus breaking this script (which fails silently, btw.). Changing h3 to h4 in the script fixes the issue. --- mcc-mnc/update_codes.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mcc-mnc/update_codes.py b/mcc-mnc/update_codes.py index fa3f9e2..1a93c3f 100644 --- a/mcc-mnc/update_codes.py +++ b/mcc-mnc/update_codes.py @@ -46,12 +46,12 @@ for t in soup.find_all("table", class_="wikitable"): try: if not 'MCC' in t.text: continue - h3=t.find_previous_sibling("h3") - if not h3 or ' - ' not in h3.text or '[edit]' not in h3.text: + h4=t.find_previous_sibling("h4") + if not h4 or ' - ' not in h4.text or '[edit]' not in h4.text: continue - h3=h3.text.split(' - ') - country_name=h3[0] - country_code=h3[1][:-6] # rm '[edit]' + h4=h4.text.split(' - ') + country_name=h4[0] + country_code=h4[1][:-6] # rm '[edit]' for tr in t.find_all('tr'): td=tr.find_all('td') @@ -71,4 +71,4 @@ for t in soup.find_all("table", class_="wikitable"): if mcc_codes: with io.open('mcc_codes.json', 'w', encoding='utf8') as outfile: - outfile.write(json.dumps(mcc_codes, ensure_ascii=False, encoding="utf-8")) \ No newline at end of file + outfile.write(json.dumps(mcc_codes, ensure_ascii=False, encoding="utf-8"))