Updated to follow Wikipedia's change of h3 to h4

Apparently Wikipedia has changed its HTML and introduced another heading level: what was previously tagged h3 is now tagged h4, which breaks this script (silently, by the way).

Changing h3 to h4 in the script fixes the issue.
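
For anyone hardening the script against the next markup shuffle: BeautifulSoup filters also accept a list of tag names, so the heading lookup can match either level instead of silently returning None. A minimal sketch, assuming a simplified page fragment (the html string below is illustrative, not Wikipedia's actual markup):

from bs4 import BeautifulSoup

# Illustrative fragment: Wikipedia now uses h4 where it previously used h3.
html = """
<h4>Denmark - DK<span>[edit]</span></h4>
<table class="wikitable"><tr><td>MCC</td></tr></table>
"""

soup = BeautifulSoup(html, "html.parser")
t = soup.find("table", class_="wikitable")

# A list of tag names matches whichever heading level is present, so a
# future h3/h4 reshuffle degrades gracefully instead of failing silently.
heading = t.find_previous_sibling(["h3", "h4"])
if heading is not None and ' - ' in heading.text:
    country_name, _, rest = heading.text.partition(' - ')
    print(country_name, rest)   # Denmark DK[edit]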
Anders Sabinsky Tøgern 2018-05-08 15:39:08 +02:00 committed by GitHub
parent def53e7d05
commit 7f1a701abc

@@ -46,12 +46,12 @@ for t in soup.find_all("table", class_="wikitable"):
     try:
         if not 'MCC' in t.text:
            continue
-        h3=t.find_previous_sibling("h3")
-        if not h3 or ' - ' not in h3.text or '[edit]' not in h3.text:
+        h4=t.find_previous_sibling("h4")
+        if not h4 or ' - ' not in h4.text or '[edit]' not in h4.text:
            continue
-        h3=h3.text.split(' - ')
-        country_name=h3[0]
-        country_code=h3[1][:-6] # rm '[edit]'
+        h4=h4.text.split(' - ')
+        country_name=h4[0]
+        country_code=h4[1][:-6] # rm '[edit]'
 
         for tr in t.find_all('tr'):
             td=tr.find_all('td')
@@ -71,4 +71,4 @@ for t in soup.find_all("table", class_="wikitable"):
 
 if mcc_codes:
     with io.open('mcc_codes.json', 'w', encoding='utf8') as outfile:
-        outfile.write(json.dumps(mcc_codes, ensure_ascii=False, encoding="utf-8"))
+        outfile.write(json.dumps(mcc_codes, ensure_ascii=False, encoding="utf-8"))