Updated to follow Wikipedia's change of h3 to h4
Apparently Wikipedia has changed its HTML, introducing another header level. As a result, content that was previously tagged h3 is now tagged h4, which broke this script (which, incidentally, fails silently). Changing h3 to h4 in the script fixes the issue.
This commit is contained in:
parent
def53e7d05
commit
7f1a701abc
1 changed file with 6 additions and 6 deletions
|
@ -46,12 +46,12 @@ for t in soup.find_all("table", class_="wikitable"):
|
|||
try:
|
||||
if not 'MCC' in t.text:
|
||||
continue
|
||||
h3=t.find_previous_sibling("h3")
|
||||
if not h3 or ' - ' not in h3.text or '[edit]' not in h3.text:
|
||||
h4=t.find_previous_sibling("h4")
|
||||
if not h4 or ' - ' not in h4.text or '[edit]' not in h4.text:
|
||||
continue
|
||||
h3=h3.text.split(' - ')
|
||||
country_name=h3[0]
|
||||
country_code=h3[1][:-6] # rm '[edit]'
|
||||
h4=h4.text.split(' - ')
|
||||
country_name=h4[0]
|
||||
country_code=h4[1][:-6] # rm '[edit]'
|
||||
|
||||
for tr in t.find_all('tr'):
|
||||
td=tr.find_all('td')
|
||||
|
@ -71,4 +71,4 @@ for t in soup.find_all("table", class_="wikitable"):
|
|||
|
||||
if mcc_codes:
|
||||
with io.open('mcc_codes.json', 'w', encoding='utf8') as outfile:
|
||||
outfile.write(json.dumps(mcc_codes, ensure_ascii=False, encoding="utf-8"))
|
||||
outfile.write(json.dumps(mcc_codes, ensure_ascii=False, encoding="utf-8"))
|
||||
|
|
Loading…
Reference in a new issue