diff --git a/diffido.py b/diffido.py index 5c6cafa..bd47379 100755 --- a/diffido.py +++ b/diffido.py @@ -149,9 +149,17 @@ def select_xpath(content, xpath): return content selected_content = [] for elem in elems: - selected_content.append(''.join([elem.text] + - [ElementTree.tostring(e, method='html').decode('utf-8', 'replace') - for e in elem.getchildren()])) + pieces = [] + if elem.text: + pieces.append(elem.text) + for sub_el in elem.getchildren(): + try: + sub_el_text = ElementTree.tostring(sub_el, method='html').decode('utf-8', 'replace') + except: + continue + if sub_el_text: + pieces.append(sub_el_text) + selected_content.append(''.join(pieces)) content = ''.join(selected_content).strip() return content