diff --git a/larigira/audiogen_podcast.py b/larigira/audiogen_podcast.py index 5b4fee5..3ee7b77 100644 --- a/larigira/audiogen_podcast.py +++ b/larigira/audiogen_podcast.py @@ -115,7 +115,10 @@ def get_item_date(el): def get_audio_from_item(item): encl = item.find("enclosure") - url = encl.get("url") + if encl is not None: + url = encl.get("url") + else: + return None audio_args = {} if item.find("duration") is not None: duration_parts = item.findtext("duration").split(":") @@ -125,8 +128,11 @@ def get_audio_from_item(item): if total_seconds: audio_args["duration"] = total_seconds else: - for child in item.xpath("group/content"): - if child.get("url") == url: + contents = item.xpath("group/content") + if not contents: + contents = item.xpath("content") + for child in contents: + if child.get("url") == url and child.get("duration") is not None: audio_args["duration"] = int(float(child.get("duration"))) break return Audio(url, **audio_args) @@ -137,6 +143,8 @@ def get_urls(tree): for it in items: # title = it.find("title").text audio = get_audio_from_item(it) + if audio is None: + continue if audio.date is None: audio.date = get_item_date(it) yield audio