From 3929dcdebfb3ab658e5b7b7a77f93df1dff5ccf1 Mon Sep 17 00:00:00 2001
From: itec78
Date: Mon, 27 Mar 2023 22:43:36 +0200
Subject: [PATCH] index fix

---
Note: this copy of the patch was damaged during extraction. The separator
strings in the added `split(...)` / `replace(...)` calls below are shown
exactly as they survived (raw line breaks or an empty string); they were
presumably HTML tags in the original commit and must be restored from
commit 3929dcd before this patch can be applied. Leading indentation was
also collapsed and has been reconstructed.

 oloturia2pdf.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/oloturia2pdf.py b/oloturia2pdf.py
index 0106cac..98952d6 100755
--- a/oloturia2pdf.py
+++ b/oloturia2pdf.py
@@ -10,6 +10,7 @@ import html2text
 import pdfkit
 import locale
 import PyPDF2
+import html
 
 locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
 
@@ -161,9 +162,15 @@ def main():
 
     for vgo in all_vgos:
         vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
-        vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
-        if len(vgo_name) < 10:
-            vgo_name = [s for s in html2text.html2text(vgo['content']).split("\n\n") if s][-1].replace("\n"," ")
+        # vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
+        # if len(vgo_name) < 10:
+        #     vgo_name = [s for s in html2text.html2text(vgo['content']).split("\n\n") if s][-1].replace("\n"," ")
+
+        vgo_name = vgo['content'].split("

")[-1].replace("

","")
+        vgo_name = vgo_name.split("
")[-1]
+        vgo_name = vgo_name.split("")[-1]
+        vgo_name = html.unescape(vgo_name).strip()
+
         #print(vgo_num +' - '+ vgo_name)
         #print(str(vgo['id']) +' '+ vgo['uri'])
         vgo_dict[vgo_num] = vgo_name