diff --git a/.gitignore b/.gitignore index d136780..18f6652 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ media/ pdf/ *.json -*.pdf + diff --git a/copertina.pdf b/copertina.pdf new file mode 100644 index 0000000..c699015 Binary files /dev/null and b/copertina.pdf differ diff --git a/copertina.svg b/copertina.svg deleted file mode 100644 index eff183a..0000000 --- a/copertina.svg +++ /dev/null @@ -1,344 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 001 - 050 - - diff --git a/oloturia2pdf.py b/oloturia2pdf.py index c5d8238..3b946cc 100755 --- a/oloturia2pdf.py +++ b/oloturia2pdf.py @@ -21,28 +21,26 @@ def copertina(text): from PyPDF2 import PdfFileWriter, PdfFileReader import io from reportlab.pdfgen import canvas - from reportlab.lib.pagesizes import A4 + from reportlab.lib.pagesizes import A5 from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase.ttfonts import TTFont from reportlab.pdfbase.pdfmetrics import stringWidth - from reportlab.rl_config import defaultPageSize FONT = 'Roboto' - SIZE = 48 + SIZE = 36 packet = io.BytesIO() # create a new PDF with Reportlab pdfmetrics.registerFont(TTFont("Roboto", "template/roboto-regular-webfont.ttf")) - can = canvas.Canvas(packet, pagesize=A4) + can = canvas.Canvas(packet, pagesize=A5) can.setFont(FONT, SIZE) - PAGE_WIDTH = defaultPageSize[0] - #PAGE_HEIGHT = defaultPageSize[1] + PAGE_WIDTH = A5[0] text_width = stringWidth(text,FONT, SIZE) - can.drawString((PAGE_WIDTH - text_width) / 2, 150, text) + can.drawString((PAGE_WIDTH - text_width) / 2, 100, text) can.save() #move to the beginning of the StringIO buffer @@ -60,178 +58,180 @@ def copertina(text): return(page) +def main(): + # Scarica tutti i post da Mastodon -# Scarica tutti i post da Mastodon + print("Scarico i post") -print("Scarico i post") + def default(o): + if isinstance(o, (datetime.date, datetime.datetime)): + return o.isoformat() -def default(o): - if isinstance(o, (datetime.date, datetime.datetime)): - return o.isoformat() + if not os.path.isfile('oloturiadump.json'): + mastodon = Mastodon(api_base_url = "https://mastodon.bida.im") + all_vgos = [] + last_id = None -if not os.path.isfile('oloturiadump.json'): - mastodon = Mastodon(api_base_url = "https://mastodon.bida.im") - all_vgos = [] - last_id = None + while True: + statuses = list(filter(lambda s: s['account']['username'] == 'oloturia', mastodon.timeline_hashtag("vgo", local=True, max_id=last_id))) + if not statuses: + break + all_vgos += list(map( + lambda s: { + 'id': s['id'], + 'uri': s['uri'], + 'content': s['content'], + 'replies_count': s['replies_count'], + #'replies': mastodon.status_context(s['id']) if s['replies_count'] > 0 else [], + 'created': s['created_at'], + 'reblogs': s['reblogs_count'], + 'favourites': s['favourites_count'], + 'media': s['media_attachments'] + } + , statuses)) + last_id = statuses[-1]['id'] - while True: - statuses = list(filter(lambda s: s['account']['username'] == 'oloturia', mastodon.timeline_hashtag("vgo", local=True, max_id=last_id))) - if not statuses: - break - all_vgos += list(map( - lambda s: { - 'id': s['id'], - 'uri': s['uri'], - 'content': s['content'], - 'replies_count': s['replies_count'], - #'replies': mastodon.status_context(s['id']) if s['replies_count'] > 0 else [], - 'created': s['created_at'], - 'reblogs': s['reblogs_count'], - 'favourites': s['favourites_count'], - 'media': s['media_attachments'] - } - , statuses)) - last_id = statuses[-1]['id'] + #print(all_vgos) + #print(json.dumps(all_vgos, default=default)) - #print(all_vgos) - #print(json.dumps(all_vgos, default=default)) - - with open('oloturiadump.json', 'w') as json_file: - json.dump(all_vgos, json_file, indent=4, default=default) + with open('oloturiadump.json', 'w') as json_file: + json.dump(all_vgos, json_file, indent=4, default=default) -# Scarica tutte le immagini + # Scarica tutte le immagini -print("Scarico le immagini") -with open('oloturiadump.json') as json_file: - all_vgos = json.load(json_file) - os.makedirs('media', exist_ok=True) + print("Scarico le immagini") + with open('oloturiadump.json') as json_file: + all_vgos = json.load(json_file) + os.makedirs('media', exist_ok=True) - vgo_dict={} + vgo_dict={} - for vgo in all_vgos: - vgo_num = html2text.html2text(vgo['content']).split(' ')[0] - vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1] - #print(vgo_num +' - '+ vgo_name) - #print(str(vgo['id']) +' '+ vgo['uri']) - vgo_dict[vgo_num] = vgo_name - - for media in vgo['media']: - #print(str(media['id']) +' '+ media['url']) - - ext = os.path.splitext(media['preview_url'])[1] - img_name = os.path.join('media',str(media['id']) + ext) - - if not os.path.isfile(img_name): - print(img_name) - img_data = requests.get(media['preview_url']).content - with open(img_name, 'wb') as handler: - handler.write(img_data) - - with open('template.html') as html_file: - html_base = html_file.read() - with open('mediagallery.html') as html_file: - html_mediagallery = html_file.read() - - - # Genera i PDF - - print("Genero i PDF") - os.makedirs('pdf', exist_ok=True) - for vgo in all_vgos: - vgo_num = html2text.html2text(vgo['content']).split(' ')[0] - vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1] - - html_name = 'oloturia.html' - pdf_name = os.path.join('pdf', vgo_num + '.pdf') - - if not os.path.isfile(pdf_name): - print(vgo_num +' - '+ vgo_name) - - - media_num = 0 - mediagallery_tot = '' - media_tot = len(vgo['media']) - - sizes = "622px" if media_tot == 1 else "311px" - style = [ - ["inset: auto; width: 100%; height: 100%;"], - ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto auto 2px; width: 50%; height: 100%;"], - ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"], - ["inset: auto 2px 2px auto; width: 50%; height: 50%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px 2px auto auto; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"] - ] + for vgo in all_vgos: + vgo_num = html2text.html2text(vgo['content']).split(' ')[0] + vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1] + #print(vgo_num +' - '+ vgo_name) + #print(str(vgo['id']) +' '+ vgo['uri']) + vgo_dict[vgo_num] = vgo_name for media in vgo['media']: - mediagallery = html_mediagallery - ext = os.path.splitext(media['url'])[1] + #print(str(media['id']) +' '+ media['url']) + + ext = os.path.splitext(media['preview_url'])[1] img_name = os.path.join('media',str(media['id']) + ext) - mediagallery = mediagallery.replace("[media]", img_name) - mediagallery = mediagallery.replace("[style]", style[media_tot-1][media_num]) - mediagallery = mediagallery.replace("[sizes]", sizes) - mediagallery_tot = mediagallery_tot + mediagallery - media_num = media_num + 1 + + if not os.path.isfile(img_name): + print(img_name) + img_data = requests.get(media['preview_url']).content + with open(img_name, 'wb') as handler: + handler.write(img_data) - content = html_base - content = content.replace("[content]", vgo['content']) - content = content.replace("[date]", datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M")) - content = content.replace("[reply]", str(vgo['replies_count'])) - content = content.replace("[reblogs]", str(vgo['reblogs'])) - content = content.replace("[favourites]", str(vgo['favourites'])) - content = content.replace("[mediagallery]", mediagallery_tot) + with open('template.html') as html_file: + html_base = html_file.read() + with open('mediagallery.html') as html_file: + html_mediagallery = html_file.read() - with open(html_name, 'w') as handler: - handler.write(content) + + # Genera i PDF + + print("Genero i PDF") + os.makedirs('pdf', exist_ok=True) + for vgo in all_vgos: + vgo_num = html2text.html2text(vgo['content']).split(' ')[0] + vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1] + + html_name = 'oloturia.html' + pdf_name = os.path.join('pdf', vgo_num + '.pdf') + + if not os.path.isfile(pdf_name): + print(vgo_num +' - '+ vgo_name) + + + media_num = 0 + mediagallery_tot = '' + media_tot = len(vgo['media']) + + sizes = "622px" if media_tot == 1 else "311px" + style = [ + ["inset: auto; width: 100%; height: 100%;"], + ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto auto 2px; width: 50%; height: 100%;"], + ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"], + ["inset: auto 2px 2px auto; width: 50%; height: 50%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px 2px auto auto; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"] + ] + + for media in vgo['media']: + mediagallery = html_mediagallery + ext = os.path.splitext(media['url'])[1] + img_name = os.path.join('media',str(media['id']) + ext) + mediagallery = mediagallery.replace("[media]", img_name) + mediagallery = mediagallery.replace("[style]", style[media_tot-1][media_num]) + mediagallery = mediagallery.replace("[sizes]", sizes) + mediagallery_tot = mediagallery_tot + mediagallery + media_num = media_num + 1 + + content = html_base + content = content.replace("[content]", vgo['content']) + content = content.replace("[date]", datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M")) + content = content.replace("[reply]", str(vgo['replies_count'])) + content = content.replace("[reblogs]", str(vgo['reblogs'])) + content = content.replace("[favourites]", str(vgo['favourites'])) + content = content.replace("[mediagallery]", mediagallery_tot) + + with open(html_name, 'w') as handler: + handler.write(content) + + options = { + 'page-size': 'A5', + 'margin-top': '0.5cm', + 'margin-right': '0.5cm', + 'margin-bottom': '0.5cm', + 'margin-left': '0.5cm', + 'encoding': "UTF-8", + 'quiet': '' + } + + try: + pdfkit.from_file(html_name, pdf_name, options=options) + except: + pass + + os.remove(html_name) + + + # Genera i libretti + + print("Genero i libretti") + os.makedirs('books', exist_ok=True) + for book_num in range(1, int(len(vgo_dict) / 50) + 1): + pdfWriter = PyPDF2.PdfFileWriter() + print(book_num) + + pagstart = (book_num - 1) * 50 + 1 + pagend = book_num * 50 + + # aggiungere copertina + pdfWriter.addPage(copertina(str(pagstart).zfill(3) + " - " + str(pagend).zfill(3))) + + for vgo_num in [str(x).zfill(3) for x in range(pagstart, pagend + 1)]: + pdf_name = os.path.join('pdf', vgo_num + '.pdf') - options = { - 'page-size': 'A5', - 'margin-top': '0.5cm', - 'margin-right': '0.5cm', - 'margin-bottom': '0.5cm', - 'margin-left': '0.5cm', - 'encoding': "UTF-8", - 'quiet': '' - } - try: - pdfkit.from_file(html_name, pdf_name, options=options) + #print(vgo_num + " - " + vgo_dict[vgo_num]) + pdfFileObj = open(pdf_name, 'rb') + pdfReader = PyPDF2.PdfFileReader(pdfFileObj) + pageObj = pdfReader.getPage(0) + pdfWriter.addPage(pageObj) except: pass - - os.remove(html_name) - - -# Genera i libretti - -print("Genero i libretti") -os.makedirs('books', exist_ok=True) -for book_num in range(1, int(len(vgo_dict) / 50) + 1): - pdfWriter = PyPDF2.PdfFileWriter() - print(book_num) - - pagstart = (book_num - 1) * 50 + 1 - pagend = book_num * 50 - - # aggiungere copertina - pdfWriter.addPage(copertina(str(pagstart).zfill(3) + " - " + str(pagend).zfill(3))) - - for vgo_num in [str(x).zfill(3) for x in range(pagstart, pagend + 1)]: - pdf_name = os.path.join('pdf', vgo_num + '.pdf') - try: - #print(vgo_num + " - " + vgo_dict[vgo_num]) - pdfFileObj = open(pdf_name, 'rb') - pdfReader = PyPDF2.PdfFileReader(pdfFileObj) - pageObj = pdfReader.getPage(0) - pdfWriter.addPage(pageObj) - except: - pass - - # aggiungere indice ed eventualmente pagina finale - - book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf') - with open(book_name, 'wb') as pdfOutput: - pdfWriter.write(pdfOutput) + # aggiungere indice ed eventualmente pagina finale + + book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf') + with open(book_name, 'wb') as pdfOutput: + pdfWriter.write(pdfOutput) +if __name__ == "__main__": + main()