#!/usr/bin/env python3
"""Build printable A5 booklets from the #vgo posts of the ``oloturia`` account.

The script runs four steps, each of which skips work already present on disk:

1. download the posts from the local #vgo hashtag timeline of
   mastodon.bida.im into ``oloturiadump.json``;
2. download the preview image of every attachment into ``media/``;
3. render one PDF per post into ``pdf/`` (HTML templates + wkhtmltopdf);
4. assemble the per-post PDFs into booklets of 50 posts in ``books/``,
   each with a cover, an index and a back cover.
"""

import datetime
import io
import json
import locale
import os.path
import sys

import html2text
import pdfkit
import PyPDF2
import requests
from mastodon import Mastodon
from PyPDF2 import PdfFileReader, PdfFileWriter
from reportlab.lib import pagesizes
from reportlab.lib.pagesizes import A5
from reportlab.lib.styles import ParagraphStyle
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.pdfmetrics import stringWidth
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
from reportlab.platypus import Paragraph, SimpleDocTemplate

# Dates in the rendered posts use Italian month names (see the "[date]" field).
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')

FONT_NAME = 'Roboto'
FONT_FILE = "template/roboto-regular-webfont.ttf"
DUMP_FILE = 'oloturiadump.json'
POSTS_PER_BOOK = 50

# CSS for each image of a gallery, indexed as
# GALLERY_STYLES[number_of_images - 1][image_position].
GALLERY_STYLES = [
    ["inset: auto; width: 100%; height: 100%;"],
    ["inset: auto 2px auto auto; width: 50%; height: 100%;",
     "inset: auto auto auto 2px; width: 50%; height: 100%;"],
    ["inset: auto 2px auto auto; width: 50%; height: 100%;",
     "inset: auto auto 2px 2px; width: 50%; height: 50%;",
     "inset: 2px auto auto 2px; width: 50%; height: 50%;"],
    ["inset: auto 2px 2px auto; width: 50%; height: 50%;",
     "inset: auto auto 2px 2px; width: 50%; height: 50%;",
     "inset: 2px 2px auto auto; width: 50%; height: 50%;",
     "inset: 2px auto auto 2px; width: 50%; height: 50%;"],
]


def _warn(message):
    """Print a warning on stderr without interrupting the run."""
    print(message, file=sys.stderr)


def _first_page(path):
    """Return page 0 of the PDF stored at *path*.

    The file is read fully into memory so its handle can be closed right
    away: PyPDF2 resolves page objects lazily from the underlying stream, so
    the stream has to stay readable until the final writer is written out.
    """
    with open(path, 'rb') as pdf_file:
        buffer = io.BytesIO(pdf_file.read())
    return PdfFileReader(buffer).getPage(0)


def copertina(text):
    """Return the booklet cover page with *text* stamped on it.

    The caption is drawn horizontally centred, 100 points above the bottom
    edge of an A5 page, in 36 pt Roboto, and merged on top of the first page
    of ``copertina.pdf``.

    Args:
        text: caption to draw on the cover.

    Returns:
        The merged PyPDF2 page object.
    """
    font_size = 36
    pdfmetrics.registerFont(TTFont(FONT_NAME, FONT_FILE))

    # Draw the caption on an otherwise empty in-memory page.
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=A5)
    can.setFont(FONT_NAME, font_size)
    page_width = A5[0]
    text_width = stringWidth(text, FONT_NAME, font_size)
    can.drawString((page_width - text_width) / 2, 100, text)
    can.save()
    packet.seek(0)
    overlay = PdfFileReader(packet)

    # Stamp the caption on the existing cover artwork.
    page = _first_page("copertina.pdf")
    page.mergePage(overlay.getPage(0))
    return page


def indice(text):
    """Render the index of a booklet and return exactly two pages.

    Every line of *text* becomes one paragraph with a hanging indent.  A
    blank page is appended after the rendered pages, so a one-page index is
    returned together with a blank page; pages past the second are dropped.

    Args:
        text: index entries, one per line, as ``"<number>\\t<title>"``.

    Returns:
        A list with the first two PyPDF2 page objects.
    """
    pdfmetrics.registerFont(TTFont(FONT_NAME, FONT_FILE))
    style = ParagraphStyle(
        name='Normal',
        fontName='Roboto',
        fontSize=12,
        leading=14,
        leftIndent=32,
        firstLineIndent=-32,
        spaceBefore=5,
    )

    parts = []
    for ro in text.splitlines():
        # NOTE(review): as written these two replacements only use plain
        # spaces; they look like they were meant to insert non-breaking
        # spaces -- confirm against the original file.
        ro = ro.replace(' ', ' ')
        ro = ro.replace('\t', '    ')
        parts.append(Paragraph(ro, style=style))

    packet = io.BytesIO()
    doc = SimpleDocTemplate(packet, pagesize=A5,
                            rightMargin=20, leftMargin=20,
                            topMargin=40, bottomMargin=30)
    doc.build(parts)

    pdf_reader = PdfFileReader(packet)
    pdf_writer = PdfFileWriter()
    for page in pdf_reader.pages:
        pdf_writer.addPage(page)
    # Guarantees that a second page exists even for a short index.
    pdf_writer.addBlankPage()
    return [pdf_writer.getPage(0), pdf_writer.getPage(1)]


def _json_default(o):
    """``json.dump`` fallback: dates become ISO 8601 strings.

    Any other unsupported object is written as ``null`` (implicit ``None``),
    which is the behaviour the dump has always had.
    """
    if isinstance(o, (datetime.date, datetime.datetime)):
        return o.isoformat()
    return None


def _fetch_statuses():
    """Download every #vgo post by ``oloturia`` from the local timeline.

    Returns:
        A list of dicts holding the fields of each status used later on.
    """
    mastodon = Mastodon(api_base_url="https://mastodon.bida.im")
    all_vgos = []
    last_id = None
    while True:
        page = mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)
        # Stop only when the server has nothing older left.  Paging on the
        # unfiltered page keeps the walk going across pages that contain
        # only posts by other accounts.
        if not page:
            break
        all_vgos += [
            {
                'id': s['id'],
                'uri': s['uri'],
                'content': s['content'],
                'replies_count': s['replies_count'],
                'created': s['created_at'],
                'reblogs': s['reblogs_count'],
                'favourites': s['favourites_count'],
                'media': s['media_attachments'],
            }
            for s in page
            if s['account']['username'] == 'oloturia'
        ]
        last_id = page[-1]['id']
    return all_vgos


def _post_title(content):
    """Split the HTML *content* of a post into ``(number, name)``.

    The number is whatever precedes the first space of the plain-text
    rendering; the name is the last non-empty line or, when that line is
    shorter than 10 characters, the whole last paragraph joined on one line.
    """
    text = html2text.html2text(content)
    number = text.split(' ')[0]
    name = [line for line in text.splitlines() if line][-1]
    if len(name) < 10:
        name = [s for s in text.split("\n\n") if s][-1].replace("\n", " ")
    return number, name


def _media_path(media):
    """Return the local path of the preview image of *media*.

    The extension comes from ``preview_url`` because that is the file that
    gets downloaded; the same path must be used when building the HTML.
    """
    ext = os.path.splitext(media['preview_url'])[1]
    return os.path.join('media', str(media['id']) + ext)


def _download_media(all_vgos):
    """Download the missing preview images and index the posts.

    Returns:
        A dict mapping each post number to its name.
    """
    os.makedirs('media', exist_ok=True)
    vgo_dict = {}
    for vgo in all_vgos:
        vgo_num, vgo_name = _post_title(vgo['content'])
        vgo_dict[vgo_num] = vgo_name
        for media in vgo['media']:
            img_name = _media_path(media)
            if os.path.isfile(img_name):
                continue
            print(img_name)
            try:
                response = requests.get(media['preview_url'], timeout=60)
                response.raise_for_status()
            except requests.RequestException as exc:
                # Nothing is written, so the next run retries this image.
                _warn(f"Attenzione: download fallito per "
                      f"{media['preview_url']}: {exc}")
                continue
            with open(img_name, 'wb') as handler:
                handler.write(response.content)
    return vgo_dict


def _render_pdfs(all_vgos):
    """Render every post that has no PDF yet into ``pdf/<number>.pdf``."""
    with open('template.html') as html_file:
        html_base = html_file.read()
    with open('mediagallery.html') as html_file:
        html_mediagallery = html_file.read()

    os.makedirs('pdf', exist_ok=True)
    html_name = 'oloturia.html'
    options = {
        'page-size': 'A5',
        'margin-top': '0.5cm',
        'margin-right': '0.5cm',
        'margin-bottom': '0.5cm',
        'margin-left': '0.5cm',
        'encoding': "UTF-8",
        'quiet': '',
    }

    for vgo in all_vgos:
        vgo_num, vgo_name = _post_title(vgo['content'])
        pdf_name = os.path.join('pdf', vgo_num + '.pdf')
        if os.path.isfile(pdf_name):
            continue
        print(vgo_num + ' - ' + vgo_name)

        media_tot = len(vgo['media'])
        sizes = "622px" if media_tot == 1 else "311px"
        gallery = []
        for media_num, media in enumerate(vgo['media']):
            snippet = html_mediagallery
            snippet = snippet.replace("[media]", _media_path(media))
            snippet = snippet.replace(
                "[style]", GALLERY_STYLES[media_tot - 1][media_num])
            snippet = snippet.replace("[sizes]", sizes)
            gallery.append(snippet)

        created = datetime.datetime.fromisoformat(vgo['created'])
        content = html_base
        content = content.replace("[content]", vgo['content'])
        content = content.replace(
            "[date]", created.strftime("%-d %B %Y, %H:%M"))
        content = content.replace("[reply]", str(vgo['replies_count']))
        content = content.replace("[reblogs]", str(vgo['reblogs']))
        content = content.replace("[favourites]", str(vgo['favourites']))
        content = content.replace("[mediagallery]", "".join(gallery))

        # Written as UTF-8 to match the encoding declared to wkhtmltopdf.
        with open(html_name, 'w', encoding='utf-8') as handler:
            handler.write(content)
        try:
            pdfkit.from_file(html_name, pdf_name, options=options)
        except OSError as exc:
            # pdfkit reports wkhtmltopdf failures as IOError/OSError.  The
            # run stays best-effort, but the failure is no longer hidden.
            _warn(f"Attenzione: wkhtmltopdf ha segnalato un errore per "
                  f"{pdf_name}: {exc}")
        finally:
            os.remove(html_name)


def _build_books(vgo_dict):
    """Assemble the per-post PDFs into booklets of ``POSTS_PER_BOOK`` posts.

    Posts are looked up by their zero-padded three-digit number, from 1 up
    to the number of entries in *vgo_dict*.
    """
    os.makedirs('books', exist_ok=True)
    total = len(vgo_dict)
    # "total + 1" so that a last book holding a single post (51, 101, ...)
    # is generated too.
    for pagstart in range(1, total + 1, POSTS_PER_BOOK):
        pdf_writer = PdfFileWriter()
        book_num = pagstart // POSTS_PER_BOOK + 1
        pagend = min(book_num * POSTS_PER_BOOK, total)
        print(book_num)

        # Cover, followed by a blank page.
        pdf_writer.addPage(
            copertina(str(pagstart).zfill(3) + " - " + str(pagend).zfill(3)))
        pdf_writer.addBlankPage()

        index_lines = []
        for number in range(pagstart, pagend + 1):
            vgo_num = str(number).zfill(3)
            pdf_name = os.path.join('pdf', vgo_num + '.pdf')
            try:
                title = vgo_dict[vgo_num]
            except KeyError:
                _warn(f"Attenzione: nessun titolo per il post {vgo_num}, "
                      f"saltato")
                continue
            index_lines.append(vgo_num + "\t" + title + "\n")
            try:
                pdf_writer.addPage(_first_page(pdf_name))
            except Exception as exc:
                # Deliberately broad: a missing or unreadable page must not
                # abort the book, and the PyPDF2 error class lives in
                # different modules depending on the installed version.
                _warn(f"Attenzione: impossibile leggere {pdf_name}: {exc}")

        # Pad to a multiple of 8 pages; the 5 fixed pages are cover, blank,
        # two index pages and the back cover.
        # NOTE(review): when the count is already a multiple of 8 this still
        # adds 8 blank pages -- kept as is, confirm whether it is intended.
        posts_in_book = ((pagend - 1) % POSTS_PER_BOOK) + 1
        for _ in range(8 - ((posts_in_book + 5) % 8)):
            pdf_writer.addBlankPage()

        # Index.
        for indpag in indice("".join(index_lines)):
            pdf_writer.addPage(indpag)

        # Back cover.
        pdf_writer.addPage(_first_page("quarta.pdf"))

        book_name = os.path.join(
            'books', 'vgo_' + str(book_num).zfill(2) + '.pdf')
        with open(book_name, 'wb') as pdf_output:
            pdf_writer.write(pdf_output)


def main():
    """Run the whole pipeline: posts, images, per-post PDFs, booklets."""
    # Download all the posts from Mastodon (skipped when the dump exists).
    print("Scarico i post")
    if not os.path.isfile(DUMP_FILE):
        all_vgos = _fetch_statuses()
        with open(DUMP_FILE, 'w') as json_file:
            json.dump(all_vgos, json_file, indent=4, default=_json_default)

    # Always reload from disk so dates are ISO strings in both cases.
    print("Scarico le immagini")
    with open(DUMP_FILE) as json_file:
        all_vgos = json.load(json_file)
    vgo_dict = _download_media(all_vgos)

    print("Genero i PDF")
    _render_pdfs(all_vgos)

    print("Genero i libretti")
    _build_books(vgo_dict)


if __name__ == "__main__":
    main()