#!/usr/bin/env python3
"""Build printable booklets from the #vgo posts of the ``oloturia`` account.

The script runs four steps, each of which skips artefacts already on disk:

1. dump the posts to ``oloturiadump.json``;
2. download the preview image of every attachment into ``media/``;
3. render every post to ``pdf/<number>.pdf`` from ``template.html`` and
   ``mediagallery.html``;
4. bind the pages, fifty at a time, into ``books/bookNN.pdf`` behind a cover.
"""
import datetime
import io
import json
import locale
import os.path
import sys

import html2text
import pdfkit
import PyPDF2
import requests
from mastodon import Mastodon
from reportlab.lib import pagesizes

# Month names in the rendered dates follow the Italian locale.
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')

# Number of posts bound into one booklet.
BOOK_SIZE = 50


def copertina(text):
    """Return the first page of ``copertina.pdf`` with *text* stamped on it.

    The text is drawn in Roboto 48pt, horizontally centred, 150 points above
    the bottom edge, on a transparent overlay that is merged onto the cover.

    Args:
        text: label to print on the cover (the caller passes a page range).

    Returns:
        The merged PyPDF2 page object, ready for ``PdfFileWriter.addPage``.
    """
    from PyPDF2 import PdfFileReader
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import A4
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.pdfbase.pdfmetrics import stringWidth
    from reportlab.rl_config import defaultPageSize

    FONT = 'Roboto'
    SIZE = 48

    # Draw the label on an in-memory, single-page PDF.
    packet = io.BytesIO()
    pdfmetrics.registerFont(TTFont("Roboto", "template/roboto-regular-webfont.ttf"))
    can = canvas.Canvas(packet, pagesize=A4)
    can.setFont(FONT, SIZE)
    page_width = defaultPageSize[0]
    text_width = stringWidth(text, FONT, SIZE)
    can.drawString((page_width - text_width) / 2, 150, text)
    can.save()
    packet.seek(0)
    overlay_pdf = PdfFileReader(packet)

    # Copy the cover template into memory so the file handle is closed right
    # away while the reader can still pull data when the book is written.
    with open("copertina.pdf", "rb") as cover_file:
        cover_pdf = PdfFileReader(io.BytesIO(cover_file.read()))

    page = cover_pdf.getPage(0)
    page.mergePage(overlay_pdf.getPage(0))
    return page


def default(o):
    """JSON ``default`` hook: serialise dates and datetimes as ISO 8601.

    Any other type falls through and yields ``None`` (written as ``null``).
    """
    if isinstance(o, (datetime.date, datetime.datetime)):
        return o.isoformat()


def _vgo_title(content):
    """Split a post's HTML *content* into ``(number, name)``.

    The number is everything before the first space of the text rendering;
    the name is its last non-empty line.

    Raises:
        IndexError: if the text rendering has no non-empty line.
    """
    text = html2text.html2text(content)
    lines = [line for line in text.splitlines() if line]
    return text.split(' ')[0], lines[-1]


def _media_path(media):
    """Return the local path of an attachment's downloaded preview image.

    The extension comes from ``preview_url`` because that is the file that
    gets downloaded; both the download step and the HTML gallery must use
    this helper so they always agree on the file name.
    """
    ext = os.path.splitext(media['preview_url'])[1]
    return os.path.join('media', str(media['id']) + ext)


# Download all the posts from Mastodon
print("Scarico i post")
if not os.path.isfile('oloturiadump.json'):
    mastodon = Mastodon(api_base_url="https://mastodon.bida.im")
    all_vgos = []
    last_id = None
    while True:
        page_statuses = mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)
        # Stop on an empty *page*, not on an empty filtered page: a page
        # holding only other accounts' posts must not end the crawl.
        if not page_statuses:
            break
        all_vgos += [
            {
                'id': s['id'],
                'uri': s['uri'],
                'content': s['content'],
                'replies_count': s['replies_count'],
                'created': s['created_at'],
                'reblogs': s['reblogs_count'],
                'favourites': s['favourites_count'],
                'media': s['media_attachments'],
            }
            for s in page_statuses
            if s['account']['username'] == 'oloturia'
        ]
        last_id = page_statuses[-1]['id']
    with open('oloturiadump.json', 'w') as json_file:
        json.dump(all_vgos, json_file, indent=4, default=default)

# Download all the images
print("Scarico le immagini")
with open('oloturiadump.json') as json_file:
    all_vgos = json.load(json_file)
os.makedirs('media', exist_ok=True)
vgo_dict = {}
for vgo in all_vgos:
    vgo_num, vgo_name = _vgo_title(vgo['content'])
    vgo_dict[vgo_num] = vgo_name
    for media in vgo['media']:
        img_name = _media_path(media)
        if os.path.isfile(img_name):
            continue
        print(img_name)
        try:
            response = requests.get(media['preview_url'], timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            # Do not cache an error body as if it were the image; the next
            # run will retry because the file is still missing.
            print("Download fallito per " + media['preview_url'] + ": " + str(exc),
                  file=sys.stderr)
            continue
        with open(img_name, 'wb') as handler:
            handler.write(response.content)

with open('template.html', encoding='utf-8') as html_file:
    html_base = html_file.read()
with open('mediagallery.html', encoding='utf-8') as html_file:
    html_mediagallery = html_file.read()

# CSS placement of each image, indexed by [number of images - 1][position].
style = [
    ["inset: auto; width: 100%; height: 100%;"],
    ["inset: auto 2px auto auto; width: 50%; height: 100%;",
     "inset: auto auto auto 2px; width: 50%; height: 100%;"],
    ["inset: auto 2px auto auto; width: 50%; height: 100%;",
     "inset: auto auto 2px 2px; width: 50%; height: 50%;",
     "inset: 2px auto auto 2px; width: 50%; height: 50%;"],
    ["inset: auto 2px 2px auto; width: 50%; height: 50%;",
     "inset: auto auto 2px 2px; width: 50%; height: 50%;",
     "inset: 2px 2px auto auto; width: 50%; height: 50%;",
     "inset: 2px auto auto 2px; width: 50%; height: 50%;"],
]

# wkhtmltopdf options; the encoding matches how the temporary HTML is written.
options = {
    'page-size': 'A5',
    'margin-top': '0.5cm',
    'margin-right': '0.5cm',
    'margin-bottom': '0.5cm',
    'margin-left': '0.5cm',
    'encoding': "UTF-8",
    'quiet': ''
}

# Generate the PDFs
print("Genero i PDF")
os.makedirs('pdf', exist_ok=True)
html_name = 'oloturia.html'
for vgo in all_vgos:
    vgo_num, vgo_name = _vgo_title(vgo['content'])
    pdf_name = os.path.join('pdf', vgo_num + '.pdf')
    if os.path.isfile(pdf_name):
        continue
    print(vgo_num +' - '+ vgo_name)

    # The layout table only covers up to len(style) images per post; extra
    # attachments are left out rather than raising IndexError.
    gallery = vgo['media'][:len(style)]
    media_tot = len(gallery)
    sizes = "622px" if media_tot == 1 else "311px"
    mediagallery_tot = ''
    for media_num, media in enumerate(gallery):
        mediagallery = html_mediagallery
        mediagallery = mediagallery.replace("[media]", _media_path(media))
        mediagallery = mediagallery.replace("[style]", style[media_tot - 1][media_num])
        mediagallery = mediagallery.replace("[sizes]", sizes)
        mediagallery_tot = mediagallery_tot + mediagallery

    content = html_base
    content = content.replace("[content]", vgo['content'])
    content = content.replace("[date]", datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M"))
    content = content.replace("[reply]", str(vgo['replies_count']))
    content = content.replace("[reblogs]", str(vgo['reblogs']))
    content = content.replace("[favourites]", str(vgo['favourites']))
    content = content.replace("[mediagallery]", mediagallery_tot)
    with open(html_name, 'w', encoding='utf-8') as handler:
        handler.write(content)

    try:
        pdfkit.from_file(html_name, pdf_name, options=options)
    except OSError as exc:
        # Best effort: report the failure and carry on with the next post.
        print("Errore nella generazione di " + pdf_name + ": " + str(exc),
              file=sys.stderr)
    finally:
        # The temporary HTML is removed even if rendering is interrupted.
        os.remove(html_name)

# Generate the booklets
print("Genero i libretti")
os.makedirs('books', exist_ok=True)
for book_num in range(1, len(vgo_dict) // BOOK_SIZE + 1):
    pdfWriter = PyPDF2.PdfFileWriter()
    print(book_num)
    pagstart = (book_num - 1) * BOOK_SIZE + 1
    pagend = book_num * BOOK_SIZE
    # TODO: add a proper cover
    pdfWriter.addPage(copertina(str(pagstart).zfill(3) + " - " + str(pagend).zfill(3)))
    for vgo_num in [str(x).zfill(3) for x in range(pagstart, pagend + 1)]:
        pdf_name = os.path.join('pdf', vgo_num + '.pdf')
        try:
            # Copy the page file into memory so its handle is closed now
            # while the reader stays usable until the book is written.
            with open(pdf_name, 'rb') as pdfFileObj:
                pdf_bytes = io.BytesIO(pdfFileObj.read())
        except FileNotFoundError:
            # A missing number simply leaves a gap in the booklet.
            continue
        except OSError as exc:
            print("Impossibile leggere " + pdf_name + ": " + str(exc),
                  file=sys.stderr)
            continue
        try:
            pageObj = PyPDF2.PdfFileReader(pdf_bytes).getPage(0)
            pdfWriter.addPage(pageObj)
        except Exception as exc:
            # An unreadable page must not abort the whole booklet, but it
            # should not vanish silently either.
            print("Pagina non valida " + pdf_name + ": " + str(exc),
                  file=sys.stderr)
    # TODO: add an index and possibly a closing page
    book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf')
    with open(book_name, 'wb') as pdfOutput:
        pdfWriter.write(pdfOutput)