123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290 |
- #!/usr/bin/env python3
- from mastodon import Mastodon
- import json
- import datetime
- import os.path
- from reportlab.lib import pagesizes
- import requests
- import html2text
- import pdfkit
- import locale
- import PyPDF2
- locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
def copertina(text):
    """Return the booklet cover page with *text* stamped on it.

    A one-page A5 overlay is rendered with ReportLab, with *text* drawn in
    36pt Roboto, horizontally centred, at y=100. The overlay is then merged
    onto the first page of ``copertina.pdf``.

    Args:
        text: The label to draw on the cover.

    Returns:
        The merged PyPDF2 page object, ready to be added to a writer.
    """
    import io

    from PyPDF2 import PdfFileReader
    from reportlab.lib.pagesizes import A5
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.pdfmetrics import stringWidth
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.pdfgen import canvas

    FONT = 'Roboto'
    SIZE = 36

    # Render the text overlay into an in-memory PDF.
    pdfmetrics.registerFont(TTFont(FONT, "template/roboto-regular-webfont.ttf"))
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=A5)
    can.setFont(FONT, SIZE)
    page_width = A5[0]
    text_width = stringWidth(text, FONT, SIZE)
    can.drawString((page_width - text_width) / 2, 100, text)
    can.save()

    # Rewind so the reader sees the buffer from its start.
    packet.seek(0)
    overlay_pdf = PdfFileReader(packet)

    # Load the cover template fully into memory: PyPDF2 resolves page objects
    # lazily, so the backing stream must outlive this function, while the
    # on-disk file handle should not be left open.
    with open("copertina.pdf", "rb") as cover_file:
        existing_pdf = PdfFileReader(io.BytesIO(cover_file.read()))

    # Stamp the overlay on top of the template's first page (in place).
    page = existing_pdf.getPage(0)
    page.mergePage(overlay_pdf.getPage(0))
    return page
def indice(text):
    """Render the booklet index as A5 PDF pages.

    Each line of *text* becomes one paragraph, set in 12pt Roboto with a
    hanging indent, laid out by ReportLab into an in-memory PDF.

    Args:
        text: Newline-separated index entries; the caller builds each entry
            as ``"<number>\\t<title>"``.

    Returns:
        A list with every page of the rendered index as PyPDF2 page objects,
        however many pages the entries needed.
    """
    import io

    from PyPDF2 import PdfFileReader
    from reportlab.lib.pagesizes import A5
    from reportlab.lib.styles import ParagraphStyle
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.platypus import Paragraph, SimpleDocTemplate

    pdfmetrics.registerFont(TTFont("Roboto", "template/roboto-regular-webfont.ttf"))
    style = ParagraphStyle(
        name='Normal',
        fontName='Roboto',
        fontSize=12,
        leading=14,
        # Hanging indent: wrapped lines are indented, the first line is not.
        leftIndent=32,
        firstLineIndent=-32,
        spaceBefore=5,
    )

    parts = []
    for ro in text.splitlines():
        # NOTE(review): as transcribed, this first replacement looks like a
        # no-op (space -> space); the original may have used a non-breaking
        # space or an HTML entity here -- confirm against the real file.
        ro = ro.replace(' ',' ')
        ro = ro.replace('\t',' ')
        parts.append(Paragraph(ro, style=style))

    packet = io.BytesIO()
    doc = SimpleDocTemplate(
        packet,
        pagesize=A5,
        rightMargin=20,
        leftMargin=20,
        topMargin=40,
        bottomMargin=30,
    )
    doc.build(parts)

    packet.seek(0)
    pdfReader = PdfFileReader(packet)
    # Return all rendered pages instead of assuming exactly two: a short
    # index would otherwise raise on the missing second page and a long one
    # would silently lose its tail.
    return [pdfReader.getPage(num) for num in range(pdfReader.getNumPages())]
# Number of posts collected in each booklet.
_POSTS_PER_BOOK = 50

# Inline CSS for every gallery slot, indexed as
# [number of attachments - 1][position of the attachment].
_GALLERY_STYLES = [
    ["inset: auto; width: 100%; height: 100%;"],
    ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto auto 2px; width: 50%; height: 100%;"],
    ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"],
    ["inset: auto 2px 2px auto; width: 50%; height: 50%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px 2px auto auto; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"]
]


def _json_default(o):
    """Serialise dates/datetimes as ISO-8601 strings for ``json.dump``."""
    if isinstance(o, (datetime.date, datetime.datetime)):
        return o.isoformat()
    # Any other unsupported value is written as null, as before.
    return None


def _fetch_posts():
    """Download every local ``#vgo`` post by ``oloturia`` from mastodon.bida.im."""
    mastodon = Mastodon(api_base_url="https://mastodon.bida.im")
    all_vgos = []
    last_id = None
    while True:
        page = mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)
        # Stop only when the server has nothing older. The emptiness check and
        # the pagination cursor use the *unfiltered* page, so a page without
        # posts by the wanted author does not end the crawl early.
        if not page:
            break
        all_vgos.extend(
            {
                'id': s['id'],
                'uri': s['uri'],
                'content': s['content'],
                'replies_count': s['replies_count'],
                'created': s['created_at'],
                'reblogs': s['reblogs_count'],
                'favourites': s['favourites_count'],
                'media': s['media_attachments']
            }
            for s in page
            if s['account']['username'] == 'oloturia'
        )
        last_id = page[-1]['id']
    return all_vgos


def _post_number_and_title(content):
    """Extract ``(number, title)`` from a post's HTML content.

    The number is the first space-delimited token of the plain-text rendering;
    the title is its last non-empty line, or the whole last paragraph when
    that line is shorter than 10 characters.
    """
    plain = html2text.html2text(content)
    number = plain.split(' ')[0]
    title = [line for line in plain.splitlines() if line][-1]
    if len(title) < 10:
        title = [par for par in plain.split("\n\n") if par][-1].replace("\n"," ")
    return number, title


def _media_path(media):
    """Return the local path under ``media/`` used for an attachment's preview."""
    # The file on disk is the *preview* image, so its extension must come from
    # 'preview_url' both when saving and when referencing it from the HTML.
    ext = os.path.splitext(media['preview_url'])[1]
    return os.path.join('media',str(media['id']) + ext)


def _download_images(all_vgos):
    """Download missing preview images and return a ``{number: title}`` dict."""
    os.makedirs('media', exist_ok=True)
    vgo_dict = {}
    for vgo in all_vgos:
        vgo_num, vgo_name = _post_number_and_title(vgo['content'])
        vgo_dict[vgo_num] = vgo_name
        for media in vgo['media']:
            img_name = _media_path(media)
            if os.path.isfile(img_name):
                continue
            print(img_name)
            response = requests.get(media['preview_url'], timeout=60)
            if not response.ok:
                # Do not save an error page as if it were an image; leaving
                # the file absent lets the next run retry the download.
                print("Download fallito (" + str(response.status_code) + "): " + media['preview_url'])
                continue
            with open(img_name, 'wb') as handler:
                handler.write(response.content)
    return vgo_dict


def _render_post_pdfs(all_vgos):
    """Render each post to ``pdf/<number>.pdf`` unless that file already exists."""
    with open('template.html', encoding='utf-8') as html_file:
        html_base = html_file.read()
    with open('mediagallery.html', encoding='utf-8') as html_file:
        html_mediagallery = html_file.read()

    os.makedirs('pdf', exist_ok=True)
    html_name = 'oloturia.html'
    options = {
        'page-size': 'A5',
        'margin-top': '0.5cm',
        'margin-right': '0.5cm',
        'margin-bottom': '0.5cm',
        'margin-left': '0.5cm',
        'encoding': "UTF-8",
        'quiet': ''
    }

    for vgo in all_vgos:
        vgo_num, vgo_name = _post_number_and_title(vgo['content'])
        pdf_name = os.path.join('pdf', vgo_num + '.pdf')
        if os.path.isfile(pdf_name):
            continue
        print(vgo_num +' - '+ vgo_name)

        media_tot = len(vgo['media'])
        sizes = "622px" if media_tot == 1 else "311px"
        mediagallery_tot = ''.join(
            html_mediagallery
            .replace("[media]", _media_path(media))
            .replace("[style]", _GALLERY_STYLES[media_tot-1][media_num])
            .replace("[sizes]", sizes)
            for media_num, media in enumerate(vgo['media'])
        )

        content = (
            html_base
            .replace("[content]", vgo['content'])
            .replace("[date]", datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M"))
            .replace("[reply]", str(vgo['replies_count']))
            .replace("[reblogs]", str(vgo['reblogs']))
            .replace("[favourites]", str(vgo['favourites']))
            .replace("[mediagallery]", mediagallery_tot)
        )
        # Written as UTF-8 to match the 'encoding' option given to wkhtmltopdf.
        with open(html_name, 'w', encoding='utf-8') as handler:
            handler.write(content)

        try:
            pdfkit.from_file(html_name, pdf_name, options=options)
        except OSError as err:
            # Rendering stays best-effort: report the failure for this post
            # and carry on with the remaining ones.
            print("Errore PDF " + vgo_num + ": " + str(err))
        finally:
            os.remove(html_name)


def _build_books(vgo_dict):
    """Assemble ``books/bookNN.pdf``: cover, up to 50 post pages, then the index."""
    from contextlib import ExitStack

    os.makedirs('books', exist_ok=True)
    # Only complete groups of _POSTS_PER_BOOK posts become a booklet.
    for book_num in range(1, len(vgo_dict) // _POSTS_PER_BOOK + 1):
        pdfWriter = PyPDF2.PdfFileWriter()
        print(book_num)
        pagstart = (book_num - 1) * _POSTS_PER_BOOK + 1
        pagend = book_num * _POSTS_PER_BOOK
        pdfWriter.addPage(copertina(str(pagstart).zfill(3) + " - " + str(pagend).zfill(3)))

        index_lines = []
        # PyPDF2 reads page data lazily, so every source PDF has to stay open
        # until the book is written; ExitStack closes them all afterwards.
        with ExitStack() as open_pdfs:
            for vgo_num in (str(x).zfill(3) for x in range(pagstart, pagend + 1)):
                pdf_name = os.path.join('pdf', vgo_num + '.pdf')
                try:
                    index_lines.append(vgo_num + "\t" + vgo_dict[vgo_num] + "\n")
                    pdfFileObj = open_pdfs.enter_context(open(pdf_name, 'rb'))
                    pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
                    pdfWriter.addPage(pdfReader.getPage(0))
                except Exception as err:
                    # A missing number, missing file or unreadable PDF only
                    # skips that post; the booklet is still produced.
                    print("Salto " + vgo_num + ": " + repr(err))

            for indpag in indice("".join(index_lines)):
                pdfWriter.addPage(indpag)

            book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf')
            with open(book_name, 'wb') as pdfOutput:
                pdfWriter.write(pdfOutput)


def main():
    """Run the whole pipeline: fetch posts, fetch images, render PDFs, build books.

    Posts are cached in ``oloturiadump.json``; when that file exists the
    Mastodon download is skipped. Images go to ``media/``, one PDF per post
    to ``pdf/`` and the assembled booklets to ``books/``.
    """
    # Download all posts from Mastodon (only when no cached dump exists).
    print("Scarico i post")
    if not os.path.isfile('oloturiadump.json'):
        all_vgos = _fetch_posts()
        with open('oloturiadump.json', 'w') as json_file:
            json.dump(all_vgos, json_file, indent=4, default=_json_default)

    # Download all images.
    print("Scarico le immagini")
    with open('oloturiadump.json') as json_file:
        all_vgos = json.load(json_file)
    vgo_dict = _download_images(all_vgos)

    # Generate the per-post PDFs.
    print("Genero i PDF")
    _render_post_pdfs(all_vgos)

    # Generate the booklets.
    print("Genero i libretti")
    _build_books(vgo_dict)
# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
|