oloturia2pdf.py

#!/usr/bin/env python3
from mastodon import Mastodon
import json
import datetime
import os.path
from reportlab.lib import pagesizes
import requests
import html2text
import pdfkit
import locale
import PyPDF2

locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
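

# Build a booklet cover: draw the given title text centred near the bottom of a
# blank A5 canvas and overlay it on the first page of copertina.pdf.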
def copertina(text):
    from PyPDF2 import PdfFileWriter, PdfFileReader
    import io
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import A5
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.pdfbase.pdfmetrics import stringWidth

    FONT = 'Roboto'
    SIZE = 36

    # create a new PDF with ReportLab containing only the centred title text
    packet = io.BytesIO()
    pdfmetrics.registerFont(TTFont("Roboto", "template/roboto-regular-webfont.ttf"))
    can = canvas.Canvas(packet, pagesize=A5)
    can.setFont(FONT, SIZE)
    PAGE_WIDTH = A5[0]
    text_width = stringWidth(text, FONT, SIZE)
    can.drawString((PAGE_WIDTH - text_width) / 2, 100, text)
    can.save()

    # move to the beginning of the BytesIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    # read the existing cover PDF
    existing_pdf = PdfFileReader(open("copertina.pdf", "rb"))
    output = PdfFileWriter()
    # overlay the generated title (the new pdf) on the existing cover page
    page = existing_pdf.getPage(0)
    page.mergePage(new_pdf.getPage(0))
    output.addPage(page)
    return page
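

# The script runs as a pipeline: dump the posts to JSON, download the images,
# render one A5 PDF per post, then assemble the PDFs into 50-post booklets.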
def main():
    # Download all the posts from Mastodon
    print("Scarico i post")

    def default(o):
        # make datetime objects JSON-serializable
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()

    # the JSON dump acts as a cache: posts are fetched only if it does not exist yet
    if not os.path.isfile('oloturiadump.json'):
        mastodon = Mastodon(api_base_url="https://mastodon.bida.im")
        all_vgos = []
        last_id = None
        while True:
            # page backwards through the local #vgo timeline, keeping only oloturia's
            # posts; stop at the first page with no matching posts
            statuses = list(filter(lambda s: s['account']['username'] == 'oloturia', mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)))
            if not statuses:
                break
            all_vgos += list(map(
                lambda s: {
                    'id': s['id'],
                    'uri': s['uri'],
                    'content': s['content'],
                    'replies_count': s['replies_count'],
                    #'replies': mastodon.status_context(s['id']) if s['replies_count'] > 0 else [],
                    'created': s['created_at'],
                    'reblogs': s['reblogs_count'],
                    'favourites': s['favourites_count'],
                    'media': s['media_attachments']
                },
                statuses))
            last_id = statuses[-1]['id']
        #print(all_vgos)
        #print(json.dumps(all_vgos, default=default))
        with open('oloturiadump.json', 'w') as json_file:
            json.dump(all_vgos, json_file, indent=4, default=default)
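
    # Images are cached in media/, one file per attachment id; only the
    # preview-sized version referenced by each post is fetched.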
    # Download all the images
    print("Scarico le immagini")
    with open('oloturiadump.json') as json_file:
        all_vgos = json.load(json_file)
    os.makedirs('media', exist_ok=True)

    vgo_dict = {}
    for vgo in all_vgos:
        # the post number is the first word of the text, the title is its last non-empty line
        vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
        vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
        #print(vgo_num + ' - ' + vgo_name)
        #print(str(vgo['id']) + ' ' + vgo['uri'])
        vgo_dict[vgo_num] = vgo_name
        for media in vgo['media']:
            #print(str(media['id']) + ' ' + media['url'])
            ext = os.path.splitext(media['preview_url'])[1]
            img_name = os.path.join('media', str(media['id']) + ext)
            if not os.path.isfile(img_name):
                print(img_name)
                img_data = requests.get(media['preview_url']).content
                with open(img_name, 'wb') as handler:
                    handler.write(img_data)

    with open('template.html') as html_file:
        html_base = html_file.read()
    with open('mediagallery.html') as html_file:
        html_mediagallery = html_file.read()
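
    # Each post is rendered into a temporary HTML page built from template.html,
    # whose placeholders ([content], [date], [reply], [reblogs], [favourites],
    # [mediagallery]) are filled in; mediagallery.html is repeated once per image
    # ([media], [style], [sizes]). wkhtmltopdf (via pdfkit) then produces an A5 PDF.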
    # Generate the PDFs
    print("Genero i PDF")
    os.makedirs('pdf', exist_ok=True)
    for vgo in all_vgos:
        vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
        vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
        html_name = 'oloturia.html'
        pdf_name = os.path.join('pdf', vgo_num + '.pdf')
        if not os.path.isfile(pdf_name):
            print(vgo_num + ' - ' + vgo_name)
            media_num = 0
            mediagallery_tot = ''
            media_tot = len(vgo['media'])
            sizes = "622px" if media_tot == 1 else "311px"
            # one set of CSS rules per attachment count (1 to 4 images)
            style = [
                ["inset: auto; width: 100%; height: 100%;"],
                ["inset: auto 2px auto auto; width: 50%; height: 100%;", "inset: auto auto auto 2px; width: 50%; height: 100%;"],
                ["inset: auto 2px auto auto; width: 50%; height: 100%;", "inset: auto auto 2px 2px; width: 50%; height: 50%;", "inset: 2px auto auto 2px; width: 50%; height: 50%;"],
                ["inset: auto 2px 2px auto; width: 50%; height: 50%;", "inset: auto auto 2px 2px; width: 50%; height: 50%;", "inset: 2px 2px auto auto; width: 50%; height: 50%;", "inset: 2px auto auto 2px; width: 50%; height: 50%;"]
            ]
            for media in vgo['media']:
                mediagallery = html_mediagallery
                # extension must match the file saved in the download stage (preview_url)
                ext = os.path.splitext(media['preview_url'])[1]
                img_name = os.path.join('media', str(media['id']) + ext)
                mediagallery = mediagallery.replace("[media]", img_name)
                mediagallery = mediagallery.replace("[style]", style[media_tot - 1][media_num])
                mediagallery = mediagallery.replace("[sizes]", sizes)
                mediagallery_tot = mediagallery_tot + mediagallery
                media_num = media_num + 1
            content = html_base
            content = content.replace("[content]", vgo['content'])
            content = content.replace("[date]", datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M"))
            content = content.replace("[reply]", str(vgo['replies_count']))
            content = content.replace("[reblogs]", str(vgo['reblogs']))
            content = content.replace("[favourites]", str(vgo['favourites']))
            content = content.replace("[mediagallery]", mediagallery_tot)
            with open(html_name, 'w') as handler:
                handler.write(content)
            options = {
                'page-size': 'A5',
                'margin-top': '0.5cm',
                'margin-right': '0.5cm',
                'margin-bottom': '0.5cm',
                'margin-left': '0.5cm',
                'encoding': "UTF-8",
                'quiet': ''
            }
            try:
                pdfkit.from_file(html_name, pdf_name, options=options)
            except Exception:
                # ignore wkhtmltopdf errors and keep going
                pass
            os.remove(html_name)
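
    # Booklet layout: each book gathers 50 consecutive single-post PDFs behind a
    # numbered cover page; only complete batches of 50 posts produce a book.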
    # Generate the booklets
    print("Genero i libretti")
    os.makedirs('books', exist_ok=True)
    for book_num in range(1, int(len(vgo_dict) / 50) + 1):
        pdfWriter = PyPDF2.PdfFileWriter()
        print(book_num)
        pagstart = (book_num - 1) * 50 + 1
        pagend = book_num * 50
        # add the cover
        pdfWriter.addPage(copertina(str(pagstart).zfill(3) + " - " + str(pagend).zfill(3)))
        for vgo_num in [str(x).zfill(3) for x in range(pagstart, pagend + 1)]:
            pdf_name = os.path.join('pdf', vgo_num + '.pdf')
            try:
                #print(vgo_num + " - " + vgo_dict[vgo_num])
                pdfFileObj = open(pdf_name, 'rb')
                pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
                pageObj = pdfReader.getPage(0)
                pdfWriter.addPage(pageObj)
            except Exception:
                # skip missing or unreadable single-post PDFs
                pass
        # TODO: add the index and, if needed, a final page
        book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf')
        with open(book_name, 'wb') as pdfOutput:
            pdfWriter.write(pdfOutput)


if __name__ == "__main__":
    main()