#!/usr/bin/env python3
# itec/oloturia2pdf

from mastodon import Mastodon
import json
import datetime
import os.path
import requests
import html2text
import pdfkit
import locale
import PyPDF2

# Switch LC_TIME to Italian: the strftime("%B") call used later to format
# post dates takes its month names from this locale. The it_IT.UTF-8 locale
# must be installed on the system, otherwise setlocale raises locale.Error.
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')


# Download all posts from Mastodon

print("Scarico i post")

def default(o):
    """Serialize date and datetime objects for the JSON encoder.

    Intended to be passed as the ``default=`` hook of ``json.dump`` /
    ``json.dumps``, which call it for objects they cannot encode natively.

    Args:
        o: The object the encoder could not serialize.

    Returns:
        The ISO 8601 string produced by ``o.isoformat()``.

    Raises:
        TypeError: If ``o`` is not a ``datetime.date`` or
            ``datetime.datetime``. Without this the function would return
            ``None`` and the encoder would silently write ``null`` in place
            of the unsupported value.
    """
    if isinstance(o, (datetime.date, datetime.datetime)):
        return o.isoformat()
    raise TypeError(
        f"Object of type {type(o).__name__} is not JSON serializable"
    )

# The dump is a cache: the timeline is downloaded only when the file is absent.
if not os.path.isfile('oloturiadump.json'):
    # No access token is passed, so the API is queried unauthenticated.
    mastodon = Mastodon(api_base_url="https://mastodon.bida.im")
    all_vgos = []
    last_id = None

    while True:
        # Walk backwards through the local #vgo timeline, one page per call.
        page = mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)
        if not page:
            break
        # The cursor comes from the unfiltered page: a page without any post
        # by 'oloturia' must not end the pagination, older pages may still
        # contain some.
        last_id = page[-1]['id']
        all_vgos += [
            {
                'id': s['id'],
                'uri': s['uri'],
                'content': s['content'],
                'replies_count': s['replies_count'],
                # TODO: optionally fetch the replies with
                # mastodon.status_context(s['id']) when replies_count > 0.
                'created': s['created_at'],
                'reblogs': s['reblogs_count'],
                'favourites': s['favourites_count'],
                'media': s['media_attachments'],
            }
            for s in page
            if s['account']['username'] == 'oloturia'
        ]

    # Serialize completely before touching the file: if encoding fails, no
    # truncated dump is left behind for the isfile() check above to mistake
    # for a complete one on the next run.
    dump = json.dumps(all_vgos, indent=4, default=default)
    with open('oloturiadump.json', 'w') as json_file:
        json_file.write(dump)


# Download all images

def _parse_vgo(content):
    """Extract the number and the name of a vgo from its HTML content.

    Args:
        content: The status body as HTML, as stored under 'content' in the
            dump.

    Returns:
        A ``(vgo_num, vgo_name)`` tuple: the text preceding the first space
        of the plain-text rendering, and the last non-empty line of that
        rendering ('' when there is no non-empty line).
    """
    text = html2text.html2text(content)
    lines = [line for line in text.splitlines() if line]
    return text.split(' ')[0], (lines[-1] if lines else '')


def _media_path(media):
    """Return the local path of the preview image of a media attachment.

    The extension is taken from 'preview_url' because that is the file that
    gets downloaded; the same path is used when building the gallery HTML so
    the two always agree.
    """
    ext = os.path.splitext(media['preview_url'])[1]
    return os.path.join('media', str(media['id']) + ext)


print("Scarico le immagini")
with open('oloturiadump.json') as json_file:
    all_vgos = json.load(json_file)

os.makedirs('media', exist_ok=True)

# Maps each vgo number to its name; read again by the booklet step below.
vgo_dict = {}

for vgo in all_vgos:
    vgo_num, vgo_name = _parse_vgo(vgo['content'])
    vgo_dict[vgo_num] = vgo_name

    for media in vgo['media']:
        img_name = _media_path(media)
        if os.path.isfile(img_name):
            # Already downloaded by a previous run.
            continue

        print(img_name)
        try:
            response = requests.get(media['preview_url'], timeout=30)
            response.raise_for_status()
        except requests.RequestException as err:
            # Skip this image instead of saving an error page as if it were
            # the picture; a later run will retry because no file is written.
            print("Impossibile scaricare " + media['preview_url'] + ": " + str(err))
            continue
        with open(img_name, 'wb') as handler:
            handler.write(response.content)

with open('template.html') as html_file:
    html_base = html_file.read()
with open('mediagallery.html') as html_file:
    html_mediagallery = html_file.read()


# Generate the PDFs

print("Genero i PDF")
os.makedirs('pdf', exist_ok=True)

# Scratch file handed to wkhtmltopdf; rewritten for every post.
html_name = 'oloturia.html'

# CSS placement of each gallery tile: style[n - 1][i] is used for
# attachment i of a post with n attachments (1 to 4).
style = [
    ["inset: auto; width: 100%; height: 100%;"],
    ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto auto 2px; width: 50%; height: 100%;"],
    ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"],
    ["inset: auto 2px 2px auto; width: 50%; height: 50%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px 2px auto auto; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"]
]

options = {
    'page-size': 'A5',
    'margin-top': '0.5cm',
    'margin-right': '0.5cm',
    'margin-bottom': '0.5cm',
    'margin-left': '0.5cm',
    'encoding': "UTF-8",
    'quiet': ''
}

for vgo in all_vgos:
    vgo_num, vgo_name = _parse_vgo(vgo['content'])
    pdf_name = os.path.join('pdf', vgo_num + '.pdf')

    if os.path.isfile(pdf_name):
        # Already rendered by a previous run.
        continue

    print(vgo_num +' - '+ vgo_name)

    # Only as many attachments as there are layouts can be placed; any
    # beyond the fourth are left out rather than raising IndexError.
    gallery_media = vgo['media'][:len(style)]
    media_tot = len(gallery_media)
    sizes = "622px" if media_tot == 1 else "311px"

    mediagallery_tot = ''.join(
        html_mediagallery
        .replace("[media]", _media_path(media))
        .replace("[style]", style[media_tot - 1][media_num])
        .replace("[sizes]", sizes)
        for media_num, media in enumerate(gallery_media)
    )

    created = datetime.datetime.fromisoformat(vgo['created'])
    content = (
        html_base
        .replace("[content]", vgo['content'])
        .replace("[date]", created.strftime("%-d %B %Y, %H:%M"))
        .replace("[reply]", str(vgo['replies_count']))
        .replace("[reblogs]", str(vgo['reblogs']))
        .replace("[favourites]", str(vgo['favourites']))
        .replace("[mediagallery]", mediagallery_tot)
    )

    # Written as UTF-8 to match the 'encoding' option given to wkhtmltopdf.
    with open(html_name, 'w', encoding='utf-8') as handler:
        handler.write(content)

    try:
        pdfkit.from_file(html_name, pdf_name, options=options)
    except OSError as err:
        # pdfkit reports wkhtmltopdf failures as IOError/OSError. Rendering
        # stays best-effort, but the failure is no longer hidden.
        print("Errore generando " + pdf_name + ": " + str(err))
    finally:
        os.remove(html_name)


# Generate the booklets

print("Genero i libretti")
os.makedirs('books', exist_ok=True)

# Each booklet collects the first page of up to 50 consecutive vgo PDFs
# (001-050, 051-100, ...). The range deliberately runs one booklet past
# len(vgo_dict) // 50 so a partial last group is included; a booklet that
# ends up with no pages is skipped further down.
for book_num in range(1, len(vgo_dict) // 50 + 2):
    pdf_writer = PyPDF2.PdfFileWriter()
    print(book_num)

    # TODO: add a cover page

    # The source files are kept open until the booklet has been written,
    # because the writer still reads page data from them at write time; they
    # are all closed in the finally clause.
    open_files = []
    pages_added = 0
    try:
        for number in range((book_num - 1) * 50 + 1, book_num * 50 + 1):
            vgo_num = str(number).zfill(3)
            pdf_name = os.path.join('pdf', vgo_num + '.pdf')

            try:
                pdf_file = open(pdf_name, 'rb')
                open_files.append(pdf_file)
                page = PyPDF2.PdfFileReader(pdf_file).getPage(0)
            except FileNotFoundError:
                # No PDF was generated for this number.
                continue
            except Exception as err:
                # The PDF read-error class lives in different modules across
                # PyPDF2 releases, hence the broad catch; unlike a bare
                # except it lets KeyboardInterrupt through and reports
                # the problem.
                print("Impossibile leggere " + pdf_name + ": " + str(err))
                continue

            pdf_writer.addPage(page)
            pages_added += 1

        # TODO: add an index and possibly a closing page

        if pages_added:
            book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf')
            with open(book_name, 'wb') as pdf_output:
                pdf_writer.write(pdf_output)
    finally:
        for pdf_file in open_files:
            pdf_file.close()