#!/usr/bin/env python3
from mastodon import Mastodon
import json
import datetime
import os.path
from reportlab.lib import pagesizes
import requests
import html2text
import pdfkit
import locale
import PyPDF2
import html

locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
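
# Archives the #vgo posts published by @oloturia on mastodon.bida.im and binds
# them into printable A5 PDF booklets (50 posts per volume, with cover and index).
# The script expects a few local assets that are not generated here: template.html
# and mediagallery.html (HTML templates with [placeholders]), copertina.pdf (cover
# template), quarta.pdf (back cover) and template/roboto-regular-webfont.ttf.
# pdfkit needs the wkhtmltopdf binary, and the PyPDF2 calls use the legacy
# PdfFileReader/PdfFileWriter API (pre-3.0).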


def copertina(text):
    """Draw the page-range text centered on the cover template (copertina.pdf) and return the merged page."""
    from PyPDF2 import PdfFileWriter, PdfFileReader
    import io
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import A5
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.pdfbase.pdfmetrics import stringWidth

    FONT = 'Roboto'
    SIZE = 36
    packet = io.BytesIO()
    # create a new PDF with Reportlab
    pdfmetrics.registerFont(TTFont("Roboto", "template/roboto-regular-webfont.ttf"))
    can = canvas.Canvas(packet, pagesize=A5)
    can.setFont(FONT, SIZE)
    PAGE_WIDTH = A5[0]
    text_width = stringWidth(text, FONT, SIZE)
    # center the text horizontally, 100 points from the bottom of the page
    can.drawString((PAGE_WIDTH - text_width) / 2, 100, text)
    can.save()
    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    # read your existing PDF
    existing_pdf = PdfFileReader(open("copertina.pdf", "rb"))
    output = PdfFileWriter()
    # add the "watermark" (which is the new pdf) on the existing page
    page = existing_pdf.getPage(0)
    page.mergePage(new_pdf.getPage(0))
    output.addPage(page)
    return page


def indice(text):
    """Typeset the tab-separated index entries on A5 pages and return them as two PDF pages."""
    # PDF GENERATION LIBRARIES
    # import the report lab PDF generation tools
    from reportlab.lib.pagesizes import letter
    from reportlab.lib.styles import ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.lib.pagesizes import A5
    import io
    from PyPDF2 import PdfFileWriter, PdfFileReader

    parts = []
    pdfmetrics.registerFont(TTFont("Roboto", "template/roboto-regular-webfont.ttf"))
    style = ParagraphStyle(
        name='Normal',
        fontName='Roboto',
        fontSize=12,
        leading=14,
        leftIndent=32,
        firstLineIndent=-32,
        spaceBefore=5
    )
    for ro in text.splitlines():
        # normalize whitespace so each "NNN<TAB>title" entry renders as one hanging-indent paragraph
        ro = ro.replace('  ', ' ')
        ro = ro.replace('\t', '  ')
        parts.append(Paragraph(ro, style=style))
    packet = io.BytesIO()
    doc = SimpleDocTemplate(packet,
                            pagesize=A5,
                            rightMargin=20,
                            leftMargin=20,
                            topMargin=40,
                            bottomMargin=30)
    doc.build(parts)
    pdfReader = PdfFileReader(packet)
    pdfWriter = PdfFileWriter()
    for page in pdfReader.pages:
        pdfWriter.addPage(page)
    # make sure there is a second page even when the index fits on one
    pdfWriter.addBlankPage()
    return [pdfWriter.getPage(0), pdfWriter.getPage(1)]


def main():
    # Download all the posts from Mastodon
    print("Scarico i post")

    def default(o):
        # JSON serializer for the datetime objects returned by the Mastodon API
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()

    if not os.path.isfile('oloturiadump.json'):
        mastodon = Mastodon(api_base_url="https://mastodon.bida.im")
        all_vgos = []
        last_id = None

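        # Walk the local #vgo hashtag timeline backwards with max_id; stop at the
        # first page that contains no posts by oloturia.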
        while True:
            statuses = list(filter(lambda s: s['account']['username'] == 'oloturia', mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)))
            if not statuses:
                break
            all_vgos += list(map(
                lambda s: {
                    'id': s['id'],
                    'uri': s['uri'],
                    'content': s['content'],
                    'replies_count': s['replies_count'],
                    #'replies': mastodon.status_context(s['id']) if s['replies_count'] > 0 else [],
                    'created': s['created_at'],
                    'reblogs': s['reblogs_count'],
                    'favourites': s['favourites_count'],
                    'media': s['media_attachments']
                },
                statuses))
            last_id = statuses[-1]['id']

        #print(all_vgos)
        #print(json.dumps(all_vgos, default=default))

        with open('oloturiadump.json', 'w') as json_file:
            json.dump(all_vgos, json_file, indent=4, default=default)

    # Download all the images
    print("Scarico le immagini")
    with open('oloturiadump.json') as json_file:
        all_vgos = json.load(json_file)
    os.makedirs('media', exist_ok=True)

    vgo_dict = {}

    for vgo in all_vgos:
        # the post number (e.g. "001") is the first word of the post
        vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
        # vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
        # if len(vgo_name) < 10:
        #     vgo_name = [s for s in html2text.html2text(vgo['content']).split("\n\n") if s][-1].replace("\n", " ")
        # the title is whatever follows the last <p>, <br /> and </a> in the raw HTML
        vgo_name = vgo['content'].split("<p>")[-1].replace("</p>", "")
        vgo_name = vgo_name.split("<br />")[-1]
        vgo_name = vgo_name.split("</a>")[-1]
        vgo_name = html.unescape(vgo_name).strip()

        #print(vgo_num + ' - ' + vgo_name)
        #print(str(vgo['id']) + ' ' + vgo['uri'])
        vgo_dict[vgo_num] = vgo_name

        for media in vgo['media']:
            #print(str(media['id']) + ' ' + media['url'])
            ext = os.path.splitext(media['preview_url'])[1]
            img_name = os.path.join('media', str(media['id']) + ext)
            if not os.path.isfile(img_name):
                print(img_name)
                img_data = requests.get(media['preview_url']).content
                with open(img_name, 'wb') as handler:
                    handler.write(img_data)

    with open('template.html') as html_file:
        html_base = html_file.read()
    with open('mediagallery.html') as html_file:
        html_mediagallery = html_file.read()
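
    # template.html / mediagallery.html contain [content], [date], [reply], [reblogs],
    # [favourites], [mediagallery] and [media], [style], [sizes] placeholders that are
    # filled in below before each post is rendered to PDF with wkhtmltopdf.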
    # Generate the PDFs
    print("Genero i PDF")
    os.makedirs('pdf', exist_ok=True)
    for vgo in all_vgos:
        vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
        vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
        html_name = 'oloturia.html'
        pdf_name = os.path.join('pdf', vgo_num + '.pdf')
        if not os.path.isfile(pdf_name):
            print(vgo_num + ' - ' + vgo_name)
            media_num = 0
            mediagallery_tot = ''
            media_tot = len(vgo['media'])
            # image size and CSS "inset" layout depend on how many attachments (1-4) the post has
            sizes = "622px" if media_tot == 1 else "311px"
            style = [
                ["inset: auto; width: 100%; height: 100%;"],
                ["inset: auto 2px auto auto; width: 50%; height: 100%;", "inset: auto auto auto 2px; width: 50%; height: 100%;"],
                ["inset: auto 2px auto auto; width: 50%; height: 100%;", "inset: auto auto 2px 2px; width: 50%; height: 50%;", "inset: 2px auto auto 2px; width: 50%; height: 50%;"],
                ["inset: auto 2px 2px auto; width: 50%; height: 50%;", "inset: auto auto 2px 2px; width: 50%; height: 50%;", "inset: 2px 2px auto auto; width: 50%; height: 50%;", "inset: 2px auto auto 2px; width: 50%; height: 50%;"]
            ]
            for media in vgo['media']:
                mediagallery = html_mediagallery
                ext = os.path.splitext(media['url'])[1]
                img_name = os.path.join('media', str(media['id']) + ext)
                mediagallery = mediagallery.replace("[media]", img_name)
                mediagallery = mediagallery.replace("[style]", style[media_tot - 1][media_num])
                mediagallery = mediagallery.replace("[sizes]", sizes)
                mediagallery_tot = mediagallery_tot + mediagallery
                media_num = media_num + 1
            content = html_base
            content = content.replace("[content]", vgo['content'])
            content = content.replace("[date]", datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M"))
            content = content.replace("[reply]", str(vgo['replies_count']))
            content = content.replace("[reblogs]", str(vgo['reblogs']))
            content = content.replace("[favourites]", str(vgo['favourites']))
            content = content.replace("[mediagallery]", mediagallery_tot)
            with open(html_name, 'w') as handler:
                handler.write(content)
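            # wkhtmltopdf options: A5 page with 0.5 cm margins; 'enable-local-file-access'
            # is required by wkhtmltopdf >= 0.12.6 so it can load the local gallery images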
            options = {
                'enable-local-file-access': None,
                'page-size': 'A5',
                'margin-top': '0.5cm',
                'margin-right': '0.5cm',
                'margin-bottom': '0.5cm',
                'margin-left': '0.5cm',
                'encoding': "UTF-8",
                'quiet': ''
            }
            try:
                pdfkit.from_file(html_name, pdf_name, options=options)
            except Exception:
                # ignore rendering errors and continue with the next post
                pass
            os.remove(html_name)

    # Generate the booklets
    print("Genero i libretti")
    os.makedirs('books', exist_ok=True)

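    # Each booklet holds up to 50 posts: cover (copertina.pdf with the post range),
    # a blank page, the post pages, blank padding, two index pages and the back
    # cover (quarta.pdf). The padding keeps the page count a multiple of 8,
    # presumably so the books can be imposed and printed as 8-page signatures.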
    for pagstart in range(1, len(vgo_dict), 50):
        book_num = int(pagstart / 50) + 1
        pagend = min(book_num * 50, len(vgo_dict))

        book_name = os.path.join('books', 'vgo_' + str(book_num).zfill(2) + '.pdf')
        if not os.path.isfile(book_name):
            pdfWriter = PyPDF2.PdfFileWriter()
            print(book_num)

            # add the cover
            pdfWriter.addPage(copertina(str(pagstart).zfill(3) + " - " + str(pagend).zfill(3)))
            pdfWriter.addBlankPage()

            # collect the post pages and build the index text as "NNN<TAB>title" lines
            indtext = ""
            for vgo_num in [str(x).zfill(3) for x in range(pagstart, pagend + 1)]:
                pdf_name = os.path.join('pdf', vgo_num + '.pdf')
                try:
                    #print(vgo_num + " - " + vgo_dict[vgo_num])
                    indtext = indtext + vgo_num + "\t" + vgo_dict[vgo_num] + "\n"
                    pdfFileObj = open(pdf_name, 'rb')
                    pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
                    pageObj = pdfReader.getPage(0)
                    pdfWriter.addPage(pageObj)
                except Exception:
                    # skip posts whose PDF is missing
                    pass
            # pad with blank pages so the total (posts in this book + 5 fixed pages) ends up a multiple of 8
            for i in range(0, 8 - ((((pagend - 1) % 50) + 1 + 5) % 8)):
                pdfWriter.addBlankPage()
            # add the index
            for indpag in indice(indtext):
                pdfWriter.addPage(indpag)
            # add the final page (back cover)
            #pdfWriter.addBlankPage()
            pdfFileObj = open("quarta.pdf", 'rb')
            pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
            pageObj = pdfReader.getPage(0)
            pdfWriter.addPage(pageObj)
            with open(book_name, 'wb') as pdfOutput:
                pdfWriter.write(pdfOutput)

    # # Generate the index
    # with open("index.txt", 'w') as handler:
    #     for key in sorted(vgo_dict):
    #         handler.write(key + ' - ' + vgo_dict[key] + '\n')


if __name__ == "__main__":
    main()