special books
This commit is contained in:
parent
3929dcdebf
commit
62318db285
1 changed files with 197 additions and 0 deletions
197
oloturia2special.py
Executable file
197
oloturia2special.py
Executable file
|
@ -0,0 +1,197 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os.path
|
||||||
|
import html2text
|
||||||
|
import locale
|
||||||
|
import PyPDF2
|
||||||
|
import html
|
||||||
|
|
||||||
|
# Use Italian conventions for the LC_TIME category (date/time formatting).
# locale.setlocale raises locale.Error if 'it_IT.UTF-8' is not installed.
# NOTE(review): nothing in the visible code formats dates -- confirm this
# module-level side effect is still needed.
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def copertina(text):
    """Build the cover page by stamping *text* onto page 0 of ``copertina.pdf``.

    The title is rendered on its own in-memory A5 page (Roboto, 36 pt,
    horizontally centred, baseline 100 pt above the bottom edge) and then
    merged on top of the first page of the template PDF.

    Args:
        text: Title string to draw on the cover.

    Returns:
        The PyPDF2 page object for page 0 of ``copertina.pdf`` after the
        title overlay has been merged into it.
    """
    import io

    from PyPDF2 import PdfFileReader
    from reportlab.lib.pagesizes import A5
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.pdfmetrics import stringWidth
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.pdfgen import canvas

    font_name = 'Roboto'
    font_size = 36
    baseline_y = 100
    page_width = A5[0]

    pdfmetrics.registerFont(
        TTFont(font_name, "template/roboto-regular-webfont.ttf"))

    # Draw the title alone on an in-memory A5 page.
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=A5)
    can.setFont(font_name, font_size)
    text_width = stringWidth(text, font_name, font_size)
    can.drawString((page_width - text_width) / 2, baseline_y, text)
    can.save()

    # Rewind so the reader sees the PDF that was just written.
    packet.seek(0)
    overlay_pdf = PdfFileReader(packet)

    # Buffer the template in memory instead of leaving the file open: the
    # returned page is still used (and written out) after this function
    # returns, so its reader needs a stream that stays readable, while the
    # on-disk handle can be closed immediately.
    with open("copertina.pdf", "rb") as template_file:
        template_pdf = PdfFileReader(io.BytesIO(template_file.read()))

    # Stamp the title (the "watermark") on top of the template's first page.
    page = template_pdf.getPage(0)
    page.mergePage(overlay_pdf.getPage(0))
    return page
|
||||||
|
|
||||||
|
def indice(text):
    """Typeset the index on A5 pages and return exactly two of them.

    Every line of *text* becomes one paragraph set in Roboto 12 pt (14 pt
    leading) with a 32 pt hanging indent. The paragraphs are laid out with
    ReportLab into an in-memory PDF, a blank page is appended, and the first
    two pages of the result are returned.

    Args:
        text: Index contents, one entry per line.

    Returns:
        A two-element list of PyPDF2 page objects: the first index page,
        followed by the second index page or, when the index fits on a
        single page, the appended blank page. Pages beyond the second are
        discarded.
    """
    import io

    from PyPDF2 import PdfFileReader, PdfFileWriter
    from reportlab.lib.pagesizes import A5
    from reportlab.lib.styles import ParagraphStyle
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.platypus import Paragraph, SimpleDocTemplate

    pdfmetrics.registerFont(
        TTFont("Roboto", "template/roboto-regular-webfont.ttf"))
    style = ParagraphStyle(
        name='Normal',
        fontName='Roboto',
        fontSize=12,
        leading=14,
        leftIndent=32,
        firstLineIndent=-32,
        spaceBefore=5,
    )

    # NOTE(review): in the copy of the source under review both arguments of
    # the first replace() render as a plain space; the replacement may
    # originally have been a different character -- confirm against the
    # repository before relying on this.
    parts = [
        Paragraph(row.replace(' ', ' ').replace('\t', ' '), style=style)
        for row in text.splitlines()
    ]

    packet = io.BytesIO()
    doc = SimpleDocTemplate(
        packet,
        pagesize=A5,
        rightMargin=20,
        leftMargin=20,
        topMargin=40,
        bottomMargin=30,
    )
    doc.build(parts)

    # Copy the typeset pages into a writer so a blank page can be appended;
    # this guarantees that index 1 exists even for a one-page index.
    pdf_reader = PdfFileReader(packet)
    pdf_writer = PdfFileWriter()
    for page in pdf_reader.pages:
        pdf_writer.addPage(page)
    pdf_writer.addBlankPage()

    return [pdf_writer.getPage(0), pdf_writer.getPage(1)]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _vgo_title(content):
    """Extract a title from a post's HTML ``content`` string.

    Keeps what follows the last ``<p>``, drops ``</p>``, then keeps what
    follows the last ``<br />`` and the last ``</a>``; HTML entities are
    unescaped and surrounding whitespace is stripped.
    """
    title = content.split("<p>")[-1].replace("</p>", "")
    title = title.split("<br />")[-1]
    title = title.split("</a>")[-1]
    return html.unescape(title).strip()


def _titles_by_number(all_vgos):
    """Map each entry's number to its title.

    The number is the first space-separated token of the html2text rendering
    of the entry's ``content``; the title comes from ``_vgo_title``.
    """
    return {
        html2text.html2text(vgo['content']).split(' ')[0]:
            _vgo_title(vgo['content'])
        for vgo in all_vgos
    }


def _build_book(book_title, vgo_nums, vgo_dict):
    """Assemble one booklet and write it to ``books/vgo_<title>.pdf``.

    Page order: cover, blank page, page 0 of every available ``pdf/<num>.pdf``,
    blank padding, the two index pages, and page 0 of ``quarta.pdf``.

    Args:
        book_title: Title printed on the cover and used for the file name.
        vgo_nums: Numbers (as strings) of the entries to include, in order.
        vgo_dict: Mapping from entry number to title, used for the index.
    """
    from contextlib import ExitStack

    # Pages that are always present: cover, blank, two index pages, back cover.
    fixed_pages = 5

    book_name = book_title.strip().replace(" ", "_").lower()
    book_name = os.path.join('books', 'vgo_' + book_name + '.pdf')
    print(book_name)

    pdf_writer = PyPDF2.PdfFileWriter()

    # The source PDFs must stay open until the book has been written, so they
    # are collected in an ExitStack and closed together afterwards.
    with ExitStack() as open_files:
        pdf_writer.addPage(copertina(book_title))
        pdf_writer.addBlankPage()

        index_lines = []
        for vgo_num in vgo_nums:
            pdf_name = os.path.join('pdf', vgo_num + '.pdf')
            try:
                title = vgo_dict[vgo_num]
                pdf_file = open_files.enter_context(open(pdf_name, 'rb'))
                pdf_writer.addPage(PyPDF2.PdfFileReader(pdf_file).getPage(0))
            except Exception as exc:
                # Best effort: an entry with no title or no readable PDF is
                # skipped, but reported instead of being silently dropped.
                print("  saltato {}: {!r}".format(vgo_num, exc))
                continue
            # Only entries whose page really went into the book are indexed.
            index_lines.append(vgo_num + "\t" + title + "\n")

        # Pad so that the finished book has a multiple of 8 pages
        # (presumably for booklet printing). Count the pages actually added,
        # not the ones requested, so a skipped entry does not break this.
        padding = (8 - (len(index_lines) + fixed_pages)) % 8
        for _ in range(padding):
            pdf_writer.addBlankPage()

        for index_page in indice("".join(index_lines)):
            pdf_writer.addPage(index_page)

        back_file = open_files.enter_context(open("quarta.pdf", 'rb'))
        pdf_writer.addPage(PyPDF2.PdfFileReader(back_file).getPage(0))

        with open(book_name, 'wb') as pdf_output:
            pdf_writer.write(pdf_output)


def main():
    """Generate the "special" booklets listed below.

    Reads ``oloturiadump.json`` to learn the title of every entry, creates
    the ``media`` and ``books`` directories if needed, and builds one PDF per
    booklet from the single-page PDFs in ``pdf/``.
    """
    specialbooks = [
        ("Migliori per anno", "454 558 592 514 446 477 163 297 681 178 201 391 323 140 199 207 200 128 249 552 289 537 436 520 102 231 069 139 059 293 160 172 375 341 244 002 094 318 157 004 515 665 642 700 325 358 363 493 668"),
        ("Peggiori per anno", "171 014 034 205 277 596 043 584 476 671 132 697 174 485 589 301 137 237 159 185 027 437 284 496 466 644 380 299 676 652 166 229 488 336 210 421 632 609 576 475 360 527"),
    ]

    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding.
    with open('oloturiadump.json', encoding='utf-8') as json_file:
        all_vgos = json.load(json_file)
    os.makedirs('media', exist_ok=True)

    vgo_dict = _titles_by_number(all_vgos)

    print("Genero i libretti")
    os.makedirs('books', exist_ok=True)

    for book_title, vgo_list in specialbooks:
        _build_book(book_title, vgo_list.split(), vgo_dict)


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||||
|
|
Loading…
Reference in a new issue