oloturia2pdf.py 10 KB


  1. #!/usr/bin/env python3
  import datetime
  import io
  import json
  import locale
  import os.path

  import html2text
  import pdfkit
  import PyPDF2
  import requests
  from mastodon import Mastodon
  from reportlab.lib import pagesizes
  # Format dates with Italian names (the "%B" month name used when rendering
  # each post).  locale.setlocale raises locale.Error if it_IT.UTF-8 is not
  # installed on the host.
  locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
  def copertina(text):
      """Build a booklet cover page with ``text`` stamped on it.

      ``text`` is drawn in 36 pt Roboto, horizontally centred on an A5 page and
      100 points above its bottom edge.  That overlay is merged onto the first
      page of ``copertina.pdf``, which is read from the current directory.

      Args:
          text: Caption to print on the cover.

      Returns:
          The merged PyPDF2 page object, ready to be added to a PDF writer.

      Raises:
          OSError: If ``copertina.pdf`` or the Roboto font file cannot be read.
      """
      import io

      from PyPDF2 import PdfFileReader
      from reportlab.lib.pagesizes import A5
      from reportlab.pdfbase import pdfmetrics
      from reportlab.pdfbase.pdfmetrics import stringWidth
      from reportlab.pdfbase.ttfonts import TTFont
      from reportlab.pdfgen import canvas

      FONT = 'Roboto'
      SIZE = 36
      PAGE_WIDTH = A5[0]

      pdfmetrics.registerFont(TTFont(FONT, "template/roboto-regular-webfont.ttf"))

      # Draw the caption on an otherwise empty in-memory A5 page.
      packet = io.BytesIO()
      can = canvas.Canvas(packet, pagesize=A5)
      can.setFont(FONT, SIZE)
      text_width = stringWidth(text, FONT, SIZE)
      can.drawString((PAGE_WIDTH - text_width) / 2, 100, text)
      can.save()
      # Rewind the buffer before handing it to the reader.
      packet.seek(0)
      overlay = PdfFileReader(packet)

      # The caller writes the returned page out later, so the template's stream
      # must stay readable after this function returns.  Reading it into memory
      # lets the file handle be closed here instead of being leaked.
      with open("copertina.pdf", "rb") as template_file:
          template = PdfFileReader(io.BytesIO(template_file.read()))

      # Stamp the caption (the "watermark") on top of the template page.
      page = template.getPage(0)
      page.mergePage(overlay.getPage(0))
      return page
  def indice(text):
      """Render the booklet index as A5 PDF pages.

      Each line of ``text`` becomes one paragraph set in 12 pt Roboto with a
      32-point hanging indent.  Tabs are expanded to four spaces first.

      Args:
          text: Index entries, one per line.

      Returns:
          A list with every page of the generated index, in order.  The page
          count depends on how many entries there are and how they wrap, so
          callers must not assume a fixed number of pages.
      """
      import io

      from PyPDF2 import PdfFileReader
      from reportlab.lib.pagesizes import A5
      from reportlab.lib.styles import ParagraphStyle
      from reportlab.pdfbase import pdfmetrics
      from reportlab.pdfbase.ttfonts import TTFont
      from reportlab.platypus import Paragraph, SimpleDocTemplate

      pdfmetrics.registerFont(TTFont("Roboto", "template/roboto-regular-webfont.ttf"))
      style = ParagraphStyle(
          name='Normal',
          fontName='Roboto',
          fontSize=12,
          leading=14,
          # A negative first-line indent of the same size gives a hanging
          # indent: wrapped lines start 32 points to the right of the first.
          leftIndent=32,
          firstLineIndent=-32,
          spaceBefore=5,
      )

      parts = []
      for ro in text.splitlines():
          # NOTE(review): as written this replaces a space with a space, which
          # does nothing.  Possibly an entity or non-breaking space was lost
          # when the file was pasted -- confirm against the original script.
          ro = ro.replace(' ', ' ')
          ro = ro.replace('\t', '    ')
          parts.append(Paragraph(ro, style=style))

      packet = io.BytesIO()
      doc = SimpleDocTemplate(
          packet,
          pagesize=A5,
          rightMargin=20,
          leftMargin=20,
          topMargin=40,
          bottomMargin=30,
      )
      doc.build(parts)

      pdfReader = PdfFileReader(packet)
      # Return whatever was actually produced.  Hard-coding pages 0 and 1 raised
      # on a one-page index and silently dropped entries past the second page.
      return [pdfReader.getPage(i) for i in range(pdfReader.getNumPages())]
  def _json_default(o):
      """Serialise dates and datetimes as ISO-8601 strings for ``json.dump``."""
      if isinstance(o, (datetime.date, datetime.datetime)):
          return o.isoformat()
      # Any other unsupported object falls through and is stored as JSON null.
      return None


  def _vgo_number(vgo):
      """Return the first space-separated token of the post's plain text."""
      return html2text.html2text(vgo['content']).split(' ')[0]


  def _vgo_title(vgo):
      """Return the post title: the last non-empty line of its plain text.

      When that line is shorter than 10 characters the whole last paragraph is
      used instead, with its line breaks turned into spaces.
      """
      plain = html2text.html2text(vgo['content'])
      title = [s for s in plain.splitlines() if s][-1]
      if len(title) < 10:
          title = [s for s in plain.split("\n\n") if s][-1].replace("\n", " ")
      return title


  def _media_path(media):
      """Return the local path under ``media/`` for an attachment's preview."""
      # The preview image is what gets downloaded, so its extension names the
      # file.  The HTML must reference this same path.
      ext = os.path.splitext(media['preview_url'])[1]
      return os.path.join('media', str(media['id']) + ext)


  def _dump_posts(dump_path):
      """Download every #vgo post by ``oloturia`` and save them as JSON."""
      mastodon = Mastodon(api_base_url="https://mastodon.bida.im")
      all_vgos = []
      last_id = None
      while True:
          page = mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)
          # Stop only when the server has nothing older.  Testing the filtered
          # list instead would end the download at the first page that happens
          # to hold no posts by this account.
          if not page:
              break
          last_id = page[-1]['id']
          all_vgos.extend(
              {
                  'id': s['id'],
                  'uri': s['uri'],
                  'content': s['content'],
                  'replies_count': s['replies_count'],
                  'created': s['created_at'],
                  'reblogs': s['reblogs_count'],
                  'favourites': s['favourites_count'],
                  'media': s['media_attachments'],
              }
              for s in page
              if s['account']['username'] == 'oloturia'
          )
      with open(dump_path, 'w', encoding='utf-8') as json_file:
          json.dump(all_vgos, json_file, indent=4, default=_json_default)


  def _download_media(all_vgos):
      """Fetch missing preview images and return a ``{number: title}`` map."""
      os.makedirs('media', exist_ok=True)
      vgo_dict = {}
      for vgo in all_vgos:
          vgo_dict[_vgo_number(vgo)] = _vgo_title(vgo)
          for media in vgo['media']:
              img_name = _media_path(media)
              if os.path.isfile(img_name):
                  continue
              print(img_name)
              try:
                  response = requests.get(media['preview_url'], timeout=30)
                  response.raise_for_status()
              except requests.RequestException as err:
                  # Do not store an error page as if it were an image; the file
                  # stays missing and is retried on the next run.
                  print("Download fallito per " + img_name + ": " + str(err))
                  continue
              with open(img_name, 'wb') as handler:
                  handler.write(response.content)
      return vgo_dict


  def _render_pdfs(all_vgos):
      """Render each post that has no PDF yet to ``pdf/<number>.pdf``."""
      with open('template.html', encoding='utf-8') as html_file:
          html_base = html_file.read()
      with open('mediagallery.html', encoding='utf-8') as html_file:
          html_mediagallery = html_file.read()
      os.makedirs('pdf', exist_ok=True)

      html_name = 'oloturia.html'
      options = {
          'page-size': 'A5',
          'margin-top': '0.5cm',
          'margin-right': '0.5cm',
          'margin-bottom': '0.5cm',
          'margin-left': '0.5cm',
          'encoding': "UTF-8",
          'quiet': '',
      }
      # CSS placement of each image, indexed by [image count - 1][image index].
      style = [
          ["inset: auto; width: 100%; height: 100%;"],
          ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto auto 2px; width: 50%; height: 100%;"],
          ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"],
          ["inset: auto 2px 2px auto; width: 50%; height: 50%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px 2px auto auto; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"],
      ]

      for vgo in all_vgos:
          vgo_num = _vgo_number(vgo)
          pdf_name = os.path.join('pdf', vgo_num + '.pdf')
          if os.path.isfile(pdf_name):
              continue
          print(vgo_num + ' - ' + _vgo_title(vgo))

          media_tot = len(vgo['media'])
          sizes = "622px" if media_tot == 1 else "311px"
          gallery_parts = []
          for media_num, media in enumerate(vgo['media']):
              mediagallery = html_mediagallery
              mediagallery = mediagallery.replace("[media]", _media_path(media))
              mediagallery = mediagallery.replace("[style]", style[media_tot - 1][media_num])
              mediagallery = mediagallery.replace("[sizes]", sizes)
              gallery_parts.append(mediagallery)

          # NOTE: "%-d" (day without zero padding) is a platform extension of
          # strftime and is not available everywhere.
          created = datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M")
          content = html_base
          content = content.replace("[content]", vgo['content'])
          content = content.replace("[date]", created)
          content = content.replace("[reply]", str(vgo['replies_count']))
          content = content.replace("[reblogs]", str(vgo['reblogs']))
          content = content.replace("[favourites]", str(vgo['favourites']))
          content = content.replace("[mediagallery]", "".join(gallery_parts))

          # Written as UTF-8 to match the 'encoding' option given to pdfkit.
          with open(html_name, 'w', encoding='utf-8') as handler:
              handler.write(content)
          try:
              pdfkit.from_file(html_name, pdf_name, options=options)
          except Exception as err:
              # Best effort: one failed conversion must not stop the run, but
              # it is reported rather than silently ignored.
              print("Conversione PDF fallita per " + vgo_num + ": " + str(err))
          finally:
              os.remove(html_name)


  def _build_books(vgo_dict):
      """Bind the per-post PDFs into booklets of 50 under ``books/``."""
      os.makedirs('books', exist_ok=True)
      # Only complete groups of 50 are bound; a trailing partial group waits.
      for book_num in range(1, len(vgo_dict) // 50 + 1):
          pdfWriter = PyPDF2.PdfFileWriter()
          print(book_num)
          pagstart = (book_num - 1) * 50 + 1
          pagend = book_num * 50

          # Cover, followed by a blank page.
          pdfWriter.addPage(copertina(str(pagstart).zfill(3) + " - " + str(pagend).zfill(3)))
          pdfWriter.addBlankPage()

          index_lines = []
          for number in range(pagstart, pagend + 1):
              vgo_num = str(number).zfill(3)
              pdf_name = os.path.join('pdf', vgo_num + '.pdf')
              try:
                  title = vgo_dict[vgo_num]
                  # Read the file into memory so the handle is closed at once;
                  # the page is only written out when the booklet is saved.
                  with open(pdf_name, 'rb') as pdf_file:
                      pdf_bytes = io.BytesIO(pdf_file.read())
                  pageObj = PyPDF2.PdfFileReader(pdf_bytes).getPage(0)
              except Exception as err:
                  # Missing number, missing file or unreadable PDF: skip it and
                  # leave it out of the index too.
                  print("Pagina saltata " + vgo_num + ": " + repr(err))
                  continue
              pdfWriter.addPage(pageObj)
              index_lines.append(vgo_num + "\t" + title + "\n")
          pdfWriter.addBlankPage()

          # Index pages, followed by the closing blank page.
          for indpag in indice("".join(index_lines)):
              pdfWriter.addPage(indpag)
          pdfWriter.addBlankPage()

          book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf')
          with open(book_name, 'wb') as pdfOutput:
              pdfWriter.write(pdfOutput)


  def main():
      """Download the posts, fetch their images, render PDFs and bind booklets.

      Each stage reuses what is already on disk: the post dump
      (``oloturiadump.json``), the images under ``media/`` and the per-post
      PDFs under ``pdf/`` are only created when missing.
      """
      # Download all posts from Mastodon (skipped when a dump already exists).
      print("Scarico i post")
      if not os.path.isfile('oloturiadump.json'):
          _dump_posts('oloturiadump.json')

      # Download all images.
      print("Scarico le immagini")
      with open('oloturiadump.json', encoding='utf-8') as json_file:
          all_vgos = json.load(json_file)
      vgo_dict = _download_media(all_vgos)

      # Generate the per-post PDFs.
      print("Genero i PDF")
      _render_pdfs(all_vgos)

      # Generate the booklets.
      print("Genero i libretti")
      _build_books(vgo_dict)
  # Run the whole pipeline only when executed as a script, not when imported.
  if __name__ == "__main__":
      main()