oloturia2pdf.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. #!/usr/bin/env python3
  2. from mastodon import Mastodon
  3. import json
  4. import datetime
  5. import os.path
  6. from reportlab.lib import pagesizes
  7. import requests
  8. import html2text
  9. import pdfkit
  10. import locale
  11. import PyPDF2
  12. locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
  13. def copertina(text):
  14. from PyPDF2 import PdfFileWriter, PdfFileReader
  15. import io
  16. from reportlab.pdfgen import canvas
  17. from reportlab.lib.pagesizes import A4
  18. from reportlab.pdfbase import pdfmetrics
  19. from reportlab.pdfbase.ttfonts import TTFont
  20. from reportlab.pdfbase.pdfmetrics import stringWidth
  21. from reportlab.rl_config import defaultPageSize
  22. FONT = 'Roboto'
  23. SIZE = 48
  24. packet = io.BytesIO()
  25. # create a new PDF with Reportlab
  26. pdfmetrics.registerFont(TTFont("Roboto", "template/roboto-regular-webfont.ttf"))
  27. can = canvas.Canvas(packet, pagesize=A4)
  28. can.setFont(FONT, SIZE)
  29. PAGE_WIDTH = defaultPageSize[0]
  30. #PAGE_HEIGHT = defaultPageSize[1]
  31. text_width = stringWidth(text,FONT, SIZE)
  32. can.drawString((PAGE_WIDTH - text_width) / 2, 150, text)
  33. can.save()
  34. #move to the beginning of the StringIO buffer
  35. packet.seek(0)
  36. new_pdf = PdfFileReader(packet)
  37. # read your existing PDF
  38. existing_pdf = PdfFileReader(open("copertina.pdf", "rb"))
  39. output = PdfFileWriter()
  40. # add the "watermark" (which is the new pdf) on the existing page
  41. page = existing_pdf.getPage(0)
  42. page.mergePage(new_pdf.getPage(0))
  43. output.addPage(page)
  44. return(page)
# Download all posts from Mastodon
print("Scarico i post")
  47. def default(o):
  48. if isinstance(o, (datetime.date, datetime.datetime)):
  49. return o.isoformat()
  50. if not os.path.isfile('oloturiadump.json'):
  51. mastodon = Mastodon(api_base_url = "https://mastodon.bida.im")
  52. all_vgos = []
  53. last_id = None
  54. while True:
  55. statuses = list(filter(lambda s: s['account']['username'] == 'oloturia', mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)))
  56. if not statuses:
  57. break
  58. all_vgos += list(map(
  59. lambda s: {
  60. 'id': s['id'],
  61. 'uri': s['uri'],
  62. 'content': s['content'],
  63. 'replies_count': s['replies_count'],
  64. #'replies': mastodon.status_context(s['id']) if s['replies_count'] > 0 else [],
  65. 'created': s['created_at'],
  66. 'reblogs': s['reblogs_count'],
  67. 'favourites': s['favourites_count'],
  68. 'media': s['media_attachments']
  69. }
  70. , statuses))
  71. last_id = statuses[-1]['id']
  72. #print(all_vgos)
  73. #print(json.dumps(all_vgos, default=default))
  74. with open('oloturiadump.json', 'w') as json_file:
  75. json.dump(all_vgos, json_file, indent=4, default=default)
  76. # Scarica tutte le immagini
  77. print("Scarico le immagini")
  78. with open('oloturiadump.json') as json_file:
  79. all_vgos = json.load(json_file)
  80. os.makedirs('media', exist_ok=True)
  81. vgo_dict={}
  82. for vgo in all_vgos:
  83. vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
  84. vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
  85. #print(vgo_num +' - '+ vgo_name)
  86. #print(str(vgo['id']) +' '+ vgo['uri'])
  87. vgo_dict[vgo_num] = vgo_name
  88. for media in vgo['media']:
  89. #print(str(media['id']) +' '+ media['url'])
  90. ext = os.path.splitext(media['preview_url'])[1]
  91. img_name = os.path.join('media',str(media['id']) + ext)
  92. if not os.path.isfile(img_name):
  93. print(img_name)
  94. img_data = requests.get(media['preview_url']).content
  95. with open(img_name, 'wb') as handler:
  96. handler.write(img_data)
  97. with open('template.html') as html_file:
  98. html_base = html_file.read()
  99. with open('mediagallery.html') as html_file:
  100. html_mediagallery = html_file.read()
  101. # Genera i PDF
  102. print("Genero i PDF")
  103. os.makedirs('pdf', exist_ok=True)
  104. for vgo in all_vgos:
  105. vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
  106. vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
  107. html_name = 'oloturia.html'
  108. pdf_name = os.path.join('pdf', vgo_num + '.pdf')
  109. if not os.path.isfile(pdf_name):
  110. print(vgo_num +' - '+ vgo_name)
  111. media_num = 0
  112. mediagallery_tot = ''
  113. media_tot = len(vgo['media'])
  114. sizes = "622px" if media_tot == 1 else "311px"
  115. style = [
  116. ["inset: auto; width: 100%; height: 100%;"],
  117. ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto auto 2px; width: 50%; height: 100%;"],
  118. ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"],
  119. ["inset: auto 2px 2px auto; width: 50%; height: 50%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px 2px auto auto; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"]
  120. ]
  121. for media in vgo['media']:
  122. mediagallery = html_mediagallery
  123. ext = os.path.splitext(media['url'])[1]
  124. img_name = os.path.join('media',str(media['id']) + ext)
  125. mediagallery = mediagallery.replace("[media]", img_name)
  126. mediagallery = mediagallery.replace("[style]", style[media_tot-1][media_num])
  127. mediagallery = mediagallery.replace("[sizes]", sizes)
  128. mediagallery_tot = mediagallery_tot + mediagallery
  129. media_num = media_num + 1
  130. content = html_base
  131. content = content.replace("[content]", vgo['content'])
  132. content = content.replace("[date]", datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M"))
  133. content = content.replace("[reply]", str(vgo['replies_count']))
  134. content = content.replace("[reblogs]", str(vgo['reblogs']))
  135. content = content.replace("[favourites]", str(vgo['favourites']))
  136. content = content.replace("[mediagallery]", mediagallery_tot)
  137. with open(html_name, 'w') as handler:
  138. handler.write(content)
  139. options = {
  140. 'page-size': 'A5',
  141. 'margin-top': '0.5cm',
  142. 'margin-right': '0.5cm',
  143. 'margin-bottom': '0.5cm',
  144. 'margin-left': '0.5cm',
  145. 'encoding': "UTF-8",
  146. 'quiet': ''
  147. }
  148. try:
  149. pdfkit.from_file(html_name, pdf_name, options=options)
  150. except:
  151. pass
  152. os.remove(html_name)
  153. # Genera i libretti
  154. print("Genero i libretti")
  155. os.makedirs('books', exist_ok=True)
  156. for book_num in range(1, int(len(vgo_dict) / 50) + 1):
  157. pdfWriter = PyPDF2.PdfFileWriter()
  158. print(book_num)
  159. pagstart = (book_num - 1) * 50 + 1
  160. pagend = book_num * 50
  161. # aggiungere copertina
  162. pdfWriter.addPage(copertina(str(pagstart).zfill(3) + " - " + str(pagend).zfill(3)))
  163. for vgo_num in [str(x).zfill(3) for x in range(pagstart, pagend + 1)]:
  164. pdf_name = os.path.join('pdf', vgo_num + '.pdf')
  165. try:
  166. #print(vgo_num + " - " + vgo_dict[vgo_num])
  167. pdfFileObj = open(pdf_name, 'rb')
  168. pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
  169. pageObj = pdfReader.getPage(0)
  170. pdfWriter.addPage(pageObj)
  171. except:
  172. pass
  173. # aggiungere indice ed eventualmente pagina finale
  174. book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf')
  175. with open(book_name, 'wb') as pdfOutput:
  176. pdfWriter.write(pdfOutput)