oloturia2pdf.py

#!/usr/bin/env python3
from mastodon import Mastodon
import json
import datetime
import os.path
from reportlab.lib import pagesizes
import requests
import html2text
import pdfkit
import locale
import PyPDF2

locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
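

# Build a booklet cover: draw the given title text centred near the bottom of a
# blank A5 canvas and overlay it on the first page of copertina.pdf.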
def copertina(text):
    from PyPDF2 import PdfFileWriter, PdfFileReader
    import io
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import A5
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.pdfbase.pdfmetrics import stringWidth

    FONT = 'Roboto'
    SIZE = 36

    # create a new PDF with ReportLab containing only the centred title text
    packet = io.BytesIO()
    pdfmetrics.registerFont(TTFont("Roboto", "template/roboto-regular-webfont.ttf"))
    can = canvas.Canvas(packet, pagesize=A5)
    can.setFont(FONT, SIZE)
    PAGE_WIDTH = A5[0]
    text_width = stringWidth(text, FONT, SIZE)
    can.drawString((PAGE_WIDTH - text_width) / 2, 100, text)
    can.save()

    # move to the beginning of the BytesIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    # read the existing cover PDF
    existing_pdf = PdfFileReader(open("copertina.pdf", "rb"))
    output = PdfFileWriter()
    # overlay the generated title (the new pdf) on the existing cover page
    page = existing_pdf.getPage(0)
    page.mergePage(new_pdf.getPage(0))
    output.addPage(page)
    return page
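

# The script runs as a pipeline: dump the posts to JSON, download the images,
# render one A5 PDF per post, then assemble the PDFs into 50-post booklets.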
def main():
    # Download all the posts from Mastodon
    print("Scarico i post")

    def default(o):
        # make datetime objects JSON-serializable
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()

    # the JSON dump acts as a cache: posts are fetched only if it does not exist yet
    if not os.path.isfile('oloturiadump.json'):
        mastodon = Mastodon(api_base_url="https://mastodon.bida.im")
        all_vgos = []
        last_id = None
        while True:
            # page backwards through the local #vgo timeline, keeping only oloturia's
            # posts; stop at the first page with no matching posts
            statuses = list(filter(lambda s: s['account']['username'] == 'oloturia', mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)))
            if not statuses:
                break
            all_vgos += list(map(
                lambda s: {
                    'id': s['id'],
                    'uri': s['uri'],
                    'content': s['content'],
                    'replies_count': s['replies_count'],
                    #'replies': mastodon.status_context(s['id']) if s['replies_count'] > 0 else [],
                    'created': s['created_at'],
                    'reblogs': s['reblogs_count'],
                    'favourites': s['favourites_count'],
                    'media': s['media_attachments']
                },
                statuses))
            last_id = statuses[-1]['id']
        #print(all_vgos)
        #print(json.dumps(all_vgos, default=default))
        with open('oloturiadump.json', 'w') as json_file:
            json.dump(all_vgos, json_file, indent=4, default=default)
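
    # Images are cached in media/, one file per attachment id; only the
    # preview-sized version referenced by each post is fetched.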
    # Download all the images
    print("Scarico le immagini")
    with open('oloturiadump.json') as json_file:
        all_vgos = json.load(json_file)
    os.makedirs('media', exist_ok=True)

    vgo_dict = {}
    for vgo in all_vgos:
        # the post number is the first word of the text, the title is its last non-empty line
        vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
        vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
        #print(vgo_num + ' - ' + vgo_name)
        #print(str(vgo['id']) + ' ' + vgo['uri'])
        vgo_dict[vgo_num] = vgo_name
        for media in vgo['media']:
            #print(str(media['id']) + ' ' + media['url'])
            ext = os.path.splitext(media['preview_url'])[1]
            img_name = os.path.join('media', str(media['id']) + ext)
            if not os.path.isfile(img_name):
                print(img_name)
                img_data = requests.get(media['preview_url']).content
                with open(img_name, 'wb') as handler:
                    handler.write(img_data)

    with open('template.html') as html_file:
        html_base = html_file.read()
    with open('mediagallery.html') as html_file:
        html_mediagallery = html_file.read()
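
    # Each post is rendered into a temporary HTML page built from template.html,
    # whose placeholders ([content], [date], [reply], [reblogs], [favourites],
    # [mediagallery]) are filled in; mediagallery.html is repeated once per image
    # ([media], [style], [sizes]). wkhtmltopdf (via pdfkit) then produces an A5 PDF.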
    # Generate the PDFs
    print("Genero i PDF")
    os.makedirs('pdf', exist_ok=True)
    for vgo in all_vgos:
        vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
        vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
        html_name = 'oloturia.html'
        pdf_name = os.path.join('pdf', vgo_num + '.pdf')
        if not os.path.isfile(pdf_name):
            print(vgo_num + ' - ' + vgo_name)
            media_num = 0
            mediagallery_tot = ''
            media_tot = len(vgo['media'])
            sizes = "622px" if media_tot == 1 else "311px"
            # one set of CSS rules per attachment count (1 to 4 images)
            style = [
                ["inset: auto; width: 100%; height: 100%;"],
                ["inset: auto 2px auto auto; width: 50%; height: 100%;", "inset: auto auto auto 2px; width: 50%; height: 100%;"],
                ["inset: auto 2px auto auto; width: 50%; height: 100%;", "inset: auto auto 2px 2px; width: 50%; height: 50%;", "inset: 2px auto auto 2px; width: 50%; height: 50%;"],
                ["inset: auto 2px 2px auto; width: 50%; height: 50%;", "inset: auto auto 2px 2px; width: 50%; height: 50%;", "inset: 2px 2px auto auto; width: 50%; height: 50%;", "inset: 2px auto auto 2px; width: 50%; height: 50%;"]
            ]
            for media in vgo['media']:
                mediagallery = html_mediagallery
                # extension must match the file saved in the download stage (preview_url)
                ext = os.path.splitext(media['preview_url'])[1]
                img_name = os.path.join('media', str(media['id']) + ext)
                mediagallery = mediagallery.replace("[media]", img_name)
                mediagallery = mediagallery.replace("[style]", style[media_tot - 1][media_num])
                mediagallery = mediagallery.replace("[sizes]", sizes)
                mediagallery_tot = mediagallery_tot + mediagallery
                media_num = media_num + 1
            content = html_base
            content = content.replace("[content]", vgo['content'])
            content = content.replace("[date]", datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M"))
            content = content.replace("[reply]", str(vgo['replies_count']))
            content = content.replace("[reblogs]", str(vgo['reblogs']))
            content = content.replace("[favourites]", str(vgo['favourites']))
            content = content.replace("[mediagallery]", mediagallery_tot)
            with open(html_name, 'w') as handler:
                handler.write(content)
            options = {
                'page-size': 'A5',
                'margin-top': '0.5cm',
                'margin-right': '0.5cm',
                'margin-bottom': '0.5cm',
                'margin-left': '0.5cm',
                'encoding': "UTF-8",
                'quiet': ''
            }
            try:
                pdfkit.from_file(html_name, pdf_name, options=options)
            except Exception:
                # ignore wkhtmltopdf errors and keep going
                pass
            os.remove(html_name)
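
    # Booklet layout: each book gathers 50 consecutive single-post PDFs behind a
    # numbered cover page; only complete batches of 50 posts produce a book.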
    # Generate the booklets
    print("Genero i libretti")
    os.makedirs('books', exist_ok=True)
    for book_num in range(1, int(len(vgo_dict) / 50) + 1):
        pdfWriter = PyPDF2.PdfFileWriter()
        print(book_num)
        pagstart = (book_num - 1) * 50 + 1
        pagend = book_num * 50
        # add the cover
        pdfWriter.addPage(copertina(str(pagstart).zfill(3) + " - " + str(pagend).zfill(3)))
        for vgo_num in [str(x).zfill(3) for x in range(pagstart, pagend + 1)]:
            pdf_name = os.path.join('pdf', vgo_num + '.pdf')
            try:
                #print(vgo_num + " - " + vgo_dict[vgo_num])
                pdfFileObj = open(pdf_name, 'rb')
                pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
                pageObj = pdfReader.getPage(0)
                pdfWriter.addPage(pageObj)
            except Exception:
                # skip missing or unreadable single-post PDFs
                pass
        # TODO: add the index and, if needed, a final page
        book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf')
        with open(book_name, 'wb') as pdfOutput:
            pdfWriter.write(pdfOutput)


if __name__ == "__main__":
    main()