#!/usr/bin/env python3
# oloturia2pdf.py
import datetime
import json
import locale
import os.path
import sys

import html2text
import pdfkit
import PyPDF2
import requests
from mastodon import Mastodon
  11. locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
  12. # Scarica tutti i post da Mastodon
  13. print("Scarico i post")
  14. def default(o):
  15. if isinstance(o, (datetime.date, datetime.datetime)):
  16. return o.isoformat()
  17. if not os.path.isfile('oloturiadump.json'):
  18. mastodon = Mastodon(api_base_url = "https://mastodon.bida.im")
  19. all_vgos = []
  20. last_id = None
  21. while True:
  22. statuses = list(filter(lambda s: s['account']['username'] == 'oloturia', mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)))
  23. if not statuses:
  24. break
  25. all_vgos += list(map(
  26. lambda s: {
  27. 'id': s['id'],
  28. 'uri': s['uri'],
  29. 'content': s['content'],
  30. 'replies_count': s['replies_count'],
  31. #'replies': mastodon.status_context(s['id']) if s['replies_count'] > 0 else [],
  32. 'created': s['created_at'],
  33. 'reblogs': s['reblogs_count'],
  34. 'favourites': s['favourites_count'],
  35. 'media': s['media_attachments']
  36. }
  37. , statuses))
  38. last_id = statuses[-1]['id']
  39. #print(all_vgos)
  40. #print(json.dumps(all_vgos, default=default))
  41. with open('oloturiadump.json', 'w') as json_file:
  42. json.dump(all_vgos, json_file, indent=4, default=default)
  43. # Scarica tutte le immagini
  44. print("Scarico le immagini")
  45. with open('oloturiadump.json') as json_file:
  46. all_vgos = json.load(json_file)
  47. os.makedirs('media', exist_ok=True)
  48. vgo_dict={}
  49. for vgo in all_vgos:
  50. vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
  51. vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
  52. #print(vgo_num +' - '+ vgo_name)
  53. #print(str(vgo['id']) +' '+ vgo['uri'])
  54. vgo_dict[vgo_num] = vgo_name
  55. for media in vgo['media']:
  56. #print(str(media['id']) +' '+ media['url'])
  57. ext = os.path.splitext(media['preview_url'])[1]
  58. img_name = os.path.join('media',str(media['id']) + ext)
  59. if not os.path.isfile(img_name):
  60. print(img_name)
  61. img_data = requests.get(media['preview_url']).content
  62. with open(img_name, 'wb') as handler:
  63. handler.write(img_data)
  64. with open('template.html') as html_file:
  65. html_base = html_file.read()
  66. with open('mediagallery.html') as html_file:
  67. html_mediagallery = html_file.read()
  68. # Genera i PDF
  69. print("Genero i PDF")
  70. os.makedirs('pdf', exist_ok=True)
  71. for vgo in all_vgos:
  72. vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
  73. vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
  74. html_name = 'oloturia.html'
  75. pdf_name = os.path.join('pdf', vgo_num + '.pdf')
  76. if not os.path.isfile(pdf_name):
  77. print(vgo_num +' - '+ vgo_name)
  78. media_num = 0
  79. mediagallery_tot = ''
  80. media_tot = len(vgo['media'])
  81. sizes = "622px" if media_tot == 1 else "311px"
  82. style = [
  83. ["inset: auto; width: 100%; height: 100%;"],
  84. ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto auto 2px; width: 50%; height: 100%;"],
  85. ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"],
  86. ["inset: auto 2px 2px auto; width: 50%; height: 50%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px 2px auto auto; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"]
  87. ]
  88. for media in vgo['media']:
  89. mediagallery = html_mediagallery
  90. ext = os.path.splitext(media['url'])[1]
  91. img_name = os.path.join('media',str(media['id']) + ext)
  92. mediagallery = mediagallery.replace("[media]", img_name)
  93. mediagallery = mediagallery.replace("[style]", style[media_tot-1][media_num])
  94. mediagallery = mediagallery.replace("[sizes]", sizes)
  95. mediagallery_tot = mediagallery_tot + mediagallery
  96. media_num = media_num + 1
  97. content = html_base
  98. content = content.replace("[content]", vgo['content'])
  99. content = content.replace("[date]", datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M"))
  100. content = content.replace("[reply]", str(vgo['replies_count']))
  101. content = content.replace("[reblogs]", str(vgo['reblogs']))
  102. content = content.replace("[favourites]", str(vgo['favourites']))
  103. content = content.replace("[mediagallery]", mediagallery_tot)
  104. with open(html_name, 'w') as handler:
  105. handler.write(content)
  106. options = {
  107. 'page-size': 'A5',
  108. 'margin-top': '0.5cm',
  109. 'margin-right': '0.5cm',
  110. 'margin-bottom': '0.5cm',
  111. 'margin-left': '0.5cm',
  112. 'encoding': "UTF-8",
  113. 'quiet': ''
  114. }
  115. try:
  116. pdfkit.from_file(html_name, pdf_name, options=options)
  117. except:
  118. pass
  119. os.remove(html_name)
  120. # Genera i libretti
  121. print("Genero i libretti")
  122. os.makedirs('books', exist_ok=True)
  123. for book_num in range(1, int(len(vgo_dict) / 50) + 2):
  124. pdfWriter = PyPDF2.PdfFileWriter()
  125. print(book_num)
  126. # aggiungere copertina
  127. for vgo_num in [str(x).zfill(3) for x in range((book_num - 1) * 50 + 1, book_num * 50 + 1)]:
  128. pdf_name = os.path.join('pdf', vgo_num + '.pdf')
  129. try:
  130. #print(vgo_num + " - " + vgo_dict[vgo_num])
  131. pdfFileObj = open(pdf_name, 'rb')
  132. pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
  133. pageObj = pdfReader.getPage(0)
  134. pdfWriter.addPage(pageObj)
  135. except:
  136. pass
  137. # aggiungere indice ed eventualmente pagina finale
  138. book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf')
  139. with open(book_name, 'wb') as pdfOutput:
  140. pdfWriter.write(pdfOutput)