oloturia2pdf.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. #!/usr/bin/env python3
  2. from mastodon import Mastodon
  3. import json
  4. import datetime
  5. import os.path
  6. import requests
  7. import html2text
  8. import pdfkit
  9. import locale
  10. import PyPDF2
  11. locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
  12. # Scarica tutti i post da Mastodon
  13. mastodon = Mastodon(api_base_url = "https://mastodon.bida.im")
  14. all_vgos = []
  15. last_id = None
  16. def default(o):
  17. if isinstance(o, (datetime.date, datetime.datetime)):
  18. return o.isoformat()
  19. if not os.path.isfile('oloturiadump.json'):
  20. while True:
  21. statuses = list(filter(lambda s: s['account']['username'] == 'oloturia', mastodon.timeline_hashtag("vgo", local=True, max_id=last_id)))
  22. if not statuses:
  23. break
  24. all_vgos += list(map(
  25. lambda s: {
  26. 'id': s['id'],
  27. 'uri': s['uri'],
  28. 'content': s['content'],
  29. 'replies_count': s['replies_count'],
  30. #'replies': mastodon.status_context(s['id']) if s['replies_count'] > 0 else [],
  31. 'created': s['created_at'],
  32. 'reblogs': s['reblogs_count'],
  33. 'favourites': s['favourites_count'],
  34. 'media': s['media_attachments']
  35. }
  36. , statuses))
  37. last_id = statuses[-1]['id']
  38. #print(all_vgos)
  39. #print(json.dumps(all_vgos, default=default))
  40. with open('oloturiadump.json', 'w') as json_file:
  41. json.dump(all_vgos, json_file, indent=4, default=default)
  42. # Scarica tutte le immagini
  43. with open('oloturiadump.json') as json_file:
  44. all_vgos = json.load(json_file)
  45. os.makedirs('media', exist_ok=True)
  46. vgo_dict={}
  47. for vgo in all_vgos:
  48. vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
  49. vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
  50. #print(vgo_num +' - '+ vgo_name)
  51. #print(str(vgo['id']) +' '+ vgo['uri'])
  52. vgo_dict[vgo_num] = vgo_name
  53. for media in vgo['media']:
  54. #print(str(media['id']) +' '+ media['url'])
  55. ext = os.path.splitext(media['preview_url'])[1]
  56. img_name = os.path.join('media',str(media['id']) + ext)
  57. if not os.path.isfile(img_name):
  58. print(img_name)
  59. img_data = requests.get(media['preview_url']).content
  60. with open(img_name, 'wb') as handler:
  61. handler.write(img_data)
  62. with open('template.html') as html_file:
  63. html_base = html_file.read()
  64. with open('mediagallery.html') as html_file:
  65. html_mediagallery = html_file.read()
  66. # Genera i PDF
  67. os.makedirs('pdf', exist_ok=True)
  68. for vgo in all_vgos:
  69. vgo_num = html2text.html2text(vgo['content']).split(' ')[0]
  70. vgo_name = os.linesep.join([s for s in html2text.html2text(vgo['content']).splitlines() if s]).splitlines()[-1]
  71. html_name = 'oloturia.html'
  72. pdf_name = os.path.join('pdf', vgo_num + '.pdf')
  73. if not os.path.isfile(pdf_name):
  74. print(vgo_num +' - '+ vgo_name)
  75. media_num = 0
  76. mediagallery_tot = ''
  77. media_tot = len(vgo['media'])
  78. sizes = "622px" if media_tot == 1 else "311px"
  79. style = [
  80. ["inset: auto; width: 100%; height: 100%;"],
  81. ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto auto 2px; width: 50%; height: 100%;"],
  82. ["inset: auto 2px auto auto; width: 50%; height: 100%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"],
  83. ["inset: auto 2px 2px auto; width: 50%; height: 50%;","inset: auto auto 2px 2px; width: 50%; height: 50%;","inset: 2px 2px auto auto; width: 50%; height: 50%;","inset: 2px auto auto 2px; width: 50%; height: 50%;"]
  84. ]
  85. for media in vgo['media']:
  86. mediagallery = html_mediagallery
  87. ext = os.path.splitext(media['url'])[1]
  88. img_name = os.path.join('media',str(media['id']) + ext)
  89. mediagallery = mediagallery.replace("[media]", img_name)
  90. mediagallery = mediagallery.replace("[style]", style[media_tot-1][media_num])
  91. mediagallery = mediagallery.replace("[sizes]", sizes)
  92. mediagallery_tot = mediagallery_tot + mediagallery
  93. media_num = media_num + 1
  94. content = html_base
  95. content = content.replace("[content]", vgo['content'])
  96. content = content.replace("[date]", datetime.datetime.fromisoformat(vgo['created']).strftime("%-d %B %Y, %H:%M"))
  97. content = content.replace("[reply]", str(vgo['replies_count']))
  98. content = content.replace("[reblogs]", str(vgo['reblogs']))
  99. content = content.replace("[favourites]", str(vgo['favourites']))
  100. content = content.replace("[mediagallery]", mediagallery_tot)
  101. with open(html_name, 'w') as handler:
  102. handler.write(content)
  103. options = {
  104. 'page-size': 'A5',
  105. 'margin-top': '0.5cm',
  106. 'margin-right': '0.5cm',
  107. 'margin-bottom': '0.5cm',
  108. 'margin-left': '0.5cm',
  109. 'encoding': "UTF-8",
  110. 'quiet': ''
  111. }
  112. try:
  113. pdfkit.from_file(html_name, pdf_name, options=options)
  114. except:
  115. pass
  116. os.remove(html_name)
  117. # Genera i libretti
  118. os.makedirs('books', exist_ok=True)
  119. for book_num in range(1, int(len(vgo_dict) / 50) + 2):
  120. pdfWriter = PyPDF2.PdfFileWriter()
  121. print(book_num)
  122. # aggiungere copertina
  123. for vgo_num in [str(x).zfill(3) for x in range((book_num - 1) * 50 + 1, book_num * 50 + 1)]:
  124. pdf_name = os.path.join('pdf', vgo_num + '.pdf')
  125. try:
  126. #print(vgo_num + " - " + vgo_dict[vgo_num])
  127. pdfFileObj = open(pdf_name, 'rb')
  128. pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
  129. pageObj = pdfReader.getPage(0)
  130. pdfWriter.addPage(pageObj)
  131. except:
  132. pass
  133. # aggiungere indice ed eventualmente pagina finale
  134. book_name = os.path.join('books', 'book' + str(book_num).zfill(2) + '.pdf')
  135. with open(book_name, 'wb') as pdfOutput:
  136. pdfWriter.write(pdfOutput)