ics2yaml.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. #!/usr/bin/python3
  2. import sys
  3. import logging
  4. import argparse
  5. import os.path
  6. from typing import Iterable
  7. from pathlib import Path
  8. from datetime import timedelta
  9. import json
  10. import yaml
  11. from icalendar import Calendar, Event
  12. def get_parser():
  13. p = argparse.ArgumentParser()
  14. p.add_argument("files", nargs="+", type=str)
  15. p.add_argument("--hm-json", default='./hackmeeting.json', type=Path)
  16. p.add_argument(
  17. "--out-talks-dir",
  18. help="Output directory for markdown files",
  19. default="talks/",
  20. type=Path,
  21. )
  22. p.add_argument("--trust-location", type=str, nargs="*", default=[])
  23. p.add_argument(
  24. "--slot-size",
  25. type=int,
  26. metavar="MINUTES",
  27. default=15,
  28. help="Round times to the nearest */MINUTES",
  29. )
  30. p.add_argument("--night-threshold", metavar="HOUR", default=5, type=int)
  31. p.add_argument("--mode", choices=["pelican"], default="pelican")
  32. return p
  33. def round_down(num, divisor):
  34. """
  35. >>> round_down(1000, 10)
  36. 1000
  37. >>> round_down(1001, 10)
  38. 1000
  39. >>> round_down(1009, 10)
  40. 1000
  41. """
  42. return num - (num % divisor)
  43. def round_down_time(hhmm: str, divisor: int):
  44. hh = hhmm[:-2]
  45. mm = hhmm[-2:]
  46. mm = round_down(int(mm, base=10), divisor)
  47. return int('%s%02d' % (hh,mm) , base=10)
  48. class Converter:
  49. """
  50. This class takes care of everything converter-related.
  51. Objects are used to enable multiple output formats to be added pretty easily by subclassing
  52. """
  53. def __init__(self, args):
  54. self.args = args
  55. self.rooms = []
  56. self.talks = {}
  57. self.talk_room = {} # map talk uid to room name
  58. self.talk_location = {} # same, but see --trust-location
  59. self.changed_files = []
  60. def _fname_to_room(self, fpath: str) -> str:
  61. base = os.path.splitext(os.path.basename(fpath))[0]
  62. if base == 'ALL':
  63. return '*'
  64. return base
  65. def get_vevents_from_calendar(self, cal: Calendar) -> Iterable[Event]:
  66. for subc in cal.subcomponents:
  67. if type(subc) is Event:
  68. yield subc
  69. def load_input(self):
  70. with self.args.hm_json.open() as buf:
  71. self.hackmeeting_metadata = json.load(buf)
  72. for fpath in self.args.files:
  73. room = self._fname_to_room(fpath)
  74. with open(fpath) as buf:
  75. file_content = buf.read()
  76. cal = Calendar.from_ical(file_content, multiple=True)
  77. for subcal in cal:
  78. for ev in self.get_vevents_from_calendar(subcal):
  79. if ev.decoded('DTSTART').year != self.hackmeeting_metadata['year']:
  80. continue
  81. uid = ev.decoded("uid").decode("ascii")
  82. self.talks[uid] = ev
  83. self.talk_room[uid] = room
  84. self.talk_location[uid] = room
  85. if fpath in self.args.trust_location:
  86. try:
  87. self.talk_location[uid] = ev.decoded("location").decode(
  88. "utf8"
  89. )
  90. except:
  91. pass
  92. def run(self):
  93. self.rooms = [self._fname_to_room(fpath) for fpath in self.args.files]
  94. self.load_input()
  95. self.output()
  96. for fpath in self.changed_files:
  97. print(fpath)
  98. class PelicanConverter(Converter):
  99. """
  100. add relevant output features to the base converter
  101. """
  102. def load_input(self):
  103. super().load_input()
  104. talks_meta = self.args.out_talks_dir / 'meta.yaml'
  105. with talks_meta.open() as buf:
  106. self.talks_metadata = yaml.safe_load(buf)
  107. def output_markdown(self):
  108. for uid in sorted(self.talks):
  109. talk = self.talks[uid]
  110. fname = 'meta.yaml'
  111. talkdir = self.args.out_talks_dir / uid
  112. talkdir.mkdir(exist_ok=True)
  113. fpath = talkdir / fname
  114. self.changed_files.append(fpath)
  115. day = (talk.decoded('DTSTART').date() - self.talks_metadata['startdate']).days
  116. after_midnight = talk.decoded('DTSTART').hour < self.args.night_threshold
  117. if after_midnight:
  118. day -= 1
  119. frontmatter = dict(
  120. key=uid,
  121. title=talk.decoded("SUMMARY").decode("utf8"),
  122. format="conference",
  123. start=talk.decoded("DTSTART"),
  124. time=talk.decoded("DTSTART").strftime('%H:%M'),
  125. day=day,
  126. end=talk.decoded("DTEND"),
  127. room=self.talk_location[uid],
  128. duration=int(
  129. (talk.decoded("DTEND") - talk.decoded("DTSTART")).total_seconds()
  130. // 60
  131. ),
  132. tags=[],
  133. )
  134. if "CATEGORIES" in talk:
  135. try:
  136. vobject = talk.get("CATEGORIES")
  137. if hasattr(vobject, "cats"):
  138. vobject = vobject.cats
  139. frontmatter["tags"] = [str(t) for t in vobject]
  140. else:
  141. frontmatter["tags"] = [str(vobject)]
  142. except Exception as exc:
  143. logging.warning("Error parsing categories: %s", str(exc))
  144. if "base" in frontmatter["tags"]:
  145. frontmatter["level"] = "beginner"
  146. if "DESCRIPTION" in talk:
  147. frontmatter['text'] = talk.decoded("DESCRIPTION").decode("utf8")
  148. else:
  149. frontmatter['text'] = ''
  150. with open(str(fpath), "w") as buf:
  151. yaml.safe_dump(frontmatter, buf)
  152. # body
  153. def output_schedule(self):
  154. days = {}
  155. for uid in sorted(self.talks):
  156. talk = self.talks[uid]
  157. # TODO: talk just after midnight should belong to the preceding day
  158. dt = talk.decoded("dtstart")
  159. after_midnight = dt.time().hour < self.args.night_threshold
  160. if after_midnight:
  161. dt = dt - timedelta(days=1)
  162. day = dt.strftime("%Y-%m-%d")
  163. hour = talk.decoded("dtstart").time().hour
  164. minute = talk.decoded("dtstart").time().minute
  165. if after_midnight:
  166. hour += 24
  167. start = "%02d:%02d" % (hour, minute)
  168. if day not in days:
  169. days[day] = dict(day=day, start=start, rooms={})
  170. if days[day]["start"] > start:
  171. days[day]["start"] = start
  172. room = self.talk_room[uid]
  173. days[day]["rooms"].setdefault(room, dict(room=room, slots=[]))
  174. talkstart = round_down_time('%02d%02d' % (hour, minute), self.args.slot_size)
  175. duration = talk.decoded("dtend") - talk.decoded("dtstart")
  176. duration_minutes = int(duration.total_seconds() // 60)
  177. duration_minutes = round_down(duration_minutes, self.args.slot_size)
  178. slot = "%04d-%dmin" % (talkstart, duration_minutes)
  179. days[day]["rooms"][room]["slots"].append(dict(slot=slot, talk=uid))
  180. # convert from our intermediate format to the correct one
  181. for d in sorted(days):
  182. # vanity: let's sort
  183. for room in sorted(days[d]["rooms"]):
  184. days[d]["rooms"][room]["slots"].sort(key=lambda x: x["slot"])
  185. # convert dict to list
  186. days[d]["rooms"] = [days[d]["rooms"][k] for k in sorted(days[d]["rooms"])]
  187. out = {"schedule": [days[k] for k in sorted(days)]}
  188. # XXX: dump, finally
  189. def output(self):
  190. self.output_markdown()
  191. def main():
  192. converter_register = {"pelican": PelicanConverter}
  193. args = get_parser().parse_args()
  194. c = converter_register[args.mode](args)
  195. c.run()
  196. if __name__ == "__main__":
  197. main()