#!/usr/bin/env python3
"""Convert the zic.it agenda web page into iCalendar files.

Default mode downloads the agenda, writes the current calendar to
``zic.ics``, merges it into the cumulative ``zic_all.ics`` and stores a
timestamped copy of both the calendar and the raw HTML under ``history/``.
With ``--rebuild`` the cumulative calendar is recomputed from the HTML
snapshots in ``history/`` instead of from the network.
"""
import argparse
import glob
import locale
import os
import re
from datetime import datetime, timedelta

import pytz
import requests
from bs4 import BeautifulSoup
from icalendar import Calendar, Event, vDatetime

# html2cal parses dates with strptime's %A / %B, which follow LC_TIME; the
# Italian locale is selected so those directives match Italian names.
locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')

AGENDA_URL = "https://zic.it/agenda/"
ROME_TZ = pytz.timezone('Europe/Rome')
HISTORY_STAMP_FORMAT = "%Y%m%d%H%M%S"
HTTP_TIMEOUT_SECONDS = 30

# Matches a "(...)" or "[...]" group; such groups are removed from the title
# before it is used as part of an event UID.
_BRACKETED = re.compile(r"[\(\[].*?[\)\]]")

# Exceptions raised while picking apart a paragraph that does not have the
# expected agenda-entry layout (no link, too few lines, unparsable date...).
_ENTRY_ERRORS = (TypeError, IndexError, KeyError, ValueError, AttributeError)


def zic2ics(outpath):
    """Download the agenda and refresh the calendar files if it changed.

    Nothing is written when the freshly generated calendar is byte-identical
    to the existing ``zic.ics``.

    Args:
        outpath: Optional directory that receives an extra copy of
            ``zic.ics`` and ``zic_all.ics``; ``None`` to skip the copy.

    Raises:
        requests.RequestException: The page could not be downloaded.
        ValueError: The page does not contain the agenda container.
    """
    os.makedirs('history', exist_ok=True)

    response = requests.get(AGENDA_URL, timeout=HTTP_TIMEOUT_SECONDS)
    # Fail loudly on an HTTP error rather than parsing an error page.
    response.raise_for_status()
    page = response.content

    cal = html2cal(page)
    ical = cal.to_ical()

    try:
        with open('zic.ics', 'rb') as f:
            ical_last = f.read()
    except OSError:
        # No readable previous calendar (e.g. first run).
        ical_last = b""

    if ical == ical_last:
        return

    try:
        with open('zic_all.ics', 'rb') as f:
            cal_all = Calendar.from_ical(f.read())
    except (OSError, ValueError):
        # Missing or unparsable cumulative calendar: start a new one.
        cal_all = Calendar()
    ical_all = icalmerge(cal_all, cal, True).to_ical()

    # One timestamp for both history files so the pair shares a name.
    stamp = datetime.now().strftime(HISTORY_STAMP_FORMAT)
    outputs = [
        ('zic.ics', ical),
        ('zic_all.ics', ical_all),
        (os.path.join('history', f'zic_{stamp}.ics'), ical),
        (os.path.join('history', f'zic_{stamp}.html'), page),
    ]
    if outpath is not None:
        outputs.append((os.path.join(outpath, 'zic.ics'), ical))
        outputs.append((os.path.join(outpath, 'zic_all.ics'), ical_all))

    for path, payload in outputs:
        with open(path, 'wb') as f:
            f.write(payload)


def zicrebuild():
    """Rebuild the calendars from the HTML snapshots in ``history/``.

    Snapshots are processed in file-name order; each one is merged into the
    cumulative calendar using the timestamp from its file name as the merge
    cutoff date. The calendar of the last snapshot is written to
    ``zic_rebuild.ics`` and the cumulative one to ``zic_rebuild_all.ics``.
    """
    cal_all = Calendar()
    cal = None
    for html in sorted(glob.glob(os.path.join("history", "zic_*.html"))):
        # Snapshots are stored as raw bytes, so read them back as bytes and
        # let the HTML parser handle decoding.
        with open(html, 'rb') as f:
            page = f.read()
        cal = html2cal(page)
        stamp = os.path.basename(html).replace("zic_", "").replace(".html", "")
        cdat = datetime.strptime(stamp, HISTORY_STAMP_FORMAT).astimezone()
        cal_all = icalmerge(cal_all, cal, True, 15, cdat)

    if cal is None:
        print("No history snapshots found, nothing to rebuild")
        return

    with open('zic_rebuild.ics', 'wb') as f:
        f.write(cal.to_ical())
    with open('zic_rebuild_all.ics', 'wb') as f:
        f.write(cal_all.to_ical())


def _entry2event(entry):
    """Build an Event from one agenda paragraph.

    The text of the paragraph is split into lines: line 0 is used as the
    title, line 2 as the date and line 3 (minus two characters at each end)
    as ``location;description``. Raises one of ``_ENTRY_ERRORS`` when the
    paragraph does not have that layout.
    """
    lnk = entry.find('a', href=True)['href']
    spe = entry.get_text(strip=True, separator='\n').splitlines()
    tit = spe[0].strip()
    # The parsed value is a wall-clock time; attach the Rome zone to it
    # directly instead of converting from the host's local zone.
    dat = ROME_TZ.localize(datetime.strptime(spe[2], "%A %d %B %Y - %H:%M"))
    det = spe[3][2:-2].split(";")
    dov = det[0].strip()
    chi = det[1].strip() if len(det) > 1 else ""

    # The UID ignores bracketed title parts, so adding e.g. "[annullato]"
    # to a title does not change the UID of the event.
    tituid = _BRACKETED.sub("", tit).strip()
    uid = "|".join([dat.strftime("%Y%m%d"), tituid, dov])

    event = Event()
    event.add('summary', tit)
    event.add('dtstart', vDatetime(dat))
    event.add('description', chi)
    event.add('location', dov)
    event.add('url', lnk)
    event.add('uid', uid)
    if any(tag in tit for tag in ("[rinviato]", "[annullato]")):
        event.add('status', 'CANCELLED')
    return event


def html2cal(page):
    """Parse the agenda HTML into a Calendar.

    Args:
        page: HTML of the agenda page (bytes or str).

    Returns:
        A Calendar with one VEVENT per paragraph that could be parsed;
        paragraphs that do not look like agenda entries are skipped.

    Raises:
        ValueError: The agenda container div is not present in the page.
    """
    cal = Calendar()
    cal.add('prodid', '-//Agenda di zic.it//')
    cal.add('version', '2.0')

    soup = BeautifulSoup(page, "html.parser")
    agenda = soup.find("div", class_="entry-content clearfix")
    if agenda is None:
        raise ValueError("agenda container not found in page")

    for entry in agenda.find_all("p", style=None):
        try:
            event = _entry2event(entry)
        except _ENTRY_ERRORS:
            # Not an agenda entry (or malformed): skip it, best effort.
            continue
        cal.add_component(event)
    return cal


def _event_uid(event):
    """Return the UID of a VEVENT as a str."""
    return event.get('UID').to_ical().decode('utf-8')


def _ensure_uids(cal):
    """Add a computed ``date|summary|location`` UID to events lacking one."""
    for e in cal.walk('VEVENT'):
        if e.get('UID') is None:
            uid = "|".join([
                e.decoded('dtstart').strftime("%Y%m%d"),
                e.get('Summary').to_ical().decode('utf-8'),
                e.get('Location').to_ical().decode('utf-8'),
            ])
            e.add('uid', uid)


def icalmerge(calold, calnew, delete=False, tolerance=15, cutoffdate=None):
    """Merge a new calendar into an old one, matching events by UID.

    Every event of ``calnew`` is kept. An event that is only in ``calold``
    is kept unchanged when it starts before the cutoff; otherwise it is
    dropped when ``delete`` is true, or kept with status CANCELLED when
    ``delete`` is false.

    Args:
        calold: Previously accumulated Calendar.
        calnew: Freshly parsed Calendar; supplies prodid and version.
        delete: Drop (rather than cancel) vanished events at/after cutoff.
        tolerance: Minutes added to ``cutoffdate``.
        cutoffdate: Timezone-aware reference time; defaults to now (UTC).

    Returns:
        A new Calendar with the merged events. Events missing a UID in
        either input get one added in place.
    """
    if cutoffdate is None:
        cutoffdate = datetime.now(pytz.UTC)
    cutoffdate = cutoffdate + timedelta(minutes=tolerance)

    cal = Calendar()
    cal.add('prodid', calnew.get('prodid').to_ical().decode('utf-8'))
    cal.add('version', calnew.get('version').to_ical().decode('utf-8'))

    # Add a computed UID if missing
    _ensure_uids(calold)
    _ensure_uids(calnew)

    calold_uids = {_event_uid(e) for e in calold.walk('VEVENT')}
    calnew_uids = {_event_uid(e) for e in calnew.walk('VEVENT')}

    for e in calold.walk('VEVENT'):
        uid = _event_uid(e)
        if uid in calnew_uids:
            # Superseded by the version in calnew, added below.
            continue
        if e.get('dtstart').dt < cutoffdate:
            # Already started: keep as history.
            cal.add_component(e)
        elif delete:
            print("Deleted \"%s\"" % uid)
        else:
            if e.get('status') != 'CANCELLED':
                print("Cancelled \"%s\"" % uid)
                e['status'] = 'CANCELLED'
            cal.add_component(e)

    for e in calnew.walk('VEVENT'):
        uid = _event_uid(e)
        if uid not in calold_uids:
            print("Added \"%s\"" % uid)
        cal.add_component(e)

    return cal


# NOTE: disabled workaround, kept for reference.
# def cal2date(dtx):
#     # Workaround to fix a bug decoding date with TZID
#     # DTSTART;TZID=Europe/Rome;VALUE=DATE-TIME:20220611T160000
#     # is decoded to 2022-06-11 16:00:00+00:50 instead of 2022-06-11 16:00:00+02:00
#     tzx = dtx.params['TZID']
#     if tzx is None:
#         return dtx.dt
#     else:
#         print(tzx)
#         return (dtx.dt.replace(tzinfo=None).astimezone(tz=pytz.timezone(tzx)))


def dir_path(path):
    """argparse ``type=`` callback: accept ``path`` only if it is a directory.

    Returns the path unchanged, or raises ``argparse.ArgumentTypeError``.
    """
    if os.path.isdir(path):
        return path
    raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")


if __name__ == "__main__":
    # All data files are relative to the script's directory. abspath guards
    # against __file__ having no directory part, where chdir("") would fail.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    parser = argparse.ArgumentParser()
    parser.add_argument("--rebuild", "-r", action='store_true')
    parser.add_argument("--output", "-o", type=dir_path)
    args = parser.parse_args()
    if args.rebuild:
        print("Rebuilding")
        zicrebuild()
    else:
        zic2ics(args.output)