zic2ics/zic2ics.py
2022-05-30 01:27:09 +02:00

216 lines
6.3 KiB
Python
Executable file

#!/usr/bin/env python3
import requests
import os
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import locale
import pytz
from icalendar import Calendar, Event, vDatetime
import re
import argparse
import glob
# Switch date/time formatting to Italian so that the %A (weekday name) and
# %B (month name) directives used by strptime in html2cal match the names
# found in the agenda text. Raises locale.Error at import time if the
# it_IT.UTF-8 locale is not installed on the host.
locale.setlocale(locale.LC_TIME,'it_IT.UTF-8')
def zic2ics(outpath):
    """Fetch the zic.it agenda and refresh the local iCalendar files.

    The agenda page is downloaded, converted with ``html2cal`` and compared
    with the previously saved ``zic.ics``. Only when the serialized calendar
    differs are files written:

    * ``zic.ics`` - the current agenda;
    * ``zic_all.ics`` - the current agenda merged into the accumulated
      calendar via ``icalmerge`` (with ``delete=True``);
    * ``history/zic_<stamp>.ics`` and ``history/zic_<stamp>.html`` - a
      snapshot pair sharing one timestamp.

    All paths are relative to the current working directory.

    Args:
        outpath: Optional directory that also receives copies of ``zic.ics``
            and ``zic_all.ics`` when the agenda changed. ``None`` skips it.

    Raises:
        requests.RequestException: if the download fails, times out or the
            server answers with an HTTP error status.
    """
    os.makedirs('history', exist_ok=True)

    # A timeout keeps an unattended run from hanging forever; failing on HTTP
    # errors avoids parsing an error page as if it were the agenda.
    response = requests.get("https://zic.it/agenda/", timeout=60)
    response.raise_for_status()
    page = response.content

    cal = html2cal(page)
    ical = cal.to_ical()

    try:
        with open('zic.ics', 'rb') as f:
            ical_last = f.read()
    except OSError:
        # No previous calendar (e.g. first run): always counts as "changed".
        ical_last = b""

    if ical == ical_last:
        return

    try:
        with open('zic_all.ics', 'rb') as f:
            cal_all = Calendar.from_ical(f.read())
    except FileNotFoundError:
        cal_all = Calendar()
    except Exception as err:
        # Best effort, as before: start over from an empty calendar, but say so
        # instead of silently discarding the accumulated events.
        print("Could not load zic_all.ics (%s), starting from an empty calendar" % err)
        cal_all = Calendar()

    ical_all = icalmerge(cal_all, cal, True).to_ical()

    with open('zic.ics', 'wb') as f:
        f.write(ical)
    with open('zic_all.ics', 'wb') as f:
        f.write(ical_all)

    # One timestamp for both snapshot files so the pair always matches.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    with open(os.path.join('history', 'zic_%s.ics' % stamp), 'wb') as f:
        f.write(ical)
    with open(os.path.join('history', 'zic_%s.html' % stamp), 'wb') as f:
        f.write(page)

    # ical_all only exists when the agenda changed, so the optional copy
    # belongs to this branch.
    if outpath is not None:
        with open(os.path.join(outpath, 'zic.ics'), 'wb') as f:
            f.write(ical)
        with open(os.path.join(outpath, 'zic_all.ics'), 'wb') as f:
            f.write(ical_all)
def zicrebuild():
    """Rebuild the accumulated calendar from the HTML snapshots in ``history/``.

    Every ``history/zic_<YYYYmmddHHMMSS>.html`` file is replayed in file-name
    order: it is parsed with ``html2cal`` and merged into a fresh calendar
    with ``icalmerge`` (``delete=True``, tolerance 15 minutes), using the
    timestamp taken from the file name as the cutoff date.

    Writes ``zic_rebuild.ics`` (the agenda of the last snapshot) and
    ``zic_rebuild_all.ics`` (the merged result) in the current working
    directory. When there are no snapshots nothing is written.
    """
    snapshots = sorted(glob.glob(os.path.join("history", "zic_*.html")))
    if not snapshots:
        # Without this guard the final write would fail on an unbound ``cal``.
        print("No history snapshots found, nothing to rebuild")
        return

    cal_all = Calendar()
    for html in snapshots:
        # Read raw bytes: the snapshots were written in binary mode, and this
        # avoids decoding them with whatever the platform default encoding is.
        with open(html, 'rb') as f:
            page = f.read()
        cal = html2cal(page)
        stamp = os.path.basename(html).replace("zic_", "").replace(".html", "")
        # The stamp is a naive timestamp; astimezone() interprets it as local
        # time and makes it timezone-aware for the comparison in icalmerge.
        cdat = datetime.strptime(stamp, "%Y%m%d%H%M%S").astimezone()
        cal_all = icalmerge(cal_all, cal, True, 15, cdat)

    with open('zic_rebuild.ics', 'wb') as f:
        f.write(cal.to_ical())
    with open('zic_rebuild_all.ics', 'wb') as f:
        f.write(cal_all.to_ical())
def html2cal(page):
    """Convert the zic.it agenda HTML into an iCalendar ``Calendar``.

    Each ``<p>`` without a ``style`` attribute inside the
    ``div.entry-content.clearfix`` container is treated as a candidate event.
    Its text is split into lines: line 0 is the title, line 2 the date
    (``"%A %d %B %Y - %H:%M"``, parsed with the current ``LC_TIME`` locale)
    and line 3 the details, ``"<location>;<organizer>"`` once two characters
    are trimmed from each end. Paragraphs that do not fit this layout are
    skipped.

    The event UID is ``"<YYYYmmdd>|<title without bracketed tags>|<location>"``.
    Titles containing ``[rinviato]`` or ``[annullato]`` get status
    ``CANCELLED``.

    Args:
        page: HTML document as ``bytes`` or ``str``.

    Returns:
        A ``Calendar`` with one ``VEVENT`` per parsed agenda entry.

    Raises:
        AttributeError: if the agenda container is missing from the page.
            Deliberately not swallowed: a page without the container should
            fail loudly rather than yield an empty calendar.
    """
    cal = Calendar()
    cal.add('prodid', '-//Agenda di zic.it//')
    cal.add('version', '2.0')

    rome = pytz.timezone('Europe/Rome')
    # Matches "(...)" and "[...]" groups, which are left out of the UID.
    bracketed = re.compile(r"[\(\[].*?[\)\]]")

    soup = BeautifulSoup(page, "html.parser")
    agenda = soup.find("div", class_="entry-content clearfix")
    for entry in agenda.find_all("p", style=None):
        try:
            lnk = entry.find('a', href=True)['href']
            spe = entry.get_text(strip=True, separator='\n').splitlines()
            tit = spe[0].strip()
            naive = datetime.strptime(spe[2], "%A %d %B %Y - %H:%M")
            det = spe[3][2:-2].split(";")
        except (TypeError, IndexError, ValueError):
            # No link (None is not subscriptable), too few text lines, or a
            # date that does not parse: not an event paragraph.
            continue

        # The parsed time is assumed to be Europe/Rome wall-clock time, so
        # attach that zone directly. astimezone() on a naive datetime would
        # first interpret it in the host's local zone and shift the time on
        # any machine not configured for Europe/Rome.
        dat = rome.localize(naive)
        dov = det[0].strip()
        chi = det[1].strip() if len(det) > 1 else ""
        tituid = bracketed.sub("", tit).strip()
        uid = "|".join([dat.strftime("%Y%m%d"), tituid, dov])

        event = Event()
        event.add('summary', tit)
        event.add('dtstart', vDatetime(dat))
        event.add('description', chi)
        event.add('location', dov)
        event.add('url', lnk)
        event.add('uid', uid)
        if any(tag in tit for tag in ("[rinviato]", "[annullato]")):
            event.add('status', 'CANCELLED')
        cal.add_component(event)
    return cal
def _event_uid(event):
    """Return the UID property of *event* decoded to text."""
    return event.get('UID').to_ical().decode('utf-8')


def _ensure_uids(calendar):
    """Add a "<YYYYmmdd>|<summary>|<location>" UID to every VEVENT lacking one."""
    for e in calendar.walk('VEVENT'):
        if e.get('UID') is None:
            uid = "|".join([
                e.decoded('dtstart').strftime("%Y%m%d"),
                e.get('Summary').to_ical().decode('utf-8'),
                e.get('Location').to_ical().decode('utf-8'),
            ])
            e.add('uid', uid)


def icalmerge(calold, calnew, delete = False, tolerance = 15, cutoffdate = None):
    """Merge the events of *calnew* into *calold* and return a new calendar.

    Events are matched by UID. The result contains:

    * every event of *calnew* (a same-UID event in *calold* is replaced);
    * events only in *calold* that start before the cutoff, unchanged;
    * events only in *calold* that start at or after the cutoff, either
      dropped (``delete=True``) or kept with status ``CANCELLED``
      (``delete=False``).

    A line is printed for each added, deleted or newly cancelled event.
    The input calendars are modified in place: missing UIDs are filled in
    and, when ``delete`` is false, vanished events are marked cancelled.

    Args:
        calold: Previously accumulated ``Calendar``.
        calnew: Freshly parsed ``Calendar``; its ``prodid`` and ``version``
            are copied to the result.
        delete: Drop, instead of cancel, not-yet-started events that are
            missing from *calnew*.
        tolerance: Minutes added to *cutoffdate* before comparing.
        cutoffdate: Timezone-aware reference instant; defaults to the
            current UTC time.

    Returns:
        The merged ``Calendar``.
    """
    if cutoffdate is None:
        cutoffdate = datetime.now(pytz.UTC)
    cutoffdate = cutoffdate + timedelta(minutes=tolerance)

    cal = Calendar()
    cal.add('prodid', calnew.get('prodid').to_ical().decode('utf-8'))
    cal.add('version', calnew.get('version').to_ical().decode('utf-8'))

    # Add a computed UID if missing, so every event can be matched below.
    _ensure_uids(calold)
    _ensure_uids(calnew)

    # Sets give constant-time membership tests inside the loops.
    calold_uids = {_event_uid(e) for e in calold.walk('VEVENT')}
    calnew_uids = {_event_uid(e) for e in calnew.walk('VEVENT')}

    for e in calold.walk('VEVENT'):
        uid = _event_uid(e)
        if uid in calnew_uids:
            # Superseded by the version in calnew, which is added below.
            continue
        if e.get('dtstart').dt < cutoffdate:
            # Already started: keep it as history.
            cal.add_component(e)
        elif delete:
            print("Deleted \"%s\"" % uid)
        else:
            if e.get('status') != 'CANCELLED':
                print("Cancelled \"%s\"" % uid)
                e['status'] = 'CANCELLED'
            cal.add_component(e)

    for e in calnew.walk('VEVENT'):
        uid = _event_uid(e)
        if uid not in calold_uids:
            print("Added \"%s\"" % uid)
        cal.add_component(e)
    return cal
# def cal2date(dtx):
# # Workaround to fix a bug decoding date with TZID
# # DTSTART;TZID=Europe/Rome;VALUE=DATE-TIME:20220611T160000
# # is decoded to 2022-06-11 16:00:00+00:50 instead of 2022-06-11 16:00:00+02:00
# tzx = dtx.params['TZID']
# if tzx is None:
# return dtx.dt
# else:
# print(tzx)
# return (dtx.dt.replace(tzinfo=None).astimezone(tz=pytz.timezone(tzx)))
def dir_path(path):
    """Validate *path* as an existing directory, for use as an argparse ``type``.

    Args:
        path: Path string taken from the command line.

    Returns:
        *path* unchanged when it names an existing directory.

    Raises:
        argparse.ArgumentTypeError: when *path* is not an existing directory.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")
    return path
if __name__ == "__main__":
    # Run from the script's own directory: every data file (zic.ics,
    # zic_all.ics, history/) is addressed with a relative path.
    # abspath() guards against __file__ being a bare relative file name, for
    # which dirname() returns "" and os.chdir("") raises FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    parser = argparse.ArgumentParser()
    parser.add_argument("--rebuild", "-r", action='store_true')
    # NOTE: arguments are parsed after the chdir above, so a relative
    # --output directory is resolved against the script directory.
    parser.add_argument("--output", "-o", type=dir_path)
    args = parser.parse_args()

    if args.rebuild:
        print("Rebuilding")
        zicrebuild()
    else:
        zic2ics(args.output)