zic2ics/zic2ics.py

217 lines
6.3 KiB
Python
Raw Permalink Normal View History

2022-05-17 14:34:28 +02:00
#!/usr/bin/env python3
import requests
import os
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import locale
import pytz
2022-05-30 01:27:09 +02:00
from icalendar import Calendar, Event, vDatetime
import re
import argparse
import glob
2022-05-17 14:34:28 +02:00
2022-05-30 01:27:09 +02:00
# Use the Italian LC_TIME locale: html2cal parses dates with strptime's %A/%B
# (weekday and month names), which are matched against the active locale.
# NOTE(review): presumably the agenda page writes dates in Italian - confirm.
locale.setlocale(locale.LC_TIME,'it_IT.UTF-8')
2022-05-17 14:34:28 +02:00
2022-05-30 01:27:09 +02:00
def zic2ics(outpath):
    """Fetch the zic.it agenda and refresh the local iCalendar files.

    The agenda page is downloaded and converted with ``html2cal``. When the
    serialized calendar differs from the current ``zic.ics`` the function:

    * rewrites ``zic.ics`` with the freshly scraped calendar;
    * merges the new calendar into ``zic_all.ics`` through ``icalmerge``
      (with ``delete=True``);
    * stores a timestamped ``.ics``/``.html`` pair under ``history/``;
    * copies ``zic.ics`` and ``zic_all.ics`` into *outpath*, if given.

    When nothing changed, no calendar file is written. All paths except
    *outpath* are relative to the current working directory.

    Args:
        outpath: Directory that receives a copy of both calendars, or
            ``None`` to skip the copy.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an HTTP error status.
    """
    os.makedirs('history', exist_ok=True)

    # Without a timeout a stalled connection would hang the job forever.
    response = requests.get("https://zic.it/agenda/", timeout=60)
    response.raise_for_status()
    page = response.content

    cal = html2cal(page)
    ical = cal.to_ical()

    try:
        with open('zic.ics', 'rb') as f:
            ical_last = f.read()
    except OSError:
        # First run (or unreadable file): treat as "no previous calendar".
        ical_last = b""

    if ical == ical_last:
        return

    try:
        with open('zic_all.ics', 'rb') as f:
            cal_all = Calendar.from_ical(f.read())
    except Exception:
        # Best effort: a missing or unparsable archive restarts from empty.
        cal_all = Calendar()
    ical_all = icalmerge(cal_all, cal, True).to_ical()

    # One timestamp for both history files so the pair always shares a name.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")

    with open('zic.ics', 'wb') as f:
        f.write(ical)
    with open('zic_all.ics', 'wb') as f:
        f.write(ical_all)
    with open(os.path.join('history', 'zic_%s.ics' % stamp), 'wb') as f:
        f.write(ical)
    with open(os.path.join('history', 'zic_%s.html' % stamp), 'wb') as f:
        f.write(page)

    if outpath is not None:
        with open(os.path.join(outpath, 'zic.ics'), 'wb') as f:
            f.write(ical)
        with open(os.path.join(outpath, 'zic_all.ics'), 'wb') as f:
            f.write(ical_all)
def zicrebuild():
    """Rebuild the cumulative calendar by replaying the saved HTML snapshots.

    Every ``history/zic_<YYYYmmddHHMMSS>.html`` file is parsed with
    ``html2cal`` in filename (hence chronological) order and merged into a
    running calendar with ``icalmerge``, using the timestamp embedded in the
    file name as the cutoff date and a tolerance of 15 minutes.

    Writes ``zic_rebuild.ics`` (the calendar of the last snapshot) and
    ``zic_rebuild_all.ics`` (the merged calendar) in the current working
    directory. Does nothing but print a notice when there are no snapshots.
    """
    snapshots = sorted(glob.glob(os.path.join("history", "zic_*.html")))
    if not snapshots:
        # Avoids referencing an unbound calendar below when history is empty.
        print("No snapshots found in history")
        return

    cal_all = Calendar()
    cal = None
    for html in snapshots:
        # Snapshots are stored as raw bytes; read them back the same way so
        # the parser sees exactly what it saw at download time instead of a
        # locale-dependent text decoding.
        with open(html, 'rb') as f:
            page = f.read()
        cal = html2cal(page)
        stamp = os.path.basename(html).replace("zic_", "").replace(".html", "")
        # The naive timestamp is interpreted as system-local time.
        cdat = datetime.strptime(stamp, "%Y%m%d%H%M%S").astimezone()
        cal_all = icalmerge(cal_all, cal, True, 15, cdat)

    with open('zic_rebuild.ics', 'wb') as f:
        f.write(cal.to_ical())
    with open('zic_rebuild_all.ics', 'wb') as f:
        f.write(cal_all.to_ical())
2022-05-17 14:34:28 +02:00
2022-05-30 01:27:09 +02:00
def html2cal(page):
    """Convert the HTML of the agenda page into an iCalendar ``Calendar``.

    Each ``<p>`` without a ``style`` attribute inside the
    ``div.entry-content.clearfix`` container is treated as one event. Its
    text is split into lines: line 0 is the title, line 2 the start date
    (``"%A %d %B %Y - %H:%M"``, parsed with the active LC_TIME locale) and
    line 3, minus its first and last two characters, a ``;``-separated
    "location; organiser" pair. Entries that do not fit this layout are
    skipped.

    The UID is ``<YYYYmmdd>|<title without bracketed parts>|<location>``.
    Titles containing ``[rinviato]`` or ``[annullato]`` get status
    ``CANCELLED``.

    Args:
        page: HTML document (``bytes`` or ``str``).

    Returns:
        A ``Calendar`` with one ``VEVENT`` per parsed entry.

    Raises:
        ValueError: If the agenda container is not present in *page*.
    """
    rome = pytz.timezone('Europe/Rome')
    # Strips "(...)" / "[...]" annotations so the UID survives title tags.
    bracketed = re.compile(r"[\(\[].*?[\)\]]")
    cancel_tags = ("[rinviato]", "[annullato]")

    cal = Calendar()
    cal.add('prodid', '-//Agenda di zic.it//')
    cal.add('version', '2.0')

    soup = BeautifulSoup(page, "html.parser")
    agenda = soup.find("div", class_="entry-content clearfix")
    if agenda is None:
        # Fail loudly: returning an empty calendar would look like "every
        # event was removed" to the merge step.
        raise ValueError("agenda container not found in page")

    for entry in agenda.find_all("p", style=None):
        try:
            lnk = entry.find('a', href=True)['href']
            lines = entry.get_text(strip=True, separator='\n').splitlines()
            tit = lines[0].strip()
            naive = datetime.strptime(lines[2], "%A %d %B %Y - %H:%M")
            # The page shows wall-clock time; attach the Rome zone to it
            # rather than converting from the host's local zone.
            dat = rome.localize(naive)
            det = lines[3][2:-2].split(";")
            dov = det[0].strip()
            chi = det[1].strip() if len(det) > 1 else ""

            tituid = bracketed.sub("", tit).strip()
            uid = "|".join([dat.strftime("%Y%m%d"), tituid, dov])

            event = Event()
            event.add('summary', tit)
            event.add('dtstart', vDatetime(dat))
            event.add('description', chi)
            event.add('location', dov)
            event.add('url', lnk)
            event.add('uid', uid)
            if any(tag in tit for tag in cancel_tags):
                event.add('status', 'CANCELLED')
            cal.add_component(event)
        except Exception:
            # Best effort: paragraphs that are not events (no link, too few
            # lines, unparsable date, ...) are ignored.
            continue
    return cal
def _event_uid(event):
    """Return the UID of *event* as text, in its serialized iCalendar form."""
    return event.get('UID').to_ical().decode('utf-8')


def _ensure_uids(calendar):
    """Add a computed ``date|summary|location`` UID to events lacking one."""
    for e in calendar.walk('VEVENT'):
        if e.get('UID') is None:
            uid = "|".join([
                e.decoded('dtstart').strftime("%Y%m%d"),
                e.get('Summary').to_ical().decode('utf-8'),
                e.get('Location').to_ical().decode('utf-8'),
            ])
            e.add('uid', uid)


def icalmerge(calold, calnew, delete = False, tolerance = 15, cutoffdate = None):
    """Merge *calnew* into *calold* and return the result as a new calendar.

    Events are matched by UID. Every event of *calnew* is kept. An event
    that exists only in *calold* is:

    * kept unchanged if it starts before ``cutoffdate + tolerance``;
    * otherwise dropped when *delete* is true, or kept with status
      ``CANCELLED`` when *delete* is false.

    A line is printed for each added, deleted or newly cancelled event.

    Note that the input calendars are modified in place: missing UIDs are
    filled in and the status of old events may be set to ``CANCELLED``.

    Args:
        calold: Previously accumulated calendar.
        calnew: Freshly produced calendar; supplies ``prodid``/``version``.
        delete: Drop, rather than cancel, vanished future events.
        tolerance: Minutes added to the cutoff date.
        cutoffdate: Timezone-aware datetime separating past from future
            events; defaults to the current UTC time.

    Returns:
        The merged ``Calendar``.
    """
    if cutoffdate is None:
        cutoffdate = datetime.now(pytz.UTC)
    cutoffdate = cutoffdate + timedelta(minutes=tolerance)

    cal = Calendar()
    cal.add('prodid', calnew.get('prodid').to_ical().decode('utf-8'))
    cal.add('version', calnew.get('version').to_ical().decode('utf-8'))

    _ensure_uids(calold)
    _ensure_uids(calnew)

    # Sets give constant-time membership tests in the loops below.
    calold_uids = {_event_uid(e) for e in calold.walk('VEVENT')}
    calnew_uids = {_event_uid(e) for e in calnew.walk('VEVENT')}

    for e in calold.walk('VEVENT'):
        uid = _event_uid(e)
        if uid in calnew_uids:
            # Superseded by the version coming from calnew.
            continue
        if e.get('dtstart').dt < cutoffdate:
            # Already started: absence from the new calendar is expected.
            cal.add_component(e)
        elif delete:
            print("Deleted \"%s\"" % uid)
        else:
            if e.get('status') != 'CANCELLED':
                print("Cancelled \"%s\"" % uid)
            e['status'] = 'CANCELLED'
            cal.add_component(e)

    for e in calnew.walk('VEVENT'):
        uid = _event_uid(e)
        if uid not in calold_uids:
            print("Added \"%s\"" % uid)
        cal.add_component(e)
    return cal
2022-05-30 01:27:09 +02:00
# def cal2date(dtx):
# # Workaround to fix a bug decoding date with TZID
# # DTSTART;TZID=Europe/Rome;VALUE=DATE-TIME:20220611T160000
# # is decoded to 2022-06-11 16:00:00+00:50 instead of 2022-06-11 16:00:00+02:00
# tzx = dtx.params['TZID']
# if tzx is None:
# return dtx.dt
# else:
# print(tzx)
# return (dtx.dt.replace(tzinfo=None).astimezone(tz=pytz.timezone(tzx)))
def dir_path(path):
    """Validate that *path* names an existing directory.

    Intended as an ``argparse`` ``type=`` converter: the path is returned
    unchanged when it is a directory, otherwise
    ``argparse.ArgumentTypeError`` is raised.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")
    return path
2022-05-17 14:34:28 +02:00
if __name__ == "__main__":
    # Work from the script's own directory so the relative calendar and
    # history paths always resolve to the same place. abspath() guards
    # against an empty dirname when the script is started by bare file name.
    # Note: a relative --output is therefore resolved against this directory.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    parser = argparse.ArgumentParser()
    parser.add_argument("--rebuild", "-r", action='store_true',
                        help="rebuild the cumulative calendar from the saved history")
    parser.add_argument("--output", "-o", type=dir_path,
                        help="existing directory that also receives the generated calendars")
    args = parser.parse_args()
    if args.rebuild:
        print("Rebuilding")
        zicrebuild()
    else:
        zic2ics(args.output)
2022-05-17 14:34:28 +02:00