2022-05-17 14:34:28 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import requests
|
|
|
|
import os
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from datetime import datetime, timedelta
|
|
|
|
import locale
|
|
|
|
import pytz
|
2022-05-30 01:27:09 +02:00
|
|
|
from icalendar import Calendar, Event, vDatetime
|
|
|
|
import re
|
|
|
|
import argparse
|
|
|
|
import glob
|
2022-05-17 14:34:28 +02:00
|
|
|
|
2022-05-30 01:27:09 +02:00
|
|
|
# Select the Italian locale for time formatting and parsing: html2cal parses
# dates with the locale-dependent %A / %B strptime directives (the agenda page
# presumably spells day and month names in Italian).
locale.setlocale(locale.LC_TIME,'it_IT.UTF-8')
|
2022-05-17 14:34:28 +02:00
|
|
|
|
2022-05-30 01:27:09 +02:00
|
|
|
def zic2ics(outpath):
    """Download the zic.it agenda and refresh the local iCalendar files.

    The agenda page is converted with ``html2cal``. Nothing is written when
    the resulting calendar is byte-identical to the one stored in
    ``zic.ics``. Otherwise the function:

    * merges the new calendar into the cumulative ``zic_all.ics`` through
      ``icalmerge`` (called with ``delete=True``),
    * rewrites ``zic.ics`` and ``zic_all.ics``,
    * stores a timestamped copy of the calendar and of the raw HTML page
      in ``history/``,
    * copies both calendars into ``outpath`` when one is given.

    All relative paths are resolved against the current working directory.

    Args:
        outpath: Directory receiving a copy of ``zic.ics`` and
            ``zic_all.ics``, or ``None`` to skip the copy.

    Raises:
        requests.RequestException: If the page cannot be downloaded, the
            request times out, or the server answers with an error status.
    """
    os.makedirs('history', exist_ok=True)

    # Fail early on timeouts and HTTP errors: an error page must never be
    # parsed as an agenda and merged (with deletions) into the archive.
    response = requests.get("https://zic.it/agenda/", timeout=30)
    response.raise_for_status()
    page = response.content

    cal = html2cal(page)
    ical = cal.to_ical()

    try:
        with open('zic.ics', 'rb') as f:
            ical_last = f.read()
    except OSError:
        # No previous export available: always treated as "changed".
        ical_last = None

    if ical == ical_last:
        return

    try:
        with open('zic_all.ics', 'rb') as f:
            cal_all = Calendar.from_ical(f.read())
    except (OSError, ValueError):
        # Missing or unparsable archive: start a fresh one.
        cal_all = Calendar()
    ical_all = icalmerge(cal_all, cal, True).to_ical()

    # One timestamp for both history files so the .ics/.html pair always
    # shares the same name stem.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")

    outputs = [
        ('zic.ics', ical),
        ('zic_all.ics', ical_all),
        (os.path.join('history', 'zic_%s.ics' % stamp), ical),
        (os.path.join('history', 'zic_%s.html' % stamp), page),
    ]
    if outpath is not None:
        outputs.append((os.path.join(outpath, 'zic.ics'), ical))
        outputs.append((os.path.join(outpath, 'zic_all.ics'), ical_all))

    for path, data in outputs:
        with open(path, 'wb') as f:
            f.write(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def zicrebuild():
    """Rebuild the calendars from the HTML snapshots stored in ``history/``.

    Every ``history/zic_<YYYYmmddHHMMSS>.html`` file is parsed with
    ``html2cal`` in sorted file-name order and merged into a cumulative
    calendar with ``icalmerge``. The timestamp encoded in the file name is
    used as the merge cutoff date, with a tolerance of 15 minutes and
    ``delete=True``.

    The calendar of the last snapshot is written to ``zic_rebuild.ics`` and
    the cumulative calendar to ``zic_rebuild_all.ics``, both in the current
    working directory. When no snapshot exists nothing is written.
    """
    snapshots = sorted(glob.glob(os.path.join("history", "zic_*.html")))
    if not snapshots:
        # Without snapshots there is no calendar to write.
        print("No HTML snapshots found in history")
        return

    cal_all = Calendar()
    cal = None
    for html in snapshots:
        # Snapshots are stored as raw bytes by zic2ics; read them back the
        # same way so the parser sees exactly what was downloaded instead of
        # a decode made with the locale's default encoding.
        with open(html, 'rb') as f:
            page = f.read()

        cal = html2cal(page)

        # The file name carries the local time at which the page was saved.
        stamp = os.path.basename(html).replace("zic_", "").replace(".html", "")
        cdat = datetime.strptime(stamp, "%Y%m%d%H%M%S").astimezone()
        cal_all = icalmerge(cal_all, cal, delete=True, tolerance=15, cutoffdate=cdat)

    with open('zic_rebuild.ics', 'wb') as f:
        f.write(cal.to_ical())

    with open('zic_rebuild_all.ics', 'wb') as f:
        f.write(cal_all.to_ical())
|
|
|
|
|
2022-05-17 14:34:28 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2022-05-30 01:27:09 +02:00
|
|
|
# Bracketed or parenthesised notes in a title; they are left out of the UID.
_UID_NOISE = re.compile(r"[\(\[].*?[\)\]]")

# Failures that mean "this paragraph does not have the expected event layout".
_ENTRY_ERRORS = (AttributeError, LookupError, TypeError, ValueError)


def _entry2event(entry, tz):
    """Build an ``Event`` from one agenda paragraph.

    Args:
        entry: Parsed ``<p>`` element of the agenda.
        tz: pytz timezone in which the date printed on the page is expressed.

    Raises:
        AttributeError, LookupError, TypeError, ValueError: If the paragraph
            lacks a link, has too few text fragments, or its date does not
            match the expected format.
    """
    link = entry.find('a', href=True)['href']

    # Text fragments of the paragraph, one per line: fragment 0 is used as
    # the title, fragment 2 as the date and fragment 3 as the details.
    parts = entry.get_text(strip=True, separator='\n').splitlines()
    title = parts[0].strip()

    # The page prints wall-clock time; attach the zone with localize() so the
    # result does not depend on the timezone of the machine running the script.
    start = tz.localize(datetime.strptime(parts[2], "%A %d %B %Y - %H:%M"))

    # The details fragment loses its first and last two characters, then is
    # split on ";" into the location and an optional description.
    details = parts[3][2:-2].split(";")
    location = details[0].strip()
    description = details[1].strip() if len(details) > 1 else ""

    # UID: start day + title without bracketed notes + location, so adding a
    # bracketed tag to the title does not change the identity of the event.
    uid_title = _UID_NOISE.sub("", title).strip()
    uid = "|".join([start.strftime("%Y%m%d"), uid_title, location])

    event = Event()
    event.add('summary', title)
    event.add('dtstart', vDatetime(start))
    event.add('description', description)
    event.add('location', location)
    event.add('url', link)
    event.add('uid', uid)
    if any(tag in title for tag in ("[rinviato]", "[annullato]")):
        event.add('status', 'CANCELLED')
    return event


def html2cal(page):
    """Convert the HTML of the agenda page into an iCalendar ``Calendar``.

    Events are read from the ``<p>`` elements without a ``style`` attribute
    found inside the ``div`` with class ``entry-content clearfix``.
    Paragraphs that do not have the expected event layout are skipped.

    Date parsing relies on the ``LC_TIME`` locale configured at module level
    for the day and month names.

    Args:
        page: HTML document, as ``bytes`` or ``str``.

    Returns:
        A ``Calendar`` with ``prodid``/``version`` set and one ``VEVENT`` per
        recognised agenda entry.

    Raises:
        AttributeError: If the agenda container ``div`` is not in the page.
    """
    cal = Calendar()
    cal.add('prodid', '-//Agenda di zic.it//')
    cal.add('version', '2.0')

    soup = BeautifulSoup(page, "html.parser")
    agenda = soup.find("div", class_="entry-content clearfix")
    entries = agenda.find_all("p", style=None)

    rome = pytz.timezone('Europe/Rome')
    for entry in entries:
        try:
            event = _entry2event(entry, rome)
        except _ENTRY_ERRORS:
            # Not an event paragraph (or an unexpected layout): skip it.
            continue
        cal.add_component(event)

    return cal
|
|
|
|
|
|
|
|
def _event_uid(event):
    """Return the UID of ``event`` as a text string."""
    return event.get('UID').to_ical().decode('utf-8')


def _ensure_uids(cal):
    """Give every VEVENT of ``cal`` lacking a UID one computed from its data.

    The computed UID joins start day, summary and location with ``|``.
    Events are modified in place.
    """
    for e in cal.walk('VEVENT'):
        if e.get('UID') is None:
            uid = "|".join([
                e.decoded('dtstart').strftime("%Y%m%d"),
                e.get('Summary').to_ical().decode('utf-8'),
                e.get('Location').to_ical().decode('utf-8'),
            ])
            e.add('uid', uid)


def icalmerge(calold, calnew, delete=False, tolerance=15, cutoffdate=None):
    """Merge ``calnew`` into ``calold`` and return the result as a new calendar.

    Events are matched by UID (a UID is computed for events lacking one).

    * Every event of ``calnew`` is included in the result; those whose UID is
      not in ``calold`` are reported as "Added".
    * An event of ``calold`` whose UID is also in ``calnew`` is replaced by
      the ``calnew`` version.
    * An event found only in ``calold`` is kept as it is when it starts before
      the cutoff. Otherwise it is dropped (``delete`` true, reported as
      "Deleted") or kept with status ``CANCELLED`` (``delete`` false).

    Events of both input calendars may be modified in place (added UID,
    changed status) and are shared with the returned calendar.

    Args:
        calold: Previously accumulated calendar.
        calnew: Freshly parsed calendar; supplies ``prodid`` and ``version``.
        delete: Whether upcoming events missing from ``calnew`` are removed
            instead of being marked as cancelled.
        tolerance: Minutes added to ``cutoffdate`` before comparing.
        cutoffdate: Reference datetime; defaults to the current UTC time.

    Returns:
        The merged ``Calendar``.
    """
    if cutoffdate is None:
        cutoffdate = datetime.now(pytz.UTC)
    cutoffdate = cutoffdate + timedelta(minutes=tolerance)

    cal = Calendar()
    cal.add('prodid', calnew.get('prodid').to_ical().decode('utf-8'))
    cal.add('version', calnew.get('version').to_ical().decode('utf-8'))

    # Add a computed UID if missing
    _ensure_uids(calold)
    _ensure_uids(calnew)

    # Sets give constant-time membership tests in the loops below.
    calold_uids = {_event_uid(e) for e in calold.walk('VEVENT')}
    calnew_uids = {_event_uid(e) for e in calnew.walk('VEVENT')}

    for e in calold.walk('VEVENT'):
        uid = _event_uid(e)
        if uid in calnew_uids:
            # Superseded by the version coming from calnew.
            continue

        if e.get('dtstart').dt < cutoffdate:
            # Already started at the cutoff: keep it untouched.
            cal.add_component(e)
        elif delete:
            print("Deleted \"%s\"" % uid)
        else:
            if e.get('status') != 'CANCELLED':
                print("Cancelled \"%s\"" % uid)
                e['status'] = 'CANCELLED'
            cal.add_component(e)

    for e in calnew.walk('VEVENT'):
        uid = _event_uid(e)
        if uid not in calold_uids:
            print("Added \"%s\"" % uid)
        cal.add_component(e)

    return cal
|
|
|
|
|
|
|
|
|
2022-05-30 01:27:09 +02:00
|
|
|
# def cal2date(dtx):
|
|
|
|
# # Workaround to fix a bug decoding date with TZID
|
|
|
|
# # DTSTART;TZID=Europe/Rome;VALUE=DATE-TIME:20220611T160000
|
|
|
|
# # is decoded to 2022-06-11 16:00:00+00:50 instead of 2022-06-11 16:00:00+02:00
|
|
|
|
|
|
|
|
# tzx = dtx.params['TZID']
|
|
|
|
# if tzx is None:
|
|
|
|
# return dtx.dt
|
|
|
|
# else:
|
|
|
|
# print(tzx)
|
|
|
|
# return (dtx.dt.replace(tzinfo=None).astimezone(tz=pytz.timezone(tzx)))
|
|
|
|
|
|
|
|
def dir_path(path):
    """Validate a command-line argument that must name an existing directory.

    Meant to be used as an ``argparse`` ``type=`` callback.

    Args:
        path: Path given on the command line.

    Returns:
        ``path`` unchanged when it is a directory.

    Raises:
        argparse.ArgumentTypeError: If ``path`` is not a directory.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")
    return path
|
|
|
|
|
2022-05-17 14:34:28 +02:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # All data files are addressed by relative path, so work from the
    # directory of the script. abspath() covers the case where __file__ is a
    # bare file name: its dirname would be "" and chdir("") fails.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    parser = argparse.ArgumentParser()
    parser.add_argument("--rebuild", "-r", action='store_true',
                        help="rebuild the calendars from the saved HTML history")
    parser.add_argument("--output", "-o", type=dir_path,
                        help="existing directory receiving a copy of the calendars")
    args = parser.parse_args()

    if args.rebuild:
        print("Rebuilding")
        zicrebuild()
    else:
        zic2ics(args.output)
|
2022-05-17 14:34:28 +02:00
|
|
|
|