From 300fd8f5f0637100867554412b12dd7d514c0180 Mon Sep 17 00:00:00 2001 From: itec78 Date: Mon, 30 May 2022 01:27:09 +0200 Subject: [PATCH] fixes and new features --- .gitignore | 2 + zic2ics.py | 170 ++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 123 insertions(+), 49 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2dce335 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.ics +history/ diff --git a/zic2ics.py b/zic2ics.py index 3cdb2a2..7c9da5c 100755 --- a/zic2ics.py +++ b/zic2ics.py @@ -6,52 +6,20 @@ from bs4 import BeautifulSoup from datetime import datetime, timedelta import locale import pytz -from icalendar import Calendar, Event +from icalendar import Calendar, Event, vDatetime +import re +import argparse +import glob +locale.setlocale(locale.LC_TIME,'it_IT.UTF-8') +def zic2ics(outpath): - -def zic2ics(): - locale.setlocale(locale.LC_TIME,'it_IT.UTF-8') os.makedirs('history', exist_ok=True) - cal = Calendar() - cal.add('prodid', '-//Agenda di zic.it//') - cal.add('version', '2.0') + page = requests.get("https://zic.it/agenda/").content - page = requests.get("https://zic.it/agenda/") - soup = BeautifulSoup(page.content, "html.parser") - - agenda = soup.find("div", class_="entry-content clearfix") - entries = agenda.find_all("p", style=None) - - for entry in entries: - try: - lnk = entry.find('a', href=True)['href'] - - spe = entry.get_text(strip=True, separator='\n').splitlines() - tit = spe[0].strip() - dat = datetime.strptime(spe[2], "%A %d %B %Y - %H:%M").replace(tzinfo=pytz.timezone('Europe/Rome')) - - det = spe[3][2:-2].split(";") - dov = det[0].strip() - chi = det[1].strip() if 1 < len(det) else "" - # print(" ~ ".join([tit, str(dat), dov, chi])) - - uid = "|".join([dat.strftime("%Y%m%d"), tit, dov]) - - event = Event() - event.add('summary', tit) - event.add('dtstart', dat) - event.add('description', chi) - event.add('location', dov) - event.add('url', lnk) - event.add('uid', uid) - cal.add_component(event) - - except: - pass - + cal = html2cal(page) ical = cal.to_ical() try: @@ -67,7 +35,7 @@ def zic2ics(): cal_all = Calendar.from_ical(f.read()) except: cal_all = Calendar() - ical_all = icalmerge(cal_all, cal).to_ical() + ical_all = icalmerge(cal_all, cal, True).to_ical() with open('zic.ics', 'wb') as f: f.write(ical) @@ -79,14 +47,89 @@ def zic2ics(): f.write(ical) with open(os.path.join('history','zic_%s.html' % datetime.now().strftime("%Y%m%d%H%M%S")), 'wb') as f: - f.write(page.content) + f.write(page) + + if not outpath is None: + with open(os.path.join(outpath, 'zic.ics'), 'wb') as f: + f.write(ical) + + with open(os.path.join(outpath, 'zic_all.ics'), 'wb') as f: + f.write(ical_all) + + + +def zicrebuild(): + cal_all = Calendar() + + for html in sorted(glob.glob(os.path.join("history","zic_*.html"))): + # print(html) + with open(html, 'r') as f: + page = f.read() + + cal = html2cal(page) + cdat = datetime.strptime(os.path.basename(html).replace("zic_","").replace(".html",""),"%Y%m%d%H%M%S").astimezone() + cal_all = icalmerge(cal_all, cal, True, 15, cdat) + + with open('zic_rebuild.ics', 'wb') as f: + f.write(cal.to_ical()) + + with open('zic_rebuild_all.ics', 'wb') as f: + f.write(cal_all.to_ical()) -def icalmerge(calold, calnew, delete = False, tolerance = 15): + +def html2cal(page): + cal = Calendar() + cal.add('prodid', '-//Agenda di zic.it//') + cal.add('version', '2.0') + + soup = BeautifulSoup(page, "html.parser") + + agenda = soup.find("div", class_="entry-content clearfix") + entries = agenda.find_all("p", style=None) + + for entry in entries: + try: + lnk = entry.find('a', href=True)['href'] + + spe = entry.get_text(strip=True, separator='\n').splitlines() + tit = spe[0].strip() + dat = datetime.strptime(spe[2], "%A %d %B %Y - %H:%M").astimezone(tz=pytz.timezone('Europe/Rome')) + + det = spe[3][2:-2].split(";") + dov = det[0].strip() + chi = det[1].strip() if 1 < len(det) else "" + # print(" ~ ".join([tit, str(dat), dov, chi])) + + tituid = re.sub("[\(\[].*?[\)\]]","",tit).strip() # "\(.*?\)" + + uid = "|".join([dat.strftime("%Y%m%d"), tituid, dov]) + + event = Event() + event.add('summary', tit) + event.add('dtstart', vDatetime(dat)) + event.add('description', chi) + event.add('location', dov) + event.add('url', lnk) + event.add('uid', uid) + if any(map(tit.__contains__, ["[rinviato]","[annullato]"])): + event.add('status','CANCELLED') + + cal.add_component(event) + + except: + pass + + return(cal) + +def icalmerge(calold, calnew, delete = False, tolerance = 15, cutoffdate = None): + if cutoffdate is None: + cutoffdate = datetime.now(pytz.UTC) + cutoffdate = cutoffdate + timedelta(minutes = tolerance) cal = Calendar() cal.add('prodid', calnew.get('prodid').to_ical().decode('utf-8')) @@ -112,15 +155,17 @@ def icalmerge(calold, calnew, delete = False, tolerance = 15): uid = e.get('UID').to_ical().decode('utf-8') if uid not in calnew_uids: - if e.decoded('dtstart') < datetime.now(pytz.UTC) + timedelta(minutes = tolerance): - print("Past \"%s\"" % uid) + if e.get('dtstart').dt < cutoffdate: + # print("Past \"%s\"" % uid) cal.add_component(e) else: if delete: print("Deleted \"%s\"" % uid) else: - print("Cancelled \"%s\"" % uid) - e.add('status', 'CANCELLED') + if e.get('status') != 'CANCELLED': + print("Cancelled \"%s\"" % uid) + e['status'] = 'CANCELLED' + cal.add_component(e) for e in calnew.walk('VEVENT'): @@ -135,10 +180,37 @@ def icalmerge(calold, calnew, delete = False, tolerance = 15): return(cal) +# def cal2date(dtx): +# # Workaround to fix a bug decoding date with TZID +# # DTSTART;TZID=Europe/Rome;VALUE=DATE-TIME:20220611T160000 +# # is decoded to 2022-06-11 16:00:00+00:50 instead of 2022-06-11 16:00:00+02:00 + +# tzx = dtx.params['TZID'] +# if tzx is None: +# return dtx.dt +# else: +# print(tzx) +# return (dtx.dt.replace(tzinfo=None).astimezone(tz=pytz.timezone(tzx))) + +def dir_path(path): + if os.path.isdir(path): + return path + else: + raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path") + - if __name__ == "__main__": os.chdir(os.path.dirname(__file__)) - zic2ics() + + parser = argparse.ArgumentParser() + parser.add_argument("--rebuild", "-r", action='store_true') + parser.add_argument("--output", "-o", type=dir_path) + args = parser.parse_args() + + if args.rebuild: + print("Rebuilding") + zicrebuild() + else: + zic2ics(args.output)