123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- #!/usr/bin/env python3
- import requests
- import os
- from bs4 import BeautifulSoup
- from datetime import datetime, timedelta
- import locale
- import pytz
- from icalendar import Calendar, Event, vDatetime
- import re
- import argparse
- import glob
# The agenda dates are parsed with strptime's %A / %B directives (day and
# month names), so LC_TIME has to be switched to Italian before any parsing.
try:
    locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')
except locale.Error as err:
    # Same exception type as before, but with a message that says what to do.
    raise locale.Error(
        "the 'it_IT.UTF-8' locale is required to parse the agenda dates; "
        "install/generate it on this system (e.g. with locale-gen)"
    ) from err
def zic2ics(outpath):
    """Download the zic.it agenda and refresh the local iCalendar files.

    The agenda page is fetched and converted with ``html2cal``. Nothing is
    written when the serialized calendar equals the content of ``zic.ics``.
    Otherwise the function:

    * rewrites ``zic.ics`` with the fresh calendar;
    * merges the fresh calendar into ``zic_all.ics`` through ``icalmerge``
      (called with ``delete=True``);
    * stores a timestamped copy of the calendar and of the raw HTML page in
      ``history/``;
    * copies ``zic.ics`` and ``zic_all.ics`` into ``outpath`` when given.

    All relative paths are resolved against the current working directory.

    Args:
        outpath: Directory that receives a copy of both calendars, or
            ``None`` to skip the copy.

    Raises:
        requests.RequestException: If the download fails, times out or
            returns an HTTP error status.
    """
    os.makedirs('history', exist_ok=True)

    # A timeout keeps an unattended run from hanging forever, and the status
    # check prevents an error page from being parsed as an (empty) agenda.
    response = requests.get("https://zic.it/agenda/", timeout=30)
    response.raise_for_status()
    page = response.content

    cal = html2cal(page)
    ical = cal.to_ical()

    try:
        with open('zic.ics', 'rb') as f:
            ical_last = f.read()
    except OSError:
        # No readable previous calendar: treat it as empty (bytes, like ical).
        ical_last = b""

    if ical == ical_last:
        return

    try:
        with open('zic_all.ics', 'rb') as f:
            cal_all = Calendar.from_ical(f.read())
    except (OSError, ValueError):
        # Missing or unparsable archive: start a new one.
        cal_all = Calendar()
    ical_all = icalmerge(cal_all, cal, True).to_ical()

    with open('zic.ics', 'wb') as f:
        f.write(ical)
    with open('zic_all.ics', 'wb') as f:
        f.write(ical_all)

    # One timestamp for both files so the .ics/.html pair always matches.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    with open(os.path.join('history', 'zic_%s.ics' % stamp), 'wb') as f:
        f.write(ical)
    with open(os.path.join('history', 'zic_%s.html' % stamp), 'wb') as f:
        f.write(page)

    if outpath is not None:
        with open(os.path.join(outpath, 'zic.ics'), 'wb') as f:
            f.write(ical)
        with open(os.path.join(outpath, 'zic_all.ics'), 'wb') as f:
            f.write(ical_all)
def zicrebuild():
    """Rebuild the calendars by replaying the HTML snapshots in ``history/``.

    Every ``history/zic_<YYYYmmddHHMMSS>.html`` file is converted with
    ``html2cal`` and merged, in sorted filename order, into a cumulative
    calendar through ``icalmerge``. The timestamp in the file name is used as
    the merge cutoff date (with a 15 minute tolerance and ``delete=True``).

    Two files are written in the current working directory:

    * ``zic_rebuild.ics``: the calendar of the last snapshot;
    * ``zic_rebuild_all.ics``: the cumulative calendar.

    When no snapshot is found a message is printed and nothing is written.
    """
    snapshots = sorted(glob.glob(os.path.join("history", "zic_*.html")))
    if not snapshots:
        # Without this guard the final write would fail on an unbound `cal`.
        print("No snapshots found in history, nothing to rebuild")
        return

    cal_all = Calendar()
    for html in snapshots:
        # Snapshots are stored as raw bytes; read them back the same way so
        # the parser sees exactly what was parsed at download time.
        with open(html, 'rb') as f:
            page = f.read()
        cal = html2cal(page)
        stamp = os.path.basename(html).replace("zic_", "").replace(".html", "")
        # The file name carries a naive local time; make it timezone-aware.
        cdat = datetime.strptime(stamp, "%Y%m%d%H%M%S").astimezone()
        cal_all = icalmerge(cal_all, cal, True, 15, cdat)

    with open('zic_rebuild.ics', 'wb') as f:
        f.write(cal.to_ical())
    with open('zic_rebuild_all.ics', 'wb') as f:
        f.write(cal_all.to_ical())
def html2cal(page):
    """Convert the agenda HTML page into an iCalendar ``Calendar``.

    Events are read from the ``<p>`` elements without a ``style`` attribute
    inside the ``div`` with class ``entry-content clearfix``. The text of each
    paragraph is split into lines: line 0 is the title, line 2 the start date
    (``"%A %d %B %Y - %H:%M"``, parsed with the current LC_TIME locale) and
    line 3 the details, from which the first and last two characters are
    dropped before splitting on ``;`` into location and organizer.

    Each event gets summary, dtstart, description, location, url and a UID
    built as ``<YYYYmmdd>|<title without bracketed parts>|<location>``.
    Titles containing ``[rinviato]`` or ``[annullato]`` are marked CANCELLED.
    Paragraphs that do not match this layout are skipped.

    Args:
        page: HTML document (``str`` or ``bytes``).

    Returns:
        The ``Calendar`` holding one ``VEVENT`` per parsed paragraph.

    Raises:
        ValueError: If the agenda container is not found in the page.
    """
    rome = pytz.timezone('Europe/Rome')
    bracketed = re.compile(r"[\(\[].*?[\)\]]")

    cal = Calendar()
    cal.add('prodid', '-//Agenda di zic.it//')
    cal.add('version', '2.0')

    soup = BeautifulSoup(page, "html.parser")
    agenda = soup.find("div", class_="entry-content clearfix")
    if agenda is None:
        raise ValueError("agenda container 'entry-content clearfix' not found in page")

    for entry in agenda.find_all("p", style=None):
        try:
            lnk = entry.find('a', href=True)['href']
            spe = entry.get_text(strip=True, separator='\n').splitlines()
            tit = spe[0].strip()
            naive = datetime.strptime(spe[2], "%A %d %B %Y - %H:%M")
            det = spe[3][2:-2].split(";")
        except (TypeError, IndexError, ValueError):
            # No link (TypeError), too few text lines (IndexError) or an
            # unparsable date (ValueError): this paragraph is not an event.
            continue

        # The page shows wall-clock time in Rome: attach that zone directly
        # instead of converting from whatever zone the host runs in.
        dat = rome.localize(naive)
        dov = det[0].strip()
        chi = det[1].strip() if len(det) > 1 else ""

        # Bracketed/parenthesized notes are left out of the UID.
        tituid = bracketed.sub("", tit).strip()
        uid = "|".join([dat.strftime("%Y%m%d"), tituid, dov])

        event = Event()
        event.add('summary', tit)
        event.add('dtstart', vDatetime(dat))
        event.add('description', chi)
        event.add('location', dov)
        event.add('url', lnk)
        event.add('uid', uid)
        if any(tag in tit for tag in ("[rinviato]", "[annullato]")):
            event.add('status', 'CANCELLED')
        cal.add_component(event)

    return cal
def _event_uid(event):
    """Return the UID of *event*, computing and storing one if it is missing.

    The fallback UID is ``<YYYYmmdd of dtstart>|<summary>|<location>``; a
    missing summary or location contributes an empty string.
    """
    if event.get('UID') is None:
        parts = [event.decoded('dtstart').strftime("%Y%m%d")]
        for key in ('Summary', 'Location'):
            prop = event.get(key)
            parts.append("" if prop is None else prop.to_ical().decode('utf-8'))
        event.add('uid', "|".join(parts))
    return event.get('UID').to_ical().decode('utf-8')


def icalmerge(calold, calnew, delete = False, tolerance = 15, cutoffdate = None):
    """Merge two calendars into a new one, matching events by UID.

    * Events of ``calnew`` are always included; one whose UID also exists in
      ``calold`` replaces the old version.
    * Events found only in ``calold`` that start before
      ``cutoffdate + tolerance`` are kept unchanged.
    * Events found only in ``calold`` that start later are dropped when
      ``delete`` is true, otherwise they are kept with status ``CANCELLED``.

    Events without a UID get a computed one; the status change and the UID
    are written to the event objects of the input calendars.
    A line is printed for every added, deleted or newly cancelled event.

    Args:
        calold: Previously stored calendar.
        calnew: Fresh calendar; its ``prodid`` and ``version`` are copied.
        delete: Drop (instead of cancel) future events missing in ``calnew``.
        tolerance: Minutes added to the cutoff date.
        cutoffdate: Reference time, compared with the event start times;
            defaults to the current UTC time.

    Returns:
        The merged ``Calendar``.
    """
    if cutoffdate is None:
        cutoffdate = datetime.now(pytz.UTC)
    cutoffdate = cutoffdate + timedelta(minutes = tolerance)

    cal = Calendar()
    cal.add('prodid', calnew.get('prodid').to_ical().decode('utf-8'))
    cal.add('version', calnew.get('version').to_ical().decode('utf-8'))

    # Resolve (and, where missing, assign) the UID of every event once.
    old_events = [(_event_uid(e), e) for e in calold.walk('VEVENT')]
    new_events = [(_event_uid(e), e) for e in calnew.walk('VEVENT')]
    old_uids = {uid for uid, _ in old_events}
    new_uids = {uid for uid, _ in new_events}

    for uid, e in old_events:
        if uid in new_uids:
            # Superseded by the version in calnew, added below.
            continue
        if e.get('dtstart').dt < cutoffdate:
            # Past event: no longer listed upstream, keep it as it was.
            cal.add_component(e)
        elif delete:
            print("Deleted \"%s\"" % uid)
        else:
            if e.get('status') != 'CANCELLED':
                print("Cancelled \"%s\"" % uid)
                e['status'] = 'CANCELLED'
            cal.add_component(e)

    for uid, e in new_events:
        if uid not in old_uids:
            print("Added \"%s\"" % uid)
        cal.add_component(e)

    return cal
# Disabled workaround, kept for reference only.
# def cal2date(dtx):
#     # Workaround for a bug when decoding a date that carries a TZID, e.g.
#     #   DTSTART;TZID=Europe/Rome;VALUE=DATE-TIME:20220611T160000
#     # is decoded to 2022-06-11 16:00:00+00:50 instead of 2022-06-11 16:00:00+02:00
#     tzx = dtx.params['TZID']
#     if tzx is None:
#         return dtx.dt
#     else:
#         print(tzx)
#         return (dtx.dt.replace(tzinfo=None).astimezone(tz=pytz.timezone(tzx)))
def dir_path(path):
    """argparse ``type=`` callable that accepts only existing directories.

    Args:
        path: Path given on the command line.

    Returns:
        ``path`` unchanged when it is an existing directory.

    Raises:
        argparse.ArgumentTypeError: If ``path`` is not an existing directory.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")
    return path
if __name__ == "__main__":
    # All files are read and written relative to the script's own directory.
    # abspath() keeps this working when __file__ is a bare relative file name,
    # whose dirname would be "" and make os.chdir fail.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    parser = argparse.ArgumentParser(
        description="Export the zic.it agenda as iCalendar files.")
    parser.add_argument("--rebuild", "-r", action='store_true',
                        help="rebuild the calendars from the HTML snapshots in history/")
    # Parsed after the chdir above, so a relative path is resolved against
    # the script directory.
    parser.add_argument("--output", "-o", type=dir_path,
                        help="existing directory that receives a copy of the calendars")
    args = parser.parse_args()

    if args.rebuild:
        print("Rebuilding")
        zicrebuild()
    else:
        zic2ics(args.output)
|