fixes and new features

This commit is contained in:
itec78 2022-05-30 01:27:09 +02:00
parent 81b8336f57
commit 300fd8f5f0
2 changed files with 123 additions and 49 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
*.ics
history/

View file

@ -6,52 +6,20 @@ from bs4 import BeautifulSoup
from datetime import datetime, timedelta from datetime import datetime, timedelta
import locale import locale
import pytz import pytz
from icalendar import Calendar, Event from icalendar import Calendar, Event, vDatetime
import re
import argparse
import glob
locale.setlocale(locale.LC_TIME,'it_IT.UTF-8')
def zic2ics(outpath):
def zic2ics():
locale.setlocale(locale.LC_TIME,'it_IT.UTF-8')
os.makedirs('history', exist_ok=True) os.makedirs('history', exist_ok=True)
cal = Calendar() page = requests.get("https://zic.it/agenda/").content
cal.add('prodid', '-//Agenda di zic.it//')
cal.add('version', '2.0')
page = requests.get("https://zic.it/agenda/") cal = html2cal(page)
soup = BeautifulSoup(page.content, "html.parser")
agenda = soup.find("div", class_="entry-content clearfix")
entries = agenda.find_all("p", style=None)
for entry in entries:
try:
lnk = entry.find('a', href=True)['href']
spe = entry.get_text(strip=True, separator='\n').splitlines()
tit = spe[0].strip()
dat = datetime.strptime(spe[2], "%A %d %B %Y - %H:%M").replace(tzinfo=pytz.timezone('Europe/Rome'))
det = spe[3][2:-2].split(";")
dov = det[0].strip()
chi = det[1].strip() if 1 < len(det) else ""
# print(" ~ ".join([tit, str(dat), dov, chi]))
uid = "|".join([dat.strftime("%Y%m%d"), tit, dov])
event = Event()
event.add('summary', tit)
event.add('dtstart', dat)
event.add('description', chi)
event.add('location', dov)
event.add('url', lnk)
event.add('uid', uid)
cal.add_component(event)
except:
pass
ical = cal.to_ical() ical = cal.to_ical()
try: try:
@ -67,7 +35,7 @@ def zic2ics():
cal_all = Calendar.from_ical(f.read()) cal_all = Calendar.from_ical(f.read())
except: except:
cal_all = Calendar() cal_all = Calendar()
ical_all = icalmerge(cal_all, cal).to_ical() ical_all = icalmerge(cal_all, cal, True).to_ical()
with open('zic.ics', 'wb') as f: with open('zic.ics', 'wb') as f:
f.write(ical) f.write(ical)
@ -79,14 +47,89 @@ def zic2ics():
f.write(ical) f.write(ical)
with open(os.path.join('history','zic_%s.html' % datetime.now().strftime("%Y%m%d%H%M%S")), 'wb') as f: with open(os.path.join('history','zic_%s.html' % datetime.now().strftime("%Y%m%d%H%M%S")), 'wb') as f:
f.write(page.content) f.write(page)
if not outpath is None:
with open(os.path.join(outpath, 'zic.ics'), 'wb') as f:
f.write(ical)
with open(os.path.join(outpath, 'zic_all.ics'), 'wb') as f:
f.write(ical_all)
def zicrebuild():
    """Rebuild the calendars by replaying every saved HTML snapshot.

    Snapshots are read from ``history/zic_<YYYYmmddHHMMSS>.html`` in
    chronological order (the timestamp in the file name sorts lexically).
    Each snapshot is parsed with ``html2cal`` and merged into the running
    calendar with ``icalmerge``, using the snapshot's own timestamp as the
    cutoff date instead of the current time.

    Writes ``zic_rebuild.ics`` (events of the most recent snapshot) and
    ``zic_rebuild_all.ics`` (the merged history) in the current directory.
    If no snapshot is found nothing is written.
    """
    snapshots = sorted(glob.glob(os.path.join("history", "zic_*.html")))
    if not snapshots:
        # Without this guard the final write would fail on an unbound `cal`.
        print("No history snapshots found, nothing to rebuild")
        return

    cal = None
    cal_all = Calendar()
    for html in snapshots:
        # Snapshots are stored as raw bytes; read them back the same way so
        # the parser sees exactly what was downloaded, independent of the
        # locale's default text encoding.
        with open(html, 'rb') as f:
            page = f.read()
        cal = html2cal(page)
        # The file name carries the naive local download time;
        # astimezone() turns it into an aware datetime in the system zone.
        cdat = datetime.strptime(
            os.path.basename(html), "zic_%Y%m%d%H%M%S.html"
        ).astimezone()
        cal_all = icalmerge(cal_all, cal, True, 15, cdat)

    with open('zic_rebuild.ics', 'wb') as f:
        f.write(cal.to_ical())
    with open('zic_rebuild_all.ics', 'wb') as f:
        f.write(cal_all.to_ical())
def icalmerge(calold, calnew, delete = False, tolerance = 15):
def html2cal(page):
    """Parse the zic.it agenda page into an iCalendar ``Calendar``.

    Args:
        page: Markup of the agenda page (``str`` or ``bytes``), handed
            straight to ``BeautifulSoup``.

    Returns:
        A ``Calendar`` holding one ``VEVENT`` per agenda entry that could be
        parsed. Entries that do not match the expected layout are skipped
        silently (best effort).

    Raises:
        AttributeError: if the page has no
            ``<div class="entry-content clearfix">`` container.
    """
    cal = Calendar()
    cal.add('prodid', '-//Agenda di zic.it//')
    cal.add('version', '2.0')

    soup = BeautifulSoup(page, "html.parser")
    agenda = soup.find("div", class_="entry-content clearfix")
    entries = agenda.find_all("p", style=None)

    rome = pytz.timezone('Europe/Rome')
    # Bracketed/parenthesised remarks are removed from the title before it
    # goes into the UID, so the UID does not depend on them.
    remark = re.compile(r"[\(\[].*?[\)\]]")

    for entry in entries:
        try:
            lnk = entry.find('a', href=True)['href']
            spe = entry.get_text(strip=True, separator='\n').splitlines()
            tit = spe[0].strip()
            # The page shows wall-clock times; attach the Rome zone with
            # localize(). Calling astimezone() on a naive datetime would
            # read it in the *system* zone and shift events on hosts that
            # are not set to Europe/Rome.
            # NOTE(review): %A/%B parsing relies on the Italian LC_TIME
            # locale being set by the module - confirm.
            dat = rome.localize(
                datetime.strptime(spe[2], "%A %d %B %Y - %H:%M")
            )
            # Drops two characters at each end of the details line -
            # presumably surrounding delimiters; verify against the page.
            det = spe[3][2:-2].split(";")
            dov = det[0].strip()
            chi = det[1].strip() if len(det) > 1 else ""

            tituid = remark.sub("", tit).strip()
            uid = "|".join([dat.strftime("%Y%m%d"), tituid, dov])

            event = Event()
            event.add('summary', tit)
            event.add('dtstart', vDatetime(dat))
            event.add('description', chi)
            event.add('location', dov)
            event.add('url', lnk)
            event.add('uid', uid)
            if any(tag in tit for tag in ("[rinviato]", "[annullato]")):
                event.add('status', 'CANCELLED')
            cal.add_component(event)
        except Exception:
            # Best effort: skip paragraphs that are not well-formed agenda
            # entries, but let KeyboardInterrupt/SystemExit propagate.
            continue

    return cal
def icalmerge(calold, calnew, delete = False, tolerance = 15, cutoffdate = None):
if cutoffdate is None:
cutoffdate = datetime.now(pytz.UTC)
cutoffdate = cutoffdate + timedelta(minutes = tolerance)
cal = Calendar() cal = Calendar()
cal.add('prodid', calnew.get('prodid').to_ical().decode('utf-8')) cal.add('prodid', calnew.get('prodid').to_ical().decode('utf-8'))
@ -112,15 +155,17 @@ def icalmerge(calold, calnew, delete = False, tolerance = 15):
uid = e.get('UID').to_ical().decode('utf-8') uid = e.get('UID').to_ical().decode('utf-8')
if uid not in calnew_uids: if uid not in calnew_uids:
if e.decoded('dtstart') < datetime.now(pytz.UTC) + timedelta(minutes = tolerance): if e.get('dtstart').dt < cutoffdate:
print("Past \"%s\"" % uid) # print("Past \"%s\"" % uid)
cal.add_component(e) cal.add_component(e)
else: else:
if delete: if delete:
print("Deleted \"%s\"" % uid) print("Deleted \"%s\"" % uid)
else: else:
print("Cancelled \"%s\"" % uid) if e.get('status') != 'CANCELLED':
e.add('status', 'CANCELLED') print("Cancelled \"%s\"" % uid)
e['status'] = 'CANCELLED'
cal.add_component(e) cal.add_component(e)
for e in calnew.walk('VEVENT'): for e in calnew.walk('VEVENT'):
@ -135,10 +180,37 @@ def icalmerge(calold, calnew, delete = False, tolerance = 15):
return(cal) return(cal)
# def cal2date(dtx):
# # Workaround for a bug when decoding a date-time that carries a TZID parameter:
# # DTSTART;TZID=Europe/Rome;VALUE=DATE-TIME:20220611T160000
# # is decoded as 2022-06-11 16:00:00+00:50 instead of 2022-06-11 16:00:00+02:00
# tzx = dtx.params['TZID']
# if tzx is None:
# return dtx.dt
# else:
# print(tzx)
# return (dtx.dt.replace(tzinfo=None).astimezone(tz=pytz.timezone(tzx)))
def dir_path(path):
    """Validate a command-line argument as an existing directory.

    Intended as an ``argparse`` ``type=`` callback.

    Args:
        path: Path string taken from the command line.

    Returns:
        ``path`` unchanged when it names an existing directory.

    Raises:
        argparse.ArgumentTypeError: if ``path`` is not an existing directory.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")
    return path
if __name__ == "__main__":
    # Work from the script's own directory so that 'history/' and the
    # generated .ics files always end up next to the script. abspath()
    # guards against a relative __file__ whose dirname is '' (os.chdir('')
    # raises FileNotFoundError).
    # Note: because of this chdir, a relative --output path is resolved
    # against the script directory, not the caller's working directory.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    parser = argparse.ArgumentParser()
    parser.add_argument("--rebuild", "-r", action='store_true')
    parser.add_argument("--output", "-o", type=dir_path)
    args = parser.parse_args()

    if args.rebuild:
        # Replay the saved HTML snapshots instead of downloading the page.
        print("Rebuilding")
        zicrebuild()
    else:
        # args.output is None when --output is not given.
        zic2ics(args.output)