fixes and new features

This commit is contained in:
itec78 2022-05-30 01:27:09 +02:00
parent 81b8336f57
commit 300fd8f5f0
2 changed files with 123 additions and 49 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
*.ics
history/

View file

@ -6,52 +6,20 @@ from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import locale
import pytz
from icalendar import Calendar, Event
from icalendar import Calendar, Event, vDatetime
import re
import argparse
import glob
locale.setlocale(locale.LC_TIME,'it_IT.UTF-8')
def zic2ics(outpath):
def zic2ics():
locale.setlocale(locale.LC_TIME,'it_IT.UTF-8')
os.makedirs('history', exist_ok=True)
cal = Calendar()
cal.add('prodid', '-//Agenda di zic.it//')
cal.add('version', '2.0')
page = requests.get("https://zic.it/agenda/").content
page = requests.get("https://zic.it/agenda/")
soup = BeautifulSoup(page.content, "html.parser")
agenda = soup.find("div", class_="entry-content clearfix")
entries = agenda.find_all("p", style=None)
for entry in entries:
try:
lnk = entry.find('a', href=True)['href']
spe = entry.get_text(strip=True, separator='\n').splitlines()
tit = spe[0].strip()
dat = datetime.strptime(spe[2], "%A %d %B %Y - %H:%M").replace(tzinfo=pytz.timezone('Europe/Rome'))
det = spe[3][2:-2].split(";")
dov = det[0].strip()
chi = det[1].strip() if 1 < len(det) else ""
# print(" ~ ".join([tit, str(dat), dov, chi]))
uid = "|".join([dat.strftime("%Y%m%d"), tit, dov])
event = Event()
event.add('summary', tit)
event.add('dtstart', dat)
event.add('description', chi)
event.add('location', dov)
event.add('url', lnk)
event.add('uid', uid)
cal.add_component(event)
except:
pass
cal = html2cal(page)
ical = cal.to_ical()
try:
@ -67,7 +35,7 @@ def zic2ics():
cal_all = Calendar.from_ical(f.read())
except:
cal_all = Calendar()
ical_all = icalmerge(cal_all, cal).to_ical()
ical_all = icalmerge(cal_all, cal, True).to_ical()
with open('zic.ics', 'wb') as f:
f.write(ical)
@ -79,14 +47,89 @@ def zic2ics():
f.write(ical)
with open(os.path.join('history','zic_%s.html' % datetime.now().strftime("%Y%m%d%H%M%S")), 'wb') as f:
f.write(page.content)
f.write(page)
if not outpath is None:
with open(os.path.join(outpath, 'zic.ics'), 'wb') as f:
f.write(ical)
with open(os.path.join(outpath, 'zic_all.ics'), 'wb') as f:
f.write(ical_all)
def zicrebuild():
    """Rebuild the calendars from the HTML snapshots saved in ``history/``.

    Every ``history/zic_<YYYYmmddHHMMSS>.html`` snapshot is parsed with
    ``html2cal`` in chronological order (the timestamp in the file name makes
    lexical order chronological) and merged into a cumulative calendar, using
    the snapshot's own timestamp as the merge cutoff date.

    Writes ``zic_rebuild.ics`` (the calendar of the most recent snapshot) and
    ``zic_rebuild_all.ics`` (the cumulative calendar) in the current directory.
    Nothing is written when no snapshot is found.
    """
    # html2cal parses Italian weekday/month names with strptime, so LC_TIME
    # must be Italian here too; set it so a rebuild does not depend on some
    # other code path having configured the locale first.
    locale.setlocale(locale.LC_TIME, 'it_IT.UTF-8')

    snapshots = sorted(glob.glob(os.path.join("history", "zic_*.html")))
    if not snapshots:
        # Without this guard the final write would fail on an unbound `cal`.
        print("No snapshots found in history/")
        return

    cal_all = Calendar()
    cal = None
    for html in snapshots:
        # Snapshots are stored as raw bytes; read them back as bytes and let
        # BeautifulSoup detect the encoding instead of relying on the
        # platform default text encoding.
        with open(html, 'rb') as f:
            page = f.read()
        cal = html2cal(page)
        stamp = os.path.basename(html).replace("zic_", "").replace(".html", "")
        # File names carry naive local time; astimezone() makes it aware.
        cdat = datetime.strptime(stamp, "%Y%m%d%H%M%S").astimezone()
        cal_all = icalmerge(cal_all, cal, True, 15, cdat)

    with open('zic_rebuild.ics', 'wb') as f:
        f.write(cal.to_ical())

    with open('zic_rebuild_all.ics', 'wb') as f:
        f.write(cal_all.to_ical())
def icalmerge(calold, calnew, delete = False, tolerance = 15):
def html2cal(page):
    """Parse the zic.it agenda HTML into an iCalendar ``Calendar``.

    Args:
        page: HTML of the agenda page (``str`` or ``bytes``), as accepted by
            ``BeautifulSoup``.

    Returns:
        A ``Calendar`` with one ``VEVENT`` per agenda entry that could be
        parsed. Entries that do not match the expected layout are skipped.
        If the agenda container is not found, the calendar has no events.

    Note:
        Dates are parsed with ``%A``/``%B``, so ``LC_TIME`` must be set to an
        Italian locale by the caller for entries to be recognised.
    """
    cal = Calendar()
    cal.add('prodid', '-//Agenda di zic.it//')
    cal.add('version', '2.0')

    soup = BeautifulSoup(page, "html.parser")
    agenda = soup.find("div", class_="entry-content clearfix")
    if agenda is None:
        # Layout changed or an error page was served: nothing to parse.
        return cal

    rome = pytz.timezone('Europe/Rome')
    for entry in agenda.find_all("p", style=None):
        try:
            lnk = entry.find('a', href=True)['href']
            spe = entry.get_text(strip=True, separator='\n').splitlines()
            tit = spe[0].strip()
            # The page shows Rome wall-clock time. localize() attaches the
            # zone without shifting the time; astimezone() on a naive value
            # would first assume the host's local zone and move the event.
            dat = rome.localize(datetime.strptime(spe[2], "%A %d %B %Y - %H:%M"))
            det = spe[3][2:-2].split(";")
        except (TypeError, IndexError, ValueError):
            # No link, too few text lines, or an unparsable date: this
            # paragraph is not an agenda entry, skip it.
            continue

        dov = det[0].strip()
        chi = det[1].strip() if len(det) > 1 else ""

        # Drop bracketed/parenthesised tags (e.g. "[rinviato]") from the
        # title so the UID stays stable when such a tag is added later.
        tituid = re.sub(r"[\(\[].*?[\)\]]", "", tit).strip()
        uid = "|".join([dat.strftime("%Y%m%d"), tituid, dov])

        event = Event()
        event.add('summary', tit)
        event.add('dtstart', vDatetime(dat))
        event.add('description', chi)
        event.add('location', dov)
        event.add('url', lnk)
        event.add('uid', uid)
        if any(tag in tit for tag in ("[rinviato]", "[annullato]")):
            event.add('status', 'CANCELLED')
        cal.add_component(event)

    return cal
def icalmerge(calold, calnew, delete = False, tolerance = 15, cutoffdate = None):
if cutoffdate is None:
cutoffdate = datetime.now(pytz.UTC)
cutoffdate = cutoffdate + timedelta(minutes = tolerance)
cal = Calendar()
cal.add('prodid', calnew.get('prodid').to_ical().decode('utf-8'))
@ -112,15 +155,17 @@ def icalmerge(calold, calnew, delete = False, tolerance = 15):
uid = e.get('UID').to_ical().decode('utf-8')
if uid not in calnew_uids:
if e.decoded('dtstart') < datetime.now(pytz.UTC) + timedelta(minutes = tolerance):
print("Past \"%s\"" % uid)
if e.get('dtstart').dt < cutoffdate:
# print("Past \"%s\"" % uid)
cal.add_component(e)
else:
if delete:
print("Deleted \"%s\"" % uid)
else:
print("Cancelled \"%s\"" % uid)
e.add('status', 'CANCELLED')
if e.get('status') != 'CANCELLED':
print("Cancelled \"%s\"" % uid)
e['status'] = 'CANCELLED'
cal.add_component(e)
for e in calnew.walk('VEVENT'):
@ -135,10 +180,37 @@ def icalmerge(calold, calnew, delete = False, tolerance = 15):
return(cal)
# def cal2date(dtx):
# # Workaround to fix a bug decoding date with TZID
# # DTSTART;TZID=Europe/Rome;VALUE=DATE-TIME:20220611T160000
# # is decoded to 2022-06-11 16:00:00+00:50 instead of 2022-06-11 16:00:00+02:00
# tzx = dtx.params['TZID']
# if tzx is None:
# return dtx.dt
# else:
# print(tzx)
# return (dtx.dt.replace(tzinfo=None).astimezone(tz=pytz.timezone(tzx)))
def dir_path(path):
    """argparse ``type=`` validator: accept only an existing directory.

    Returns ``path`` unchanged when it names a directory, otherwise raises
    ``argparse.ArgumentTypeError`` so argparse reports a usage error.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")
    return path
if __name__ == "__main__":
    # Work from the script's own directory so the relative paths used by the
    # script resolve next to it. abspath() guards against __file__ being a
    # bare relative name, whose dirname is '' and would make chdir fail.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    # NOTE: arguments are parsed after the chdir, so a relative --output path
    # is validated (and later used) relative to the script directory.
    parser = argparse.ArgumentParser()
    parser.add_argument("--rebuild", "-r", action='store_true')
    parser.add_argument("--output", "-o", type=dir_path)
    args = parser.parse_args()

    if args.rebuild:
        print("Rebuilding")
        zicrebuild()
    else:
        zic2ics(args.output)