fixes and new features
This commit is contained in:
parent
81b8336f57
commit
300fd8f5f0
2 changed files with 123 additions and 49 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
*.ics
|
||||
history/
|
168
zic2ics.py
168
zic2ics.py
|
@ -6,52 +6,20 @@ from bs4 import BeautifulSoup
|
|||
from datetime import datetime, timedelta
|
||||
import locale
|
||||
import pytz
|
||||
from icalendar import Calendar, Event
|
||||
from icalendar import Calendar, Event, vDatetime
|
||||
import re
|
||||
import argparse
|
||||
import glob
|
||||
|
||||
|
||||
|
||||
|
||||
def zic2ics():
|
||||
locale.setlocale(locale.LC_TIME,'it_IT.UTF-8')
|
||||
|
||||
def zic2ics(outpath):
|
||||
|
||||
os.makedirs('history', exist_ok=True)
|
||||
|
||||
cal = Calendar()
|
||||
cal.add('prodid', '-//Agenda di zic.it//')
|
||||
cal.add('version', '2.0')
|
||||
|
||||
page = requests.get("https://zic.it/agenda/")
|
||||
soup = BeautifulSoup(page.content, "html.parser")
|
||||
|
||||
agenda = soup.find("div", class_="entry-content clearfix")
|
||||
entries = agenda.find_all("p", style=None)
|
||||
|
||||
for entry in entries:
|
||||
try:
|
||||
lnk = entry.find('a', href=True)['href']
|
||||
|
||||
spe = entry.get_text(strip=True, separator='\n').splitlines()
|
||||
tit = spe[0].strip()
|
||||
dat = datetime.strptime(spe[2], "%A %d %B %Y - %H:%M").replace(tzinfo=pytz.timezone('Europe/Rome'))
|
||||
|
||||
det = spe[3][2:-2].split(";")
|
||||
dov = det[0].strip()
|
||||
chi = det[1].strip() if 1 < len(det) else ""
|
||||
# print(" ~ ".join([tit, str(dat), dov, chi]))
|
||||
|
||||
uid = "|".join([dat.strftime("%Y%m%d"), tit, dov])
|
||||
|
||||
event = Event()
|
||||
event.add('summary', tit)
|
||||
event.add('dtstart', dat)
|
||||
event.add('description', chi)
|
||||
event.add('location', dov)
|
||||
event.add('url', lnk)
|
||||
event.add('uid', uid)
|
||||
cal.add_component(event)
|
||||
|
||||
except:
|
||||
pass
|
||||
page = requests.get("https://zic.it/agenda/").content
|
||||
|
||||
cal = html2cal(page)
|
||||
ical = cal.to_ical()
|
||||
|
||||
try:
|
||||
|
@ -67,7 +35,7 @@ def zic2ics():
|
|||
cal_all = Calendar.from_ical(f.read())
|
||||
except:
|
||||
cal_all = Calendar()
|
||||
ical_all = icalmerge(cal_all, cal).to_ical()
|
||||
ical_all = icalmerge(cal_all, cal, True).to_ical()
|
||||
|
||||
with open('zic.ics', 'wb') as f:
|
||||
f.write(ical)
|
||||
|
@ -79,14 +47,89 @@ def zic2ics():
|
|||
f.write(ical)
|
||||
|
||||
with open(os.path.join('history','zic_%s.html' % datetime.now().strftime("%Y%m%d%H%M%S")), 'wb') as f:
|
||||
f.write(page.content)
|
||||
f.write(page)
|
||||
|
||||
if not outpath is None:
|
||||
with open(os.path.join(outpath, 'zic.ics'), 'wb') as f:
|
||||
f.write(ical)
|
||||
|
||||
with open(os.path.join(outpath, 'zic_all.ics'), 'wb') as f:
|
||||
f.write(ical_all)
|
||||
|
||||
|
||||
|
||||
def zicrebuild():
    """Rebuild the calendars by replaying the HTML snapshots in ``history/``.

    Every ``history/zic_<YYYYmmddHHMMSS>.html`` file is processed in filename
    order (which is chronological, because the timestamp is fixed-width).
    Each snapshot is parsed with ``html2cal`` and merged into the running
    calendar with ``icalmerge``, using the snapshot's own timestamp as the
    cutoff date.

    Writes two files in the current directory:
      * ``zic_rebuild.ics``     - the calendar parsed from the last snapshot
      * ``zic_rebuild_all.ics`` - the merged calendar

    With no snapshots available, both files contain an empty calendar.
    """
    cal_all = Calendar()
    # Bound up front so the final write cannot hit an undefined name when
    # the history directory holds no snapshots.
    cal = Calendar()

    for html in sorted(glob.glob(os.path.join("history", "zic_*.html"))):
        # Snapshots are saved as raw bytes; read them back the same way so
        # decoding is left to the HTML parser, not the platform default.
        with open(html, 'rb') as f:
            page = f.read()

        cal = html2cal(page)

        # The filename carries the (naive, local) time the snapshot was taken.
        stamp = os.path.basename(html).replace("zic_", "").replace(".html", "")
        cdat = datetime.strptime(stamp, "%Y%m%d%H%M%S").astimezone()
        cal_all = icalmerge(cal_all, cal, True, 15, cdat)

    with open('zic_rebuild.ics', 'wb') as f:
        f.write(cal.to_ical())

    with open('zic_rebuild_all.ics', 'wb') as f:
        f.write(cal_all.to_ical())
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def icalmerge(calold, calnew, delete = False, tolerance = 15):
|
||||
|
||||
def html2cal(page):
    """Convert the HTML of the zic.it agenda page into an iCalendar object.

    Args:
        page: HTML markup of the agenda page (``str`` or ``bytes``); it is
            passed unchanged to BeautifulSoup.

    Returns:
        A ``Calendar`` holding one ``VEVENT`` for each agenda paragraph that
        matches the expected layout. Paragraphs that cannot be parsed are
        skipped silently.

    Raises:
        AttributeError: if the ``entry-content clearfix`` container is not in
            the page. This is deliberately not masked: callers merge the
            result, and an empty calendar could be mistaken for "all events
            removed".

    Note:
        The date format uses ``%A`` and ``%B``, so parsing depends on the
        active ``LC_TIME`` locale (the script sets ``it_IT.UTF-8``).
    """
    cal = Calendar()
    cal.add('prodid', '-//Agenda di zic.it//')
    cal.add('version', '2.0')

    soup = BeautifulSoup(page, "html.parser")
    agenda = soup.find("div", class_="entry-content clearfix")
    rome = pytz.timezone('Europe/Rome')

    for entry in agenda.find_all("p", style=None):
        try:
            lnk = entry.find('a', href=True)['href']

            spe = entry.get_text(strip=True, separator='\n').splitlines()
            tit = spe[0].strip()

            # The page shows Rome wall-clock time. localize() attaches the
            # zone to the naive value; astimezone() on a naive datetime would
            # first assume the host's local zone, and replace(tzinfo=...)
            # would pick pytz's LMT offset.
            dat = rome.localize(datetime.strptime(spe[2], "%A %d %B %Y - %H:%M"))

            # Fourth line is "<2 chars>where; who<2 chars>": trim the
            # wrapper, then split.
            det = spe[3][2:-2].split(";")
            dov = det[0].strip()
            chi = det[1].strip() if len(det) > 1 else ""

            # Drop "(...)" / "[...]" annotations from the title so the UID
            # stays the same when such a tag is added or removed.
            tituid = re.sub(r"[\(\[].*?[\)\]]", "", tit).strip()
            uid = "|".join([dat.strftime("%Y%m%d"), tituid, dov])

            event = Event()
            event.add('summary', tit)
            event.add('dtstart', vDatetime(dat))
            event.add('description', chi)
            event.add('location', dov)
            event.add('url', lnk)
            event.add('uid', uid)
            if any(tag in tit for tag in ("[rinviato]", "[annullato]")):
                event.add('status', 'CANCELLED')

            cal.add_component(event)
        except Exception:
            # Best effort: paragraphs that are not events (no link, too few
            # lines, unparsable date) are skipped. Unlike a bare ``except``
            # this lets KeyboardInterrupt / SystemExit through.
            continue

    return cal
|
||||
|
||||
def icalmerge(calold, calnew, delete = False, tolerance = 15, cutoffdate = None):
|
||||
if cutoffdate is None:
|
||||
cutoffdate = datetime.now(pytz.UTC)
|
||||
cutoffdate = cutoffdate + timedelta(minutes = tolerance)
|
||||
|
||||
cal = Calendar()
|
||||
cal.add('prodid', calnew.get('prodid').to_ical().decode('utf-8'))
|
||||
|
@ -112,15 +155,17 @@ def icalmerge(calold, calnew, delete = False, tolerance = 15):
|
|||
uid = e.get('UID').to_ical().decode('utf-8')
|
||||
|
||||
if uid not in calnew_uids:
|
||||
if e.decoded('dtstart') < datetime.now(pytz.UTC) + timedelta(minutes = tolerance):
|
||||
print("Past \"%s\"" % uid)
|
||||
if e.get('dtstart').dt < cutoffdate:
|
||||
# print("Past \"%s\"" % uid)
|
||||
cal.add_component(e)
|
||||
else:
|
||||
if delete:
|
||||
print("Deleted \"%s\"" % uid)
|
||||
else:
|
||||
if e.get('status') != 'CANCELLED':
|
||||
print("Cancelled \"%s\"" % uid)
|
||||
e.add('status', 'CANCELLED')
|
||||
e['status'] = 'CANCELLED'
|
||||
|
||||
cal.add_component(e)
|
||||
|
||||
for e in calnew.walk('VEVENT'):
|
||||
|
@ -135,10 +180,37 @@ def icalmerge(calold, calnew, delete = False, tolerance = 15):
|
|||
return(cal)
|
||||
|
||||
|
||||
# def cal2date(dtx):
|
||||
# # Workaround to fix a bug decoding date with TZID
|
||||
# # DTSTART;TZID=Europe/Rome;VALUE=DATE-TIME:20220611T160000
|
||||
# # is decoded to 2022-06-11 16:00:00+00:50 instead of 2022-06-11 16:00:00+02:00
|
||||
|
||||
# tzx = dtx.params['TZID']
|
||||
# if tzx is None:
|
||||
# return dtx.dt
|
||||
# else:
|
||||
# print(tzx)
|
||||
# return (dtx.dt.replace(tzinfo=None).astimezone(tz=pytz.timezone(tzx)))
|
||||
|
||||
def dir_path(path):
    """argparse ``type=`` validator that accepts only existing directories.

    Args:
        path: the command-line value to check.

    Returns:
        ``path`` unchanged when it names an existing directory.

    Raises:
        argparse.ArgumentTypeError: when ``path`` is not a directory.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")
    return path
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run from the script's own directory so the relative paths used by the
    # script (history/, *.ics) resolve there. abspath() guards against
    # __file__ having no directory part, in which case dirname() would
    # return '' and chdir('') would fail.
    # NOTE: a relative --output is validated after this chdir, so it is
    # resolved against the script directory.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    parser = argparse.ArgumentParser()
    parser.add_argument("--rebuild", "-r", action='store_true')
    parser.add_argument("--output", "-o", type=dir_path)
    args = parser.parse_args()

    if args.rebuild:
        # Replay the saved snapshots instead of fetching the live page.
        print("Rebuilding")
        zicrebuild()
    else:
        # args.output is None when -o is not given.
        zic2ics(args.output)
|
||||
|
||||
|
|
Loading…
Reference in a new issue