Parser für den Uni-Kalender

Der folgende Artikel und der Code wurden von mir alleine erstellt. Mit Hilfe von ChatGPT habe ich eine verbesserte Version des Codes erstellt. Der Code und eine kurze Beschreibung (auf Englisch) sind hier zu finden.

Die Kalender für das kommende Semester hat meine Uni dieses Mal als 5 verschiedene iCal-Links für die einzelnen Kurse veröffentlicht. Da ich diese Kalender nicht einzeln abonnieren und verwalten möchte und mir wichtige Funktionen fehlen, habe ich ein kleines Python-Skript geschrieben, um mir Abhilfe zu verschaffen.

Das kleine Python-Skript kann nun:

Kalender herunterladen und kombinieren
Veranstaltungen in Pflicht- oder Wahlveranstaltungen unterteilen
Benachrichtigungen zu Pflichtveranstaltungen hinzufügen
“Veranstaltungen” wie Feiertage filtern und Duplikate vermeiden
Veränderungen des Kalenders erkennen und ein “diff” erstellen
Fehlerhaften UID-Parameter entfernen und einen neuen generieren

Das Skript wird von cron jeden Morgen um 8:00 Uhr ausgeführt und speichert seine Ergebnisse in unterschiedlichen Dateien. Die Dateien werden von einem nginx-Server gehostet und können so per URL als Kalenderabonnement in jeder gängigen Kalender-Software hinzugefügt werden.

get_calendar.py

#!/usr/bin/python3

# Input:
# u.txt  -> list of calendar urls to process

# Output:
# m.ics  -> calendar containing mandatory events
# v.ics  -> calendar containing voluntary and duplicate events
# e.txt  -> calendar as a human readable string
# d.html -> diff of changes to calendar since last run

from datetime import datetime
import urllib.request
import difflib

def _parse_ics_datetime(value):
    """Parse an iCalendar DATE-TIME or DATE value into a naive datetime.

    Accepted forms are ``20230515T090000``, the same with a trailing ``Z``
    (the suffix is dropped, no time zone conversion is performed) and the
    date-only form ``20230515``, which is interpreted as midnight.

    Raises:
        ValueError: if ``value`` matches none of these forms.
    """
    value = value.strip()
    if value.endswith('Z'):
        value = value[:-1]
    if 'T' in value:
        return datetime.strptime(value, '%Y%m%dT%H%M%S')
    return datetime.strptime(value, '%Y%m%d')


def parse_ics(text):
    """Split an iCalendar document into header, footer and classified events.

    Every VEVENT is sorted into exactly one of three lists, based on its
    SUMMARY:

    * duplicate -- summary is "Feiertag" or "Studieninformationstag"; such
      events appear in every input calendar but should be emitted only once
    * mandatory -- summary ends with an asterisk; a 30 minute display alarm
      is added to these events
    * voluntary -- everything else

    The UID of every event is replaced by a pseudo UID derived from start,
    end and summary (hex encoded). If one of these is missing, the original
    UID line is kept unchanged.

    Folded content lines (continuation lines starting with whitespace) are
    not unfolded; they are copied to the output as they are.

    Args:
        text: content of an .ics file; both CRLF and LF line endings are
            accepted.

    Returns:
        A tuple ``(pre, post, m, v, d, h)`` of lists of strings:
        ``pre``/``post`` are the lines before the first and after the last
        event, ``m``/``v``/``d`` the lines of the mandatory, voluntary and
        duplicate events, and ``h`` one human readable line per
        non-duplicate event that has both a start and an end.

    Raises:
        ValueError: if a DTSTART/DTEND value cannot be parsed.
        IndexError: if a DTSTART/DTEND/SUMMARY/LOCATION line has no colon.
    """
    # Lines before and after the block of events
    pre, post = [], []
    # Events: (m)andatory, (v)oluntary, (d)uplicate; calendar as (h)uman readable string
    m, v, d, h = [], [], [], []
    # Current (b)uffer
    b = []
    # Event details
    start, end, summary, location = None, None, None, None
    # Position of the current event's UID line inside the buffer
    uid_index = None
    # An explicit flag instead of "not pre", so that an empty header does not
    # make later events overwrite pre
    seen_event = False

    # Normalise line endings so that LF-only files are parsed as well
    for line in text.replace('\r\n', '\n').split('\n'):
        if line == "BEGIN:VEVENT":
            # Save lines before the first event into pre
            # NOTE: The buffer is not updated yet, so the line "BEGIN:VEVENT"
            #       will not be part of pre
            if not seen_event:
                pre = b
                b = []
                seen_event = True
            # A UID seen outside of an event must not be rewritten
            uid_index = None

        # Only remember where the UID line is. The pseudo UID is generated at
        # END:VEVENT, because properties may appear in any order and start,
        # end and summary may not be known yet.
        if line.startswith(("UID:", "UID;")):
            uid_index = len(b)

        # Append current line to buffer
        b.append(line)

        # Parse event into variables
        # NOTE: Split on the first colon only, values may contain colons
        if line.startswith("DTSTART"):
            start = _parse_ics_datetime(line.split(':', 1)[1])
        elif line.startswith("DTEND"):
            end = _parse_ics_datetime(line.split(':', 1)[1])
        elif line.startswith("SUMMARY"):
            summary = line.split(':', 1)[1].replace('\\', '')
        elif line.startswith("LOCATION"):
            location = line.split(':', 1)[1].replace('\\', '')

        if line != "END:VEVENT":
            continue

        # Replace the UID with a pseudo UID built from start, end and summary
        if (uid_index is not None and start is not None
                and end is not None and summary is not None):
            uid = '{:s}{:s}{:s}'.format(
                start.strftime("%Y-%m-%d%H:%M:%S"),
                end.strftime("%H:%M:%S"),
                summary
            ).encode("utf-8").hex()
            b[uid_index] = "UID:{:s}".format(uid)

        # Bank holidays and similar events
        is_duplicate = summary in ("Feiertag", "Studieninformationstag")
        # Mandatory event summaries end with an asterisk
        is_mandatory = bool(summary) and summary.endswith("*")

        # Append buffer to list depending on type of event
        if is_duplicate:
            d.extend(b)
        elif is_mandatory:
            # Add a notification to mandatory events, right before END:VEVENT
            # NOTE(review): RFC 5545 lists DESCRIPTION as required for
            #               ACTION:DISPLAY alarms; it is not emitted here.
            b[-1:-1] = [
                "BEGIN:VALARM",
                "TRIGGER:-PT30M",
                "ACTION:DISPLAY",
                "END:VALARM",
            ]
            m.extend(b)
        else:
            v.extend(b)

        # Generate human readable string and append to h; events without
        # start or end cannot be formatted and are left out
        if not is_duplicate and start is not None and end is not None:
            h.append('{:s} - {:s} {:s} ({:s})'.format(
                start.strftime("%Y-%m-%d %H:%M:%S"),
                end.strftime("%H:%M:%S"),
                summary or '',
                location or ''
            ))

        # Clear event details and buffer for the next event
        start, end, summary, location = None, None, None, None
        uid_index = None
        b = []

    # Save lines after last event into post
    post = b
    return pre, post, m, v, d, h

# Combined lists m, v, h from different calendars
ma, va, ha = [], [], []
# Header, footer and duplicate events of the last processed calendar.
# Initialised here so that an empty u.txt does not end in a NameError below.
pre, post, d = [], [], []

# Read list of calendar urls, skipping blank lines
with open("u.txt", "r", encoding="utf-8") as f:
    urls = [entry.strip() for entry in f if entry.strip()]

# Iterate over calendar urls
for url in urls:
    # Download calendar; the context manager closes the connection and the
    # timeout keeps an unresponsive server from blocking the cron job forever
    with urllib.request.urlopen(url, timeout=60) as response:
        text = response.read().decode('utf-8')

    # Call parse_ics to process response
    pre, post, m, v, d, h = parse_ics(text)

    # Append events to combined lists
    # NOTE: While the lists m, v and h get appended to a combined list, the variables
    #       pre, post and d will be overwritten on every iteration and only contain
    #       the content from the last calendar when exiting the loop
    ma.extend(m)
    va.extend(v)
    ha.extend(h)

# ma, va, ha contain the combined lists m, v, h from every calendar
# pre, post, d contain the content from the last processed calendar

# Sort lines for human readable string
ha.sort()

# Save mandatory events to m.ics
# NOTE: iCalendar content lines are delimited by CRLF (RFC 5545); newline=""
#       stops Python from translating the line endings while writing
with open("m.ics", "w", encoding="utf-8", newline="") as f:
    f.write('\r\n'.join(pre + ma + post))

# Save voluntary events and duplicates to v.ics
with open("v.ics", "w", encoding="utf-8", newline="") as f:
    f.write('\r\n'.join(pre + va + d + post))

# e.txt contains the human readable string from the last run
# If it exists we can compare it to the current result to detect changes
today = datetime.today().strftime('%Y-%m-%d')
try:
    # Load old e.txt; undecodable bytes (file written with another encoding)
    # are replaced instead of aborting the run
    with open("e.txt", "r", encoding="utf-8", errors="replace") as f:
        old = f.read().splitlines()
except IOError:
    # No previous result available: print the whole calendar
    print('\n'.join(ha))
else:
    # Compare and print diff to stdout; prints nothing if there are no changes
    # NOTE: unified_diff ends its control lines (---, +++, @@) with a newline
    #       by default, so the joined output has a blank line after each of them
    diff = difflib.unified_diff(old, ha, "Old", today, n=0)
    print('\n'.join(diff), end="")

    # Compare and save diff to d.html
    html = difflib.HtmlDiff().make_file(old, ha, "Old", today)
    with open("d.html", "w", encoding="utf-8") as f:
        f.write(html)

# Save human readable string to e.txt
with open("e.txt", "w", encoding="utf-8") as f:
    f.write('\n'.join(ha))

Output

Der Output sieht beispielsweise so aus, wenn eine Veranstaltung verändert und eine neue hinzugefügt wird.

--- Old

+++ 2023-03-05

@@ -257 +257,2 @@

-2023-05-15 09:00:00 - 10:30:00 V Neuro Hirnnerven (HS Kopf; INF 400)
+2023-05-15 09:00:00 - 09:30:00 V Neuro Einführung (HS Kopf; INF 400)
+2023-05-15 09:45:00 - 10:30:00 V Neuro Hirnnerven (HS Kopf; INF 400)