-
Notifications
You must be signed in to change notification settings - Fork 1
Support past Meetup events and iCal unescaping; run sync on PRs and refine commit condition #14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| OUTPUT_FILE = REPO_ROOT / "_data" / "events.json" | ||
| DEFAULT_ICAL_URL = "https://www.meetup.com/genai-gurus/events/ical/" | ||
| DEFAULT_EVENTS_URL = "https://www.meetup.com/genai-gurus/events/" | ||
| DEFAULT_PAST_EVENTS_URL = "https://www.meetup.com/genai-gurus/events/past/" | ||
|
|
||
|
|
||
| def log(msg: str) -> None: | ||
|
|
@@ -55,6 +56,16 @@ def strip_html(value: str) -> str: | |
| return html.unescape(normalized).strip() | ||
|
|
||
|
|
||
def unescape_ical_text(value: str) -> str:
    """Decode the backslash escapes used in iCalendar TEXT values.

    Recognised sequences are ``\\n`` / ``\\N`` (newline), ``\\,``, ``\\;`` and
    ``\\\\`` (a literal backslash). Any other backslash is left untouched.

    The input is decoded in a single left-to-right pass. Chaining
    ``str.replace`` calls is not equivalent: an escaped backslash followed by
    the letter ``n`` would have its second backslash consumed by the newline
    rule, turning literal text such as ``C:\\new`` into a line break.

    Args:
        value: Raw property value as it appears in the iCal feed.

    Returns:
        The value with the escape sequences decoded.
    """
    import re  # local import keeps this helper self-contained

    def _decode(match: "re.Match[str]") -> str:
        escaped = match.group(1)
        return "\n" if escaped in "nN" else escaped

    # Each match consumes the backslash together with the character it
    # escapes, so a decoded backslash can never start a second escape.
    return re.sub(r"\\([\\,;nN])", _decode, value)
|
|
||
|
|
||
| def extract_speaker(summary: str, description: str) -> str: | ||
| text = f"{summary}\n{description}" | ||
| patterns = [r"Speaker[s]?:\s*([^\n|,;]+)", r"Presented by\s*([^\n|,;]+)"] | ||
|
|
@@ -94,9 +105,9 @@ def parse_ical_events(ical_text: str) -> list[dict[str, str]]: | |
| if event_dt is None: | ||
| continue | ||
|
|
||
| summary = strip_html(item.get("SUMMARY", "")).strip() | ||
| description = strip_html(item.get("DESCRIPTION", "")).strip() | ||
| location = strip_html(item.get("LOCATION", "")).strip() | ||
| summary = strip_html(unescape_ical_text(item.get("SUMMARY", ""))).strip() | ||
| description = strip_html(unescape_ical_text(item.get("DESCRIPTION", ""))).strip() | ||
| location = strip_html(unescape_ical_text(item.get("LOCATION", ""))).strip() | ||
| meetup_url = item.get("URL", "").strip() or DEFAULT_ICAL_URL | ||
| speaker = extract_speaker(summary, description) | ||
|
|
||
|
|
@@ -202,26 +213,56 @@ def parse_ld_json_events(events_html: str) -> list[dict[str, str]]: | |
| return ordered | ||
|
|
||
|
|
||
def merge_events(
    *event_lists: list[dict[str, str]],
    shared_urls: "set[str] | None" = None,
) -> list[dict[str, str]]:
    """Merge several event lists into one date-sorted, de-duplicated list.

    Events are keyed by ``meetup_url`` when that URL identifies a single
    event. A URL that is missing, or that is one of the shared feed/listing
    URLs, cannot distinguish events (the iCal parser substitutes the feed URL
    when an entry has no ``URL``), so those events are keyed by
    ``title|date`` instead. Without that fallback every URL-less event would
    overwrite the previous one.

    When two events share a key, the one from the later list wins.

    Args:
        *event_lists: Event lists in increasing order of precedence.
        shared_urls: URLs that must not be treated as unique identifiers.
            Defaults to the module's default iCal, events and past-events
            URLs.

    Returns:
        The merged events sorted by their ``date`` value; events without a
        date sort first.
    """
    if shared_urls is None:
        # NOTE(review): only DEFAULT_ICAL_URL is visibly used as a fallback
        # link; the two listing URLs are included defensively.
        shared_urls = {DEFAULT_ICAL_URL, DEFAULT_EVENTS_URL, DEFAULT_PAST_EVENTS_URL}

    merged: dict[str, dict[str, str]] = {}
    for events in event_lists:
        for event in events:
            url = (event.get("meetup_url") or "").strip()
            if url and url not in shared_urls:
                key = url
            else:
                key = f"{event.get('title')}|{event.get('date')}"
            merged[key] = event
    return sorted(merged.values(), key=lambda e: e.get("date") or "")
|
|
||
|
|
||
| def fetch_events() -> list[dict[str, str]]: | ||
| source_url = getenv_or_default("MEETUP_ICAL_URL", DEFAULT_ICAL_URL) | ||
| events_url = getenv_or_default("MEETUP_EVENTS_URL", DEFAULT_EVENTS_URL) | ||
| past_events_url = getenv_or_default("MEETUP_PAST_EVENTS_URL", DEFAULT_PAST_EVENTS_URL) | ||
| headers = {"User-Agent": "genai-gurus-event-sync/1.0"} | ||
|
|
||
| errors: list[str] = [] | ||
|
|
||
| ical_events: list[dict[str, str]] = [] | ||
| past_events: list[dict[str, str]] = [] | ||
|
|
||
| try: | ||
| req = urllib.request.Request(source_url, headers=headers) | ||
| with urllib.request.urlopen(req, timeout=25) as response: | ||
| if response.status != 200: | ||
| raise RuntimeError(f"Meetup iCal fetch failed with status {response.status}") | ||
| payload = response.read().decode("utf-8", errors="replace") | ||
| events = parse_ical_events(payload) | ||
| if events: | ||
| return events | ||
| errors.append("Meetup iCal response contained no events") | ||
| ical_events = parse_ical_events(payload) | ||
| if ical_events: | ||
| log(f"Fetched {len(ical_events)} events from iCal") | ||
| else: | ||
| errors.append("Meetup iCal response contained no events") | ||
| except (urllib.error.URLError, RuntimeError, ValueError) as exc: | ||
| errors.append(f"iCal source failed: {exc}") | ||
|
|
||
| try: | ||
| req = urllib.request.Request(past_events_url, headers=headers) | ||
| with urllib.request.urlopen(req, timeout=25) as response: | ||
| if response.status != 200: | ||
| raise RuntimeError(f"Meetup past events page fetch failed with status {response.status}") | ||
| payload = response.read().decode("utf-8", errors="replace") | ||
| past_events = [event for event in parse_ld_json_events(payload) if event.get("event_status") == "past"] | ||
| if past_events: | ||
| log(f"Fetched {len(past_events)} past events from events/past page") | ||
| except (urllib.error.URLError, RuntimeError, ValueError) as exc: | ||
| errors.append(f"past events source failed: {exc}") | ||
|
|
||
| merged_events = merge_events(ical_events, past_events) | ||
| if merged_events: | ||
| return merged_events | ||
|
Comment on lines
+262
to
+264
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
In Useful? React with 👍 / 👎. |
||
|
|
||
| try: | ||
| req = urllib.request.Request(events_url, headers=headers) | ||
| with urllib.request.urlopen(req, timeout=25) as response: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`merge_events` deduplicates on `meetup_url` alone, but iCal entries without a `URL` are normalized to the same default URL, so distinct events overwrite each other. This causes feeds with missing `URL` fields to collapse to a single retained event after merge, reducing event accuracy. The merge key should fall back to a unique composite key when the URL is default or missing. Useful? React with 👍 / 👎.