Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions .github/workflows/sync-meetup-events.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ on:
branches:
- main
- master
paths:
- '.github/workflows/sync-meetup-events.yml'
- 'scripts/sync_meetup_events.py'
- '_data/events.json'
pull_request:
branches:
- main
- master
schedule:
- cron: '15 */12 * * *'
workflow_dispatch:
Expand Down Expand Up @@ -54,6 +54,7 @@ jobs:
PY

- name: Commit changes when event data changed
if: github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
run: |
if git diff --quiet -- _data/events.json; then
echo "No changes to commit"
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ bundle exec jekyll serve

The homepage reads event data from `_data/events.json`.

- **Automated sync:** `.github/workflows/sync-meetup-events.yml` runs every 12 hours and on manual dispatch.
- It also runs on pushes to `main`/`master` that touch the workflow, sync script, or `_data/events.json` so first-time setup is easier to verify.
- **Automated sync:** `.github/workflows/sync-meetup-events.yml` runs every 12 hours, on manual dispatch, on pull requests targeting `main`/`master`, and on all pushes to `main`/`master`.
- **Sync script:** `scripts/sync_meetup_events.py` fetches Meetup data and writes deterministic JSON output.
- Primary source: Meetup iCal feed.
- Fallback source: JSON-LD event data from the Meetup events page.
Expand All @@ -28,6 +27,8 @@ The homepage reads event data from `_data/events.json`.
- If not set, the script defaults to `https://www.meetup.com/genai-gurus/events/ical/`.
- `MEETUP_EVENTS_URL` (optional): override Meetup events page URL used as the JSON-LD fallback source.
- If not set, the script defaults to `https://www.meetup.com/genai-gurus/events/`.
- `MEETUP_PAST_EVENTS_URL` (optional): override Meetup past-events page URL used to supplement iCal with recent historical events.
- If not set, the script defaults to `https://www.meetup.com/genai-gurus/events/past/`.
- `MEETUP_SYNC_STRICT` (optional): if truthy (`1`, `true`, `yes`, `on`), the script exits non-zero when fetch fails.
- Useful in CI to surface data-source outages immediately.

Expand Down
55 changes: 48 additions & 7 deletions scripts/sync_meetup_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
OUTPUT_FILE = REPO_ROOT / "_data" / "events.json"
DEFAULT_ICAL_URL = "https://www.meetup.com/genai-gurus/events/ical/"
DEFAULT_EVENTS_URL = "https://www.meetup.com/genai-gurus/events/"
DEFAULT_PAST_EVENTS_URL = "https://www.meetup.com/genai-gurus/events/past/"


def log(msg: str) -> None:
Expand Down Expand Up @@ -55,6 +56,16 @@ def strip_html(value: str) -> str:
return html.unescape(normalized).strip()


def unescape_ical_text(value: str) -> str:
    """Decode backslash escapes in an iCalendar TEXT value.

    Recognised escapes are ``\\n`` / ``\\N`` (newline), ``\\,``, ``\\;`` and
    ``\\\\`` (a literal backslash). Any other backslash sequence, including a
    lone trailing backslash, is left untouched.

    The string is decoded in a single left-to-right pass. Chaining
    ``str.replace`` calls is not equivalent: decoding ``\\n`` before ``\\\\``
    turns an escaped backslash followed by the letter ``n`` into a backslash
    plus a newline instead of a backslash plus ``n``.
    """
    import re  # local import keeps this block self-contained

    def _decode(match: "re.Match[str]") -> str:
        escaped = match.group(1)
        return "\n" if escaped in "nN" else escaped

    # Each match consumes the backslash AND the character it escapes, so an
    # escaped backslash can never be re-read as the start of another escape.
    return re.sub(r"\\([nN,;\\])", _decode, value)


def extract_speaker(summary: str, description: str) -> str:
text = f"{summary}\n{description}"
patterns = [r"Speaker[s]?:\s*([^\n|,;]+)", r"Presented by\s*([^\n|,;]+)"]
Expand Down Expand Up @@ -94,9 +105,9 @@ def parse_ical_events(ical_text: str) -> list[dict[str, str]]:
if event_dt is None:
continue

summary = strip_html(item.get("SUMMARY", "")).strip()
description = strip_html(item.get("DESCRIPTION", "")).strip()
location = strip_html(item.get("LOCATION", "")).strip()
summary = strip_html(unescape_ical_text(item.get("SUMMARY", ""))).strip()
description = strip_html(unescape_ical_text(item.get("DESCRIPTION", ""))).strip()
location = strip_html(unescape_ical_text(item.get("LOCATION", ""))).strip()
meetup_url = item.get("URL", "").strip() or DEFAULT_ICAL_URL
speaker = extract_speaker(summary, description)

Expand Down Expand Up @@ -202,26 +213,56 @@ def parse_ld_json_events(events_html: str) -> list[dict[str, str]]:
return ordered


def merge_events(
    *event_lists: list[dict[str, str]],
    placeholder_urls: "tuple[str, ...] | None" = None,
) -> list[dict[str, str]]:
    """Merge several event lists into one de-duplicated list sorted by date.

    Events are keyed by their ``meetup_url``. When the same key appears more
    than once, the event from the later list (or later position) wins.

    Args:
        *event_lists: Event lists to merge, in increasing order of precedence.
        placeholder_urls: URLs that do not identify a single event. Events
            carrying one of these URLs (or no URL at all) are keyed by
            ``title|date`` instead. Defaults to ``(DEFAULT_ICAL_URL,)``.

    Returns:
        The merged events, sorted ascending by their ``"date"`` value.

    Raises:
        KeyError: If a merged event has no ``"date"`` key.
    """
    if placeholder_urls is None:
        # parse_ical_events substitutes DEFAULT_ICAL_URL when a VEVENT has no
        # URL, so that value is shared by unrelated events and cannot be used
        # as an identity: keying on it would collapse them into one entry.
        placeholder_urls = (DEFAULT_ICAL_URL,)

    merged: dict[str, dict[str, str]] = {}
    for events in event_lists:
        for event in events:
            url = event.get("meetup_url")
            if url and url not in placeholder_urls:
                key = url
            else:
                key = f"{event.get('title')}|{event.get('date')}"
            merged[key] = event
    return sorted(merged.values(), key=lambda e: e["date"])


def fetch_events() -> list[dict[str, str]]:
source_url = getenv_or_default("MEETUP_ICAL_URL", DEFAULT_ICAL_URL)
events_url = getenv_or_default("MEETUP_EVENTS_URL", DEFAULT_EVENTS_URL)
past_events_url = getenv_or_default("MEETUP_PAST_EVENTS_URL", DEFAULT_PAST_EVENTS_URL)
headers = {"User-Agent": "genai-gurus-event-sync/1.0"}

errors: list[str] = []

ical_events: list[dict[str, str]] = []
past_events: list[dict[str, str]] = []

try:
req = urllib.request.Request(source_url, headers=headers)
with urllib.request.urlopen(req, timeout=25) as response:
if response.status != 200:
raise RuntimeError(f"Meetup iCal fetch failed with status {response.status}")
payload = response.read().decode("utf-8", errors="replace")
events = parse_ical_events(payload)
if events:
return events
errors.append("Meetup iCal response contained no events")
ical_events = parse_ical_events(payload)
if ical_events:
log(f"Fetched {len(ical_events)} events from iCal")
else:
errors.append("Meetup iCal response contained no events")
except (urllib.error.URLError, RuntimeError, ValueError) as exc:
errors.append(f"iCal source failed: {exc}")

try:
req = urllib.request.Request(past_events_url, headers=headers)
with urllib.request.urlopen(req, timeout=25) as response:
if response.status != 200:
raise RuntimeError(f"Meetup past events page fetch failed with status {response.status}")
payload = response.read().decode("utf-8", errors="replace")
past_events = [event for event in parse_ld_json_events(payload) if event.get("event_status") == "past"]
if past_events:
log(f"Fetched {len(past_events)} past events from events/past page")
except (urllib.error.URLError, RuntimeError, ValueError) as exc:
errors.append(f"past events source failed: {exc}")

merged_events = merge_events(ical_events, past_events)
if merged_events:
return merged_events
Comment on lines +262 to +264
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve /events fallback when iCal source fails

In fetch_events, this early return skips the /events/ fallback whenever events/past yields any records. If the iCal request fails but events/past still responds, the sync returns only past events and drops upcoming events entirely because events_url is never queried. This is a data-loss regression during partial source outages and should still attempt the main events-page fallback when iCal data is unavailable.

Useful? React with 👍 / 👎.


try:
req = urllib.request.Request(events_url, headers=headers)
with urllib.request.urlopen(req, timeout=25) as response:
Expand Down
Loading