diff --git a/.github/workflows/sync-meetup-events.yml b/.github/workflows/sync-meetup-events.yml index f30333e..0a87afb 100644 --- a/.github/workflows/sync-meetup-events.yml +++ b/.github/workflows/sync-meetup-events.yml @@ -5,10 +5,10 @@ on: branches: - main - master - paths: - - '.github/workflows/sync-meetup-events.yml' - - 'scripts/sync_meetup_events.py' - - '_data/events.json' + pull_request: + branches: + - main + - master schedule: - cron: '15 */12 * * *' workflow_dispatch: @@ -54,6 +54,7 @@ jobs: PY - name: Commit changes when event data changed + if: github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' run: | if git diff --quiet -- _data/events.json; then echo "No changes to commit" diff --git a/README.md b/README.md index 29ce6f6..1ed020f 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,7 @@ bundle exec jekyll serve The homepage reads event data from `_data/events.json`. -- **Automated sync:** `.github/workflows/sync-meetup-events.yml` runs every 12 hours and on manual dispatch. - - It also runs on pushes to `main`/`master` that touch the workflow, sync script, or `_data/events.json` so first-time setup is easier to verify. +- **Automated sync:** `.github/workflows/sync-meetup-events.yml` runs every 12 hours, on manual dispatch, on pull requests targeting `main`/`master`, and on all pushes to `main`/`master`. - **Sync script:** `scripts/sync_meetup_events.py` fetches Meetup data and writes deterministic JSON output. - Primary source: Meetup iCal feed. - Fallback source: JSON-LD event data from the Meetup events page. @@ -28,6 +27,8 @@ The homepage reads event data from `_data/events.json`. - If not set, the script defaults to `https://www.meetup.com/genai-gurus/events/ical/`. - `MEETUP_EVENTS_URL` (optional): override Meetup events page URL used as the JSON-LD fallback source. - If not set, the script defaults to `https://www.meetup.com/genai-gurus/events/`. 
def unescape_ical_text(value: str) -> str:
    r"""Decode the backslash escapes of an iCalendar TEXT value.

    Recognised escapes: ``\n`` and ``\N`` become a newline, ``\,`` becomes
    ``,``, ``\;`` becomes ``;`` and ``\\`` becomes a single backslash.
    Any other backslash sequence is left untouched.

    Args:
        value: Raw property text as it appears in the iCal payload.

    Returns:
        The text with the escapes above decoded.
    """
    # Split on escaped backslashes first so each one is consumed exactly once.
    # Chaining ``replace`` over the whole string would read the second
    # backslash of an escaped pair plus a following "n" as a newline escape.
    return "\\".join(
        chunk.replace("\\n", "\n")
        .replace("\\N", "\n")
        .replace("\\,", ",")
        .replace("\\;", ";")
        for chunk in value.split("\\\\")
    )
def merge_events(*event_lists: list[dict[str, str]]) -> list[dict[str, str]]:
    """Merge several event lists into one de-duplicated, date-ordered list.

    An event is identified by its ``meetup_url`` when that value is
    non-empty, otherwise by the combination of its ``title`` and ``date``.
    When the same identity appears more than once, the event from the later
    list (or later position) replaces the earlier one.

    Args:
        *event_lists: Any number of lists of event dictionaries.

    Returns:
        The unique events sorted ascending by their ``date`` string. Events
        with a missing or empty ``date`` sort first.
    """
    # NOTE(review): events that share one fallback ``meetup_url`` collapse into
    # a single entry here. parse_ical_events substitutes DEFAULT_ICAL_URL when
    # an event has no URL, so confirm that cannot drop real events.
    merged: dict[str, dict[str, str]] = {}
    for events in event_lists:
        for event in events:
            key = event.get("meetup_url") or f"{event.get('title')}|{event.get('date')}"
            merged[key] = event
    # Use the same tolerant lookup as the key above so an event without a
    # date cannot abort the whole merge with KeyError/TypeError.
    return sorted(merged.values(), key=lambda e: e.get("date") or "")
log(f"Fetched {len(past_events)} past events from events/past page") + except (urllib.error.URLError, RuntimeError, ValueError) as exc: + errors.append(f"past events source failed: {exc}") + + merged_events = merge_events(ical_events, past_events) + if merged_events: + return merged_events + try: req = urllib.request.Request(events_url, headers=headers) with urllib.request.urlopen(req, timeout=25) as response: