diff --git a/skills/ui-test/ci/README.md b/skills/ui-test/ci/README.md new file mode 100644 index 0000000..ca354de --- /dev/null +++ b/skills/ui-test/ci/README.md @@ -0,0 +1,99 @@ +# ui-test CI Integration + +Run adversarial UI testing automatically on every PR that touches frontend files. + +## Architecture + +``` +PR opened → preview deploys (Vercel/Netlify) → GitHub Action triggers + → Claude Code (headless, --print mode) reads diff, plans tests + → browse CLI → Browserbase cloud browser tests the preview URL + → results posted as PR comment + HTML report uploaded as artifact +``` + +## Setup + +### 1. Copy the workflow + +```bash +cp skills/ui-test/ci/ui-test.yml .github/workflows/ui-test.yml +``` + +### 2. Add secrets + +In your repo settings → Secrets and variables → Actions: + +| Secret | Required | Description | +|--------|----------|-------------| +| `ANTHROPIC_API_KEY` | Yes | Claude API key | +| `BROWSERBASE_API_KEY` | Yes | Browserbase API key for cloud browsers | + +### 3. Configure preview deploy detection + +The workflow defaults to **Vercel** preview detection. Edit the `wait-for-preview` job in `ui-test.yml` if you use Netlify, Cloudflare Pages, or a custom preview system. See the commented alternatives in the file. + +### 4. (Optional) Configure variables + +In repo settings → Secrets and variables → Actions → Variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| `UI_TEST_MODE` | `light` | `light` = 2 agents, 20 steps each. `full` = 4 agents, 40 steps each | +| `UI_TEST_MAX_TOKENS` | `100000` | Max token budget per run | + +## How it works + +1. **Gate** — `paths-filter` checks if the PR touches UI files (`.tsx`, `.css`, etc.). Skips entirely if no UI changes. +2. **Wait** — Waits for the preview deployment to be ready (up to 5 minutes). +3. 
**Test** — `run-ui-test.sh` invokes Claude Code in `--print` mode with: + - The git diff of changed UI files + - The preview URL + - Mode-specific instructions (light vs full) +4. **Report** — Posts a summary comment on the PR and uploads the HTML report as a GitHub Actions artifact. +5. **Gate** — Exits non-zero if any test failed, so you can make it a required check. + +## Local testing + +Test the CI flow locally before deploying to GitHub Actions: + +```bash +skills/ui-test/ci/run-ui-test.sh \ + --url http://localhost:3000 \ + --local \ + --mode light +``` + +The `--local` flag skips the diff gate (no PR needed) and uses `browse env local` instead of remote. Results go to `.context/ui-test-summary.md`. + +## Cost estimate + +| Mode | Agents | Steps/agent | Estimated cost | +|------|--------|-------------|----------------| +| `light` | 2 | 20 | ~$0.50–$2 per run | +| `full` | 4 | 40 | ~$2–$5 per run | + +These are rough estimates. Actual cost depends on diff size and number of pages tested. + +## Customization + +### Only run on labeled PRs + +Add a condition to the workflow: + +```yaml +on: + pull_request: + types: [labeled] + +jobs: + check-ui-changes: + if: contains(github.event.pull_request.labels.*.name, 'ui-test') +``` + +### Adjust file filters + +Edit the `paths-filter` step in `ui-test.yml` to match your project structure. + +### Fail threshold + +By default, any STEP_FAIL causes a non-zero exit. To allow a pass rate threshold instead, modify the exit code logic in `run-ui-test.sh`. 
diff --git a/skills/ui-test/ci/run-ui-test.sh b/skills/ui-test/ci/run-ui-test.sh new file mode 100755 index 0000000..e60ba5e --- /dev/null +++ b/skills/ui-test/ci/run-ui-test.sh @@ -0,0 +1,154 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ── Parse arguments ────────────────────────────────────────────────────── +PREVIEW_URL="" +MODE="light" +PR_NUMBER="" +REPO="" +LOCAL=false + +while [[ $# -gt 0 ]]; do + case $1 in + --url) PREVIEW_URL="$2"; shift 2 ;; + --mode) MODE="$2"; shift 2 ;; + --pr) PR_NUMBER="$2"; shift 2 ;; + --repo) REPO="$2"; shift 2 ;; + --local) LOCAL=true; shift ;; + *) echo "Unknown arg: $1"; exit 1 ;; + esac +done + +if [[ -z "$PREVIEW_URL" ]]; then + echo "Error: --url is required" + exit 1 +fi + +# ── Verify preview is reachable ────────────────────────────────────────── +echo "Checking preview URL: $PREVIEW_URL" +HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "$PREVIEW_URL" 2>/dev/null || echo "000") +if [[ "$HTTP_STATUS" == "000" ]]; then + echo "Error: Preview URL is not reachable" + exit 1 +fi +echo "Preview is up (HTTP $HTTP_STATUS)" + +# ── Build the prompt ───────────────────────────────────────────────────── +if [[ "$LOCAL" == true ]]; then + # Local mode: skip diff gate, test the full app + UI_FILES="(local mode — no diff filter, testing full app)" + BROWSE_ENV="browse env local" + DIFF_CONTEXT="No diff available (local mode). Explore the app and test what you find." +else + DIFF_FILES=$(git diff --name-only origin/main...HEAD 2>/dev/null || git diff --name-only HEAD~1) + + # Filter to UI-relevant files only + UI_FILES=$(echo "$DIFF_FILES" | grep -E '\.(tsx|jsx|vue|svelte|css|scss)$' || true) + + if [[ -z "$UI_FILES" ]]; then + echo "No UI files changed. Skipping tests." + mkdir -p .context + echo "No UI files changed in this PR." 
> .context/ui-test-summary.md
+    echo "0" > .context/ui-test-exit-code
+    exit 0
+  fi
+  BROWSE_ENV="browse env remote"
+  DIFF_CONTEXT="Full diff of changed files (for context on what specifically changed):
+$(git diff origin/main...HEAD -- $UI_FILES 2>/dev/null | head -500 || echo "Could not generate diff")"
+fi
+
+echo "UI files changed:"
+echo "$UI_FILES"
+
+# Build mode-specific instructions
+if [[ "$MODE" == "light" ]]; then
+  MODE_INSTRUCTIONS="Run in CI-light mode:
+- Use at most 2 sub-agents
+- Budget each sub-agent at 20 browse steps max
+- Focus on: functional correctness of changed components, basic accessibility (axe-core), and console errors
+- Skip: exploratory testing, visual/design consistency, UX heuristics
+- Skip: HTML report generation (the summary is enough for CI)"
+else
+  MODE_INSTRUCTIONS="Run in full mode:
+- Use up to 4 sub-agents
+- Budget each sub-agent at 40 browse steps max
+- Cover: functional, adversarial, accessibility, responsive, console health
+- Generate the HTML report"
+fi
+
+PR_CONTEXT=""
+if [[ -n "$PR_NUMBER" && -n "$REPO" ]]; then
+  PR_CONTEXT="This is PR #${PR_NUMBER} on ${REPO}."
+fi
+
+# ── Run Claude Code ────────────────────────────────────────────────────
+# NOTE(review): the heredoc body and the claude invocation below were
+# reconstructed — the original text was lost when this patch was extracted
+# (it read "PROMPT=$(cat <&1 | tee ..."). Verify against the upstream
+# ui-test skill before relying on the exact prompt wording.
+PROMPT=$(cat <<EOF
+You are running adversarial UI tests in CI using the ui-test skill.
+
+Target app: ${PREVIEW_URL}
+${PR_CONTEXT}
+
+${MODE_INSTRUCTIONS}
+
+Before browsing, set up the browser environment with: ${BROWSE_ENV}
+Stay under a total budget of ${UI_TEST_MAX_TOKENS:-100000} tokens.
+
+${DIFF_CONTEXT}
+
+When finished:
+- Write a markdown summary of results to .context/ui-test-summary.md
+- Write "0" (all tests passed) or "1" (any failure) to .context/ui-test-exit-code
+EOF
+)
+
+claude --print "$PROMPT" 2>&1 | tee .context/ui-test-output.log
+
+# ── Post-run ───────────────────────────────────────────────────────────
+# Ensure browse sessions are cleaned up
+browse stop 2>/dev/null || true
+pkill -f "browse.*daemon" 2>/dev/null || true
+
+# Default exit code if Claude didn't write one
+if [[ ! -f .context/ui-test-exit-code ]]; then
+  echo "1" > .context/ui-test-exit-code
+  echo "Warning: Claude did not write an exit code. Defaulting to failure."
+fi
+
+# Default summary if Claude didn't write one
+if [[ ! -f .context/ui-test-summary.md ]]; then
+  cat > .context/ui-test-summary.md <<'EOF'
+UI test run completed but did not produce a structured summary.
+
+Check the full output log for details.
+EOF +fi + +echo "" +echo "=======================================" +echo "UI Test Complete" +echo "=======================================" +cat .context/ui-test-summary.md +echo "" +echo "Exit code: $(cat .context/ui-test-exit-code)" diff --git a/skills/ui-test/ci/ui-test.yml b/skills/ui-test/ci/ui-test.yml new file mode 100644 index 0000000..f5dd271 --- /dev/null +++ b/skills/ui-test/ci/ui-test.yml @@ -0,0 +1,196 @@ +# .github/workflows/ui-test.yml +# +# Runs adversarial UI testing on PRs that touch frontend files. +# Requires a preview deployment (Vercel, Netlify, etc.) before testing. +# +# Secrets needed: +# ANTHROPIC_API_KEY — Claude API access +# BROWSERBASE_API_KEY — Cloud browser for remote testing +# +# Optional: +# UI_TEST_MAX_TOKENS — Cap total token spend per run (default: 100000) +# UI_TEST_MODE — "light" (fewer agents, smaller budgets) or "full" (default: light) + +name: UI Test + +on: + pull_request: + types: [opened, synchronize, reopened] + +# Cancel in-flight runs for the same PR +concurrency: + group: ui-test-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + # ── Gate: only run if the PR touches UI files ────────────────────────── + check-ui-changes: + runs-on: ubuntu-latest + outputs: + has_ui_changes: ${{ steps.filter.outputs.ui }} + steps: + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + ui: + - '**/*.tsx' + - '**/*.jsx' + - '**/*.vue' + - '**/*.svelte' + - '**/*.css' + - '**/*.scss' + - '**/*.module.css' + - 'app/**' + - 'pages/**' + - 'src/routes/**' + - 'src/components/**' + - 'public/**' + + # ── Wait for preview deployment ──────────────────────────────────────── + wait-for-preview: + needs: check-ui-changes + if: needs.check-ui-changes.outputs.has_ui_changes == 'true' + runs-on: ubuntu-latest + outputs: + preview_url: ${{ steps.get-url.outputs.url }} + steps: + # Option A: Vercel — wait for the deployment to be ready + - name: Wait for Vercel preview + uses: 
patrickedqvist/wait-for-vercel-preview@v1.3.2 + id: vercel + with: + token: ${{ secrets.GITHUB_TOKEN }} + max_timeout: 300 + check_interval: 10 + + - name: Set preview URL + id: get-url + run: echo "url=${{ steps.vercel.outputs.url }}" >> "$GITHUB_OUTPUT" + + # ─── Alternative: Netlify ─── + # - name: Wait for Netlify preview + # uses: jakepartusch/wait-for-netlify-action@v1.4 + # id: netlify + # with: + # site_name: your-site-name + # max_timeout: 300 + # + # - name: Set preview URL + # id: get-url + # run: echo "url=${{ steps.netlify.outputs.url }}" >> "$GITHUB_OUTPUT" + + # ─── Alternative: Custom / self-hosted ─── + # - name: Set preview URL + # id: get-url + # run: echo "url=https://pr-${{ github.event.pull_request.number }}.preview.yourapp.com" >> "$GITHUB_OUTPUT" + + # ── Run UI tests ─────────────────────────────────────────────────────── + ui-test: + needs: wait-for-preview + runs-on: ubuntu-latest + timeout-minutes: 15 + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} + PREVIEW_URL: ${{ needs.wait-for-preview.outputs.preview_url }} + UI_TEST_MODE: ${{ vars.UI_TEST_MODE || 'light' }} + UI_TEST_MAX_TOKENS: ${{ vars.UI_TEST_MAX_TOKENS || '100000' }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 # full history for diff analysis + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 20 + + - name: Install tools + run: | + npm install -g @anthropic-ai/claude-code @browserbasehq/browse-cli + + - name: Install ui-test skill + run: | + # If the skill isn't already in the repo, install it + if [ ! 
-d ".claude/skills/ui-test" ]; then + npx skills add browserbase/ui-test + fi + + - name: Run UI tests + id: run-tests + run: | + chmod +x skills/ui-test/ci/run-ui-test.sh + skills/ui-test/ci/run-ui-test.sh \ + --url "$PREVIEW_URL" \ + --mode "$UI_TEST_MODE" \ + --pr "${{ github.event.pull_request.number }}" \ + --repo "${{ github.repository }}" + + - name: Upload HTML report + if: always() + uses: actions/upload-artifact@v4 + with: + name: ui-test-report + path: .context/ui-test-report.html + if-no-files-found: ignore + + - name: Upload screenshots + if: always() + uses: actions/upload-artifact@v4 + with: + name: ui-test-screenshots + path: .context/ui-test-screenshots/ + if-no-files-found: ignore + + - name: Comment on PR + if: always() + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + + // Read the summary generated by the test run + let summary = ''; + try { + summary = fs.readFileSync('.context/ui-test-summary.md', 'utf8'); + } catch { + summary = '⚠️ UI test run did not produce a summary. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.';
+              }
+
+              // Find and update existing comment, or create new one.
+              // NOTE(review): the marker was an empty string in the extracted
+              // patch (the HTML comment was stripped by markup processing).
+              // An empty marker matches EVERY comment via includes(''), which
+              // would overwrite an unrelated first comment — restored here.
+              const marker = '<!-- ui-test-report -->';
+              const body = `${marker}\n## UI Test Results\n\n${summary}\n\n📎 [Full HTML report](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) (download artifact)\n\n---\n*Generated by [ui-test](https://github.com/browserbase/skills/tree/main/skills/ui-test) · Powered by [Browserbase](https://browserbase.com)*`;
+
+              const { data: comments } = await github.rest.issues.listComments({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: ${{ github.event.pull_request.number }},
+              });
+
+              const existing = comments.find(c => c.body.includes(marker));
+              if (existing) {
+                await github.rest.issues.updateComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  comment_id: existing.id,
+                  body,
+                });
+              } else {
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: ${{ github.event.pull_request.number }},
+                  body,
+                });
+              }
+
+      - name: Check pass rate
+        if: always()
+        run: |
+          # Fail the check if pass rate is below threshold
+          if [ -f .context/ui-test-exit-code ]; then
+            exit $(cat .context/ui-test-exit-code)
+          fi