Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions .github/workflows/e2e-tests-ci-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,44 @@ jobs:
node-version-file: ".nvmrc"
cache: npm
cache-dependency-path: ${{ needs.generate-build-variables.outputs.node-cache-dependency-path }}
- name: ci/runner-prep-for-openldap
  # Observed failure: "dependency failed to start: container
  # mmserver-openldap-1 exited (1)" on ubuntu-24.04 runners — kills
  # every LDAP spec on the affected shard.
  #
  # Ubuntu 24.04 introduced an AppArmor profile that restricts the
  # creation of unprivileged user namespaces. The osixia/openldap
  # image's internal init scripts rely on this capability; blocking
  # it produces an immediate exit(1) with no useful stderr. The
  # container's own security_opt: apparmor:unconfined is not
  # sufficient — that only unconfines slapd, not the container's
  # entrypoint process. The actual switch is at the host-kernel level.
  #
  # Also ensure docker-compose is >= 2.36.0 — the 2.35.1 shipped on
  # some ubuntu-24.04 images has a known `up` regression that
  # manifests as random dependency-failed errors under load.
  run: |
    echo "Before: docker compose version"
    docker compose version || true

    # Disable the AppArmor user-namespace restriction. Idempotent;
    # safe if the key doesn't exist (older kernel).
    sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 || true

    # NEED is the minimum acceptable version; TARGET is the version
    # installed when the runner's copy is older than NEED.
    NEED="2.36.0"
    TARGET="2.39.1"
    CURRENT=$(docker compose version --short 2>/dev/null || echo "0.0.0")
    # Tolerate a leading "v" so the sort -V comparison sees bare numbers.
    CURRENT="${CURRENT#v}"

    # sort -V puts the lower version first: if that is not NEED, then
    # CURRENT is older than NEED and must be replaced.
    if [ "$(printf '%s\n' "$NEED" "$CURRENT" | sort -V | head -n1)" != "$NEED" ]; then
      echo "Upgrading docker-compose from ${CURRENT} to ${TARGET}"
      PLUGIN_DIR="$HOME/.docker/cli-plugins"
      mkdir -p "$PLUGIN_DIR"

      # Download outside the plugin dir so a failed or partial transfer
      # never shadows the system copy. --fail turns HTTP errors into a
      # non-zero exit instead of saving the error page as the binary.
      # NOTE(review): the binary is not checksum-verified, and the asset
      # name assumes an x86_64 Linux runner.
      TMP_BIN="$(mktemp)"
      curl --fail --show-error --location --retry 3 \
        -o "$TMP_BIN" \
        "https://github.com/docker/compose/releases/download/v${TARGET}/docker-compose-linux-x86_64"

      # User-level cli-plugins dir takes precedence over the system copy.
      install -m 0755 "$TMP_BIN" "$PLUGIN_DIR/docker-compose"
      rm -f "$TMP_BIN"
    fi

    echo "After: docker compose version"
    docker compose version
- name: ci/e2e-test
run: |
make cloud-init
Expand Down Expand Up @@ -272,6 +310,36 @@ jobs:
- name: ci/cloud-teardown
# Runs `make cloud-teardown` regardless of how earlier steps ended
# (if: always()).
if: always()
run: make cloud-teardown
- name: ci/dump-docker-state-on-failure
  # Backstop diagnostics: runs only when an earlier step in this job
  # has failed (if: failure()), and captures docker and host state so
  # failures unrelated to openldap startup (e.g. server container later
  # crashes) still produce logs we can inspect. The script's own retry
  # loop dumps openldap state per-attempt; this step covers the whole job.
  #
  # NOTE(review): this step is listed after ci/cloud-teardown — confirm
  # the teardown does not remove the containers inspected here.
  if: failure()
  run: |
    # Best-effort: one failing probe must not abort the remaining ones.
    set +e

    # Anchor to the workspace root so the output lands where the upload
    # step looks (its `path` is workspace-relative), whatever working
    # directory this job's run steps default to.
    DIAG="${GITHUB_WORKSPACE}/e2e-tests/docker-diagnostics/job-failure"
    mkdir -p "$DIAG"

    # Docker daemon and container overview.
    docker ps -a >"$DIAG/docker.ps.txt" 2>&1
    docker version >"$DIAG/docker.version.txt" 2>&1
    docker info >"$DIAG/docker.info.txt" 2>&1

    # Per-container metadata and logs, running or exited.
    for c in $(docker ps -a --format '{{.Names}}'); do
      docker inspect "$c" >"$DIAG/$c.inspect.json" 2>&1
      docker logs "$c" >"$DIAG/$c.log" 2>&1
    done

    # Host state: kernel, memory, disk.
    uname -a >"$DIAG/host.uname.txt" 2>&1
    free -m >"$DIAG/host.free.txt" 2>&1
    df -h >"$DIAG/host.df.txt" 2>&1

    # Merge dmesg's own stderr into the pipe: a trailing 2>&1 would only
    # apply to the last command of the pipeline and lose dmesg errors.
    sudo dmesg 2>&1 | tail -500 >"$DIAG/host.dmesg.tail.txt"
    sudo dmesg 2>&1 | grep -iE 'apparmor|denied|oom|killed|openldap|slapd' >"$DIAG/host.dmesg.relevant.txt"
- name: ci/upload-docker-diagnostics
  # Publish everything under e2e-tests/docker-diagnostics/ as a job
  # artifact whatever the job outcome. An absent or empty directory is
  # ignored rather than reported, and the artifact is kept for 7 days.
  if: always()
  uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
  with:
    path: e2e-tests/docker-diagnostics/
    name: docker-diagnostics-${{ inputs.TEST }}-${{ matrix.os }}-${{ matrix.worker_index }}
    if-no-files-found: ignore
    retention-days: 7
- name: ci/e2e-test-store-results
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
if: always()
Expand Down
224 changes: 126 additions & 98 deletions .github/workflows/e2e-tests-playwright-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ jobs:

run-tests:
runs-on: ubuntu-24.04
timeout-minutes: 30
timeout-minutes: 60
continue-on-error: true
needs:
- generate-test-variables
Expand Down Expand Up @@ -173,13 +173,109 @@ jobs:
- name: ci/get-webapp-node-modules
# Runs the `node_modules` make target from the webapp directory.
working-directory: webapp
run: make node_modules
- name: ci/runner-prep-for-openldap
  # Observed failure: "dependency failed to start: container
  # mmserver-openldap-1 exited (1)" on ubuntu-24.04 runners — kills
  # every ABAC/LDAP spec on the affected shard.
  #
  # Ubuntu 24.04 introduced an AppArmor profile that restricts the
  # creation of unprivileged user namespaces. The osixia/openldap
  # image's internal init scripts rely on this capability; blocking
  # it produces an immediate exit(1) with no useful stderr. The
  # container's own security_opt: apparmor:unconfined (already set
  # in server/build/docker-compose.common.yml) isn't sufficient —
  # that only unconfines slapd, not the container's entrypoint
  # process. The actual switch is at the host-kernel level.
  #
  # Also ensure docker-compose is >= 2.36.0 — the 2.35.1 shipped on
  # some ubuntu-24.04 images has a known `up` regression that
  # manifests as random dependency-failed errors under load.
  run: |
    echo "Before: docker compose version"
    docker compose version || true

    # Disable the AppArmor user-namespace restriction. Idempotent;
    # safe if the key doesn't exist (older kernel).
    sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 || true

    # NEED is the minimum acceptable version; TARGET is the version
    # installed when the runner's copy is older than NEED.
    NEED="2.36.0"
    TARGET="2.39.1"
    CURRENT=$(docker compose version --short 2>/dev/null || echo "0.0.0")
    # Tolerate a leading "v" so the sort -V comparison sees bare numbers.
    CURRENT="${CURRENT#v}"

    # sort -V puts the lower version first: if that is not NEED, then
    # CURRENT is older than NEED and must be replaced.
    if [ "$(printf '%s\n' "$NEED" "$CURRENT" | sort -V | head -n1)" != "$NEED" ]; then
      echo "Upgrading docker-compose from ${CURRENT} to ${TARGET}"
      PLUGIN_DIR="$HOME/.docker/cli-plugins"
      mkdir -p "$PLUGIN_DIR"

      # Download outside the plugin dir so a failed or partial transfer
      # never shadows the system copy. --fail turns HTTP errors into a
      # non-zero exit instead of saving the error page as the binary.
      # NOTE(review): the binary is not checksum-verified.
      TMP_BIN="$(mktemp)"
      curl --fail --show-error --location --retry 3 \
        -o "$TMP_BIN" \
        "https://github.com/docker/compose/releases/download/v${TARGET}/docker-compose-linux-x86_64"

      # User-level cli-plugins dir takes precedence over the system copy.
      install -m 0755 "$TMP_BIN" "$PLUGIN_DIR/docker-compose"
      rm -f "$TMP_BIN"
    fi

    echo "After: docker compose version"
    docker compose version
- name: ci/restore-playwright-image-cache
  # Keeps a tarball of the Playwright Docker image in the Actions cache.
  # The key is derived from the two files that pin the image version, so
  # editing either one to bump the version invalidates the cache. The
  # point is to avoid repeated MCR pulls, which Microsoft's CDN
  # frequently rejects ("The request is blocked").
  uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
  id: playwright-image-cache
  with:
    key: playwright-docker-image-${{ hashFiles('e2e-tests/.ci/server.generate.sh', '.github/workflows/e2e-tests-playwright-template.yml') }}-${{ runner.os }}
    path: /tmp/playwright-docker-image.tar
- name: ci/pre-pull-playwright-image
  # Load the Playwright image from the cached tar when available; pull
  # from MCR on a cache miss, or when the cached tar turns out to be
  # unusable. A single pull attempt is used because the image is saved
  # to the cache tar for future runs.
  run: |
    set -euo pipefail
    IMAGE="mcr.microsoft.com/playwright:v1.59.1-noble"
    TAR="/tmp/playwright-docker-image.tar"

    LOADED="false"
    if [ -f "${TAR}" ]; then
      echo "Loading Playwright image from GitHub Actions cache"
      # Treat the cache as valid only if the load succeeds AND it
      # provides the expected tag. Otherwise a corrupt or stale tar
      # would fail this step on every run until the cache key changes.
      if docker load --input "${TAR}" && docker image inspect "${IMAGE}" >/dev/null 2>&1; then
        LOADED="true"
      else
        echo "Cached tar is unusable — treating as cache miss"
        rm -f "${TAR}"
      fi
    fi

    if [ "${LOADED}" != "true" ]; then
      echo "Cache miss — pulling from MCR"
      docker pull "${IMAGE}"
      echo "Saving image to cache for future runs"
      docker save "${IMAGE}" --output "${TAR}"
    fi
- name: ci/run-tests
# Runs `make cloud-init`, then the default make target.
run: |
make cloud-init
make
- name: ci/cloud-teardown
# Runs `make cloud-teardown` regardless of how earlier steps ended
# (if: always()).
if: always()
run: make cloud-teardown
- name: ci/dump-docker-state-on-failure
  # Backstop diagnostics: runs only when an earlier step in this job
  # has failed (if: failure()), and captures docker and host state so
  # failures unrelated to openldap startup (e.g. server container later
  # crashes) still produce logs we can inspect. The script's own retry
  # loop dumps openldap state per-attempt; this step covers the whole job.
  #
  # NOTE(review): this step is listed after ci/cloud-teardown — confirm
  # the teardown does not remove the containers inspected here.
  if: failure()
  run: |
    # Best-effort: one failing probe must not abort the remaining ones.
    set +e

    # Anchor to the workspace root so the output lands where the upload
    # step looks (its `path` is workspace-relative), whatever working
    # directory this job's run steps default to.
    DIAG="${GITHUB_WORKSPACE}/e2e-tests/docker-diagnostics/job-failure"
    mkdir -p "$DIAG"

    # Docker daemon and container overview.
    docker ps -a >"$DIAG/docker.ps.txt" 2>&1
    docker version >"$DIAG/docker.version.txt" 2>&1
    docker info >"$DIAG/docker.info.txt" 2>&1

    # Per-container metadata and logs, running or exited.
    for c in $(docker ps -a --format '{{.Names}}'); do
      docker inspect "$c" >"$DIAG/$c.inspect.json" 2>&1
      docker logs "$c" >"$DIAG/$c.log" 2>&1
    done

    # Host state: kernel, memory, disk.
    uname -a >"$DIAG/host.uname.txt" 2>&1
    free -m >"$DIAG/host.free.txt" 2>&1
    df -h >"$DIAG/host.df.txt" 2>&1

    # Merge dmesg's own stderr into the pipe: a trailing 2>&1 would only
    # apply to the last command of the pipeline and lose dmesg errors.
    sudo dmesg 2>&1 | tail -500 >"$DIAG/host.dmesg.tail.txt"
    sudo dmesg 2>&1 | grep -iE 'apparmor|denied|oom|killed|openldap|slapd' >"$DIAG/host.dmesg.relevant.txt"
- name: ci/upload-docker-diagnostics
  # Publish everything under e2e-tests/docker-diagnostics/ as a job
  # artifact whatever the job outcome. An absent or empty directory is
  # ignored rather than reported, and the artifact is kept for 7 days.
  if: always()
  uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
  with:
    path: e2e-tests/docker-diagnostics/
    name: docker-diagnostics-playwright-${{ inputs.test_type }}-${{ inputs.server_edition }}-${{ matrix.worker_index }}
    if-no-files-found: ignore
    retention-days: 7
- name: ci/upload-results
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
if: always()
Expand Down Expand Up @@ -250,69 +346,21 @@ jobs:
id: record-end-time
run: echo "end_time=$(date +%s)" >> $GITHUB_OUTPUT

run-failed-tests:
runs-on: ubuntu-24.04
timeout-minutes: 30
needs:
- run-tests
- calculate-results
if: >-
always() &&
needs.calculate-results.result == 'success' &&
needs.calculate-results.outputs.failed != '0' &&
fromJSON(needs.calculate-results.outputs.failed_specs_count) <= 20
defaults:
run:
working-directory: e2e-tests
env:
SERVER: "${{ inputs.server }}"
MM_LICENSE: "${{ secrets.MM_LICENSE }}"
ENABLED_DOCKER_SERVICES: "${{ inputs.enabled_docker_services }}"
TEST: playwright
BRANCH: "${{ inputs.branch }}-${{ inputs.test_type }}-retest"
BUILD_ID: "${{ inputs.build_id }}-retest"
steps:
- name: ci/checkout-repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ inputs.commit_sha }}
fetch-depth: 0
- name: ci/setup-node
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
with:
node-version-file: ".nvmrc"
cache: npm
cache-dependency-path: "e2e-tests/playwright/package-lock.json"
- name: ci/get-webapp-node-modules
working-directory: webapp
run: make node_modules
- name: ci/run-failed-specs
env:
SPEC_FILES: ${{ needs.calculate-results.outputs.failed_specs }}
run: |
echo "Retesting failed specs: $SPEC_FILES"
make cloud-init
make start-server run-specs
- name: ci/cloud-teardown
if: always()
run: make cloud-teardown
- name: ci/upload-retest-results
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
if: always()
with:
name: playwright-${{ inputs.test_type }}-${{ inputs.server_edition }}-retest-results
path: |
e2e-tests/playwright/logs/
e2e-tests/playwright/results/
retention-days: 5
# NB: retries for failing specs happen INLINE inside each shard's
# `ci/run-tests` step (see e2e-tests/.ci/server.run_playwright.sh).
# That reuses the already-running server+docker stack instead of
# paying ~4-7 min to provision a fresh one here, and it correctly
# handles the chrome + chrome-serial project split. The old
# standalone `run-failed-tests` job was removed because it was
# invoking `--project=chrome` against specs that only exist in
# chrome-serial, causing the retest to run zero tests.

report:
runs-on: ubuntu-24.04
needs:
- generate-test-variables
- run-tests
- calculate-results
- run-failed-tests
if: always() && needs.calculate-results.result == 'success'
outputs:
passed: "${{ steps.final-results.outputs.passed }}"
Expand All @@ -335,28 +383,23 @@ jobs:
cache: npm
cache-dependency-path: "e2e-tests/playwright/package-lock.json"

# Download merged results (uploaded by calculate-results)
# Download merged results (uploaded by calculate-results). These blob
# reports already include the inline per-shard retry results, so no
# separate retest download/merge is needed here.
- name: ci/download-results
uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0
with:
name: playwright-${{ inputs.test_type }}-${{ inputs.server_edition }}-results
path: e2e-tests/playwright/results/

# Download retest results (only if retest ran)
- name: ci/download-retest-results
if: needs.run-failed-tests.result != 'skipped'
uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0
with:
name: playwright-${{ inputs.test_type }}-${{ inputs.server_edition }}-retest-results
path: e2e-tests/playwright/retest-results/

# Calculate results (with optional merge of retest results)
# Calculate final results. Tests that failed in the first pass but
# passed on inline retry are reported as `flaky`, not `failed`, so
# no retest-results-path is needed.
- name: ci/calculate-results
id: final-results
uses: ./.github/actions/calculate-playwright-results
with:
original-results-path: e2e-tests/playwright/results/reporter/results.json
retest-results-path: ${{ needs.run-failed-tests.result != 'skipped' && 'e2e-tests/playwright/retest-results/results/reporter/results.json' || '' }}

- name: ci/aws-configure
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6.0.0
Expand Down Expand Up @@ -394,9 +437,7 @@ jobs:
id: duration
env:
START_TIME: ${{ needs.generate-test-variables.outputs.start_time }}
FIRST_PASS_END_TIME: ${{ needs.calculate-results.outputs.end_time }}
RETEST_RESULT: ${{ needs.run-failed-tests.result }}
RETEST_SPEC_COUNT: ${{ needs.calculate-results.outputs.failed_specs_count }}
FLAKY_COUNT: ${{ steps.final-results.outputs.flaky }}
TEST_DURATION: ${{ steps.final-results.outputs.test_duration }}
run: |
NOW=$(date +%s)
Expand All @@ -405,33 +446,22 @@ jobs:
SECONDS=$((ELAPSED % 60))
DURATION="${MINUTES}m ${SECONDS}s"

# Compute first-pass and re-run durations
FIRST_PASS_ELAPSED=$((FIRST_PASS_END_TIME - START_TIME))
FP_MIN=$((FIRST_PASS_ELAPSED / 60))
FP_SEC=$((FIRST_PASS_ELAPSED % 60))
FIRST_PASS="${FP_MIN}m ${FP_SEC}s"

if [ "$RETEST_RESULT" != "skipped" ]; then
RERUN_ELAPSED=$((NOW - FIRST_PASS_END_TIME))
RR_MIN=$((RERUN_ELAPSED / 60))
RR_SEC=$((RERUN_ELAPSED % 60))
RUN_BREAKDOWN=" (first-pass: ${FIRST_PASS}, re-run: ${RR_MIN}m ${RR_SEC}s)"
else
RUN_BREAKDOWN=""
fi

# Duration icons: >20m high alert, >15m warning, otherwise clock
# Duration icons: >20m high alert, >15m warning, otherwise clock.
# Retries now happen inline per-shard, so there's no separate
# first-pass/re-run breakdown — the shard wall-clock already
# includes any retries it needed.
if [ "$MINUTES" -ge 20 ]; then
DURATION_DISPLAY=":rotating_light: ${DURATION}${RUN_BREAKDOWN} | test: ${TEST_DURATION}"
DURATION_DISPLAY=":rotating_light: ${DURATION} | test: ${TEST_DURATION}"
elif [ "$MINUTES" -ge 15 ]; then
DURATION_DISPLAY=":warning: ${DURATION}${RUN_BREAKDOWN} | test: ${TEST_DURATION}"
DURATION_DISPLAY=":warning: ${DURATION} | test: ${TEST_DURATION}"
else
DURATION_DISPLAY=":clock3: ${DURATION}${RUN_BREAKDOWN} | test: ${TEST_DURATION}"
DURATION_DISPLAY=":clock3: ${DURATION} | test: ${TEST_DURATION}"
fi

# Retest indicator with spec count
if [ "$RETEST_RESULT" != "skipped" ]; then
RETEST_DISPLAY=":repeat: re-run ${RETEST_SPEC_COUNT} spec(s)"
# Flaky indicator: tests that failed first pass but passed on
# inline retry. Signals retries did run.
if [ -n "$FLAKY_COUNT" ] && [ "$FLAKY_COUNT" -gt 0 ] 2>/dev/null; then
RETEST_DISPLAY=":repeat: ${FLAKY_COUNT} flaky"
else
RETEST_DISPLAY=""
fi
Expand Down Expand Up @@ -505,7 +535,6 @@ jobs:
COMMIT_STATUS_MESSAGE: ${{ steps.final-results.outputs.commit_status_message }}
FAILED_TESTS: ${{ steps.final-results.outputs.failed_tests }}
DURATION_DISPLAY: ${{ steps.duration.outputs.duration_display }}
RETEST_RESULT: ${{ needs.run-failed-tests.result }}
run: |
{
echo "## E2E Test Results - Playwright ${TEST_TYPE}"
Expand Down Expand Up @@ -537,10 +566,9 @@ jobs:
echo "| commit_status_message | ${COMMIT_STATUS_MESSAGE} |"
echo "| failed_specs | ${FAILED_SPECS:-none} |"
echo "| duration | ${DURATION_DISPLAY} |"
if [ "$RETEST_RESULT" != "skipped" ]; then
echo "| retested | Yes |"
else
echo "| retested | No |"
# Flaky > 0 means some tests needed the inline retry to pass.
if [ -n "$FLAKY" ] && [ "$FLAKY" -gt 0 ] 2>/dev/null; then
echo "| retried (flaky) | ${FLAKY} |"
fi

echo ""
Expand Down
Loading
Loading