code · pull · May 9, 2026 · May 8, 2026
diff --git a/.github/workflows/e2e-tests-ci-template.yml b/.github/workflows/e2e-tests-ci-template.yml
@@ -244,6 +244,44 @@ jobs:
           node-version-file: ".nvmrc"
           cache: npm
           cache-dependency-path: ${{ needs.generate-build-variables.outputs.node-cache-dependency-path }}
+      - name: ci/runner-prep-for-openldap
+        # Observed failure: "dependency failed to start: container
+        # mmserver-openldap-1 exited (1)" on ubuntu-24.04 runners — kills
+        # every LDAP spec on the affected shard.
+        #
+        # Ubuntu 24.04 introduced an AppArmor profile that restricts the
+        # creation of unprivileged user namespaces. The osixia/openldap
+        # image's internal init scripts rely on this capability; blocking
+        # it produces an immediate exit(1) with no useful stderr. The
+        # container's own security_opt: apparmor:unconfined is not
+        # sufficient — that only unconfines slapd, not the container's
+        # entrypoint process. The actual switch is at the host-kernel level.
+        #
+        # Also ensure docker-compose is >= 2.36.0 — the 2.35.1 shipped on
+        # some ubuntu-24.04 images has a known `up` regression that
+        # manifests as random dependency-failed errors under load.
+        run: |
+          echo "Before: docker compose version"
+          docker compose version || true
+
+          # Disable the AppArmor user-namespace restriction. Idempotent;
+          # safe if the key doesn't exist (older kernel).
+          sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 || true
+
+          # Older than 2.36.0: install a pinned build into the user's cli-plugins
+          # dir (shadows the system copy). -f turns an HTTP error into a failure.
+          CURRENT=$(docker compose version --short 2>/dev/null || echo "0.0.0")
+          NEED="2.36.0"
+          if [ "$(printf '%s\n' "$NEED" "$CURRENT" | sort -V | head -n1)" != "$NEED" ]; then
+            echo "Upgrading docker-compose from ${CURRENT} to 2.39.1"
+            mkdir -p "$HOME/.docker/cli-plugins"
+            curl -fSL --retry 3 -o "$HOME/.docker/cli-plugins/docker-compose" \
+              "https://github.com/docker/compose/releases/download/v2.39.1/docker-compose-linux-x86_64"
+            chmod +x "$HOME/.docker/cli-plugins/docker-compose"
+          fi
+
+          echo "After: docker compose version"
+          docker compose version
       - name: ci/e2e-test
         run: |
           make cloud-init
@@ -272,6 +310,36 @@ jobs:
       - name: ci/cloud-teardown
         if: always()
         run: make cloud-teardown
+      - name: ci/dump-docker-state-on-failure
+        # Backstop for the whole job: on failure, capture docker/host state so
+        # crashes unrelated to openldap startup (e.g. the server container
+        # dying later) still leave logs; the script's retry loop only covers
+        # openldap per-attempt. DIAG is absolute so it matches the upload path.
+        if: failure()
+        run: |
+          set +e
+          DIAG="${GITHUB_WORKSPACE}/e2e-tests/docker-diagnostics/job-failure"
+          mkdir -p "$DIAG"
+          docker ps -a >"$DIAG/docker.ps.txt" 2>&1
+          docker version >"$DIAG/docker.version.txt" 2>&1
+          docker info >"$DIAG/docker.info.txt" 2>&1
+          for c in $(docker ps -a --format '{{.Names}}'); do
+            docker inspect "$c" >"$DIAG/$c.inspect.json" 2>&1
+            docker logs "$c" >"$DIAG/$c.log" 2>&1
+          done
+          uname -a >"$DIAG/host.uname.txt" 2>&1
+          free -m >"$DIAG/host.free.txt" 2>&1
+          df -h >"$DIAG/host.df.txt" 2>&1
+          sudo dmesg | tail -500 >"$DIAG/host.dmesg.tail.txt" 2>&1
+          sudo dmesg | grep -iE 'apparmor|denied|oom|killed|openldap|slapd' >"$DIAG/host.dmesg.relevant.txt" 2>&1
+      - name: ci/upload-docker-diagnostics
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        if: always()
+        with:
+          name: docker-diagnostics-${{ inputs.TEST }}-${{ matrix.os }}-${{ matrix.worker_index }}
+          path: e2e-tests/docker-diagnostics/
+          retention-days: 7
+          if-no-files-found: ignore
       - name: ci/e2e-test-store-results
         uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
         if: always()

diff --git a/.github/workflows/e2e-tests-playwright-template.yml b/.github/workflows/e2e-tests-playwright-template.yml
@@ -136,7 +136,7 @@ jobs:
 
   run-tests:
     runs-on: ubuntu-24.04
-    timeout-minutes: 30
+    timeout-minutes: 60
     continue-on-error: true
     needs:
       - generate-test-variables
@@ -173,13 +173,109 @@ jobs:
       - name: ci/get-webapp-node-modules
         working-directory: webapp
         run: make node_modules
+      - name: ci/runner-prep-for-openldap
+        # Observed failure: "dependency failed to start: container
+        # mmserver-openldap-1 exited (1)" on ubuntu-24.04 runners — kills
+        # every ABAC/LDAP spec on the affected shard.
+        #
+        # Ubuntu 24.04 introduced an AppArmor profile that restricts the
+        # creation of unprivileged user namespaces. The osixia/openldap
+        # image's internal init scripts rely on this capability; blocking
+        # it produces an immediate exit(1) with no useful stderr. The
+        # container's own security_opt: apparmor:unconfined (already set
+        # in server/build/docker-compose.common.yml) isn't sufficient —
+        # that only unconfines slapd, not the container's entrypoint
+        # process. The actual switch is at the host-kernel level.
+        #
+        # Also ensure docker-compose is >= 2.36.0 — the 2.35.1 shipped on
+        # some ubuntu-24.04 images has a known `up` regression that
+        # manifests as random dependency-failed errors under load.
+        run: |
+          echo "Before: docker compose version"
+          docker compose version || true
+
+          # Disable the AppArmor user-namespace restriction. Idempotent;
+          # safe if the key doesn't exist (older kernel).
+          sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 || true
+
+          # Older than 2.36.0: install a pinned build into the user's cli-plugins
+          # dir (shadows the system copy). -f turns an HTTP error into a failure.
+          CURRENT=$(docker compose version --short 2>/dev/null || echo "0.0.0")
+          NEED="2.36.0"
+          if [ "$(printf '%s\n' "$NEED" "$CURRENT" | sort -V | head -n1)" != "$NEED" ]; then
+            echo "Upgrading docker-compose from ${CURRENT} to 2.39.1"
+            mkdir -p "$HOME/.docker/cli-plugins"
+            curl -fSL --retry 3 -o "$HOME/.docker/cli-plugins/docker-compose" \
+              "https://github.com/docker/compose/releases/download/v2.39.1/docker-compose-linux-x86_64"
+            chmod +x "$HOME/.docker/cli-plugins/docker-compose"
+          fi
+
+          echo "After: docker compose version"
+          docker compose version
+      - name: ci/restore-playwright-image-cache
+        # Cache the Playwright Docker image tar by the SHA of the files that pin
+        # its version. Cache busts automatically when either file is edited to bump
+        # the version. Avoids repeated MCR pulls which are frequently blocked by
+        # Microsoft's CDN ("The request is blocked").
+        id: playwright-image-cache
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        with:
+          path: /tmp/playwright-docker-image.tar
+          key: playwright-docker-image-${{ hashFiles('e2e-tests/.ci/server.generate.sh', '.github/workflows/e2e-tests-playwright-template.yml') }}-${{ runner.os }}
+      - name: ci/pre-pull-playwright-image
+        # Load from cache when available; pull from MCR on a cache miss or when
+        # the restored tar fails to load. A single pull attempt is enough because
+        # the image is saved to the cache tar for future runs — no retry loop.
+        run: |
+          set -euo pipefail
+          IMAGE="mcr.microsoft.com/playwright:v1.59.1-noble"
+          TAR="/tmp/playwright-docker-image.tar"
+          # Inside an `if` condition a failed load does not trip `set -e`.
+          if [ -f "${TAR}" ] && docker load --input "${TAR}"; then
+            echo "Loaded Playwright image from GitHub Actions cache"
+          else
+            echo "Cache miss or unusable tar — pulling from MCR"
+            docker pull "${IMAGE}"
+            echo "Saving image to cache for future runs"
+            docker save "${IMAGE}" --output "${TAR}"
+          fi
       - name: ci/run-tests
         run: |
           make cloud-init
           make
       - name: ci/cloud-teardown
         if: always()
         run: make cloud-teardown
+      - name: ci/dump-docker-state-on-failure
+        # Backstop for the whole job: on failure, capture docker/host state so
+        # crashes unrelated to openldap startup (e.g. the server container
+        # dying later) still leave logs; the script's retry loop only covers
+        # openldap per-attempt. DIAG is absolute so it matches the upload path.
+        if: failure()
+        run: |
+          set +e
+          DIAG="${GITHUB_WORKSPACE}/e2e-tests/docker-diagnostics/job-failure"
+          mkdir -p "$DIAG"
+          docker ps -a >"$DIAG/docker.ps.txt" 2>&1
+          docker version >"$DIAG/docker.version.txt" 2>&1
+          docker info >"$DIAG/docker.info.txt" 2>&1
+          for c in $(docker ps -a --format '{{.Names}}'); do
+            docker inspect "$c" >"$DIAG/$c.inspect.json" 2>&1
+            docker logs "$c" >"$DIAG/$c.log" 2>&1
+          done
+          uname -a >"$DIAG/host.uname.txt" 2>&1
+          free -m >"$DIAG/host.free.txt" 2>&1
+          df -h >"$DIAG/host.df.txt" 2>&1
+          sudo dmesg | tail -500 >"$DIAG/host.dmesg.tail.txt" 2>&1
+          sudo dmesg | grep -iE 'apparmor|denied|oom|killed|openldap|slapd' >"$DIAG/host.dmesg.relevant.txt" 2>&1
+      - name: ci/upload-docker-diagnostics
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        if: always()
+        with:
+          name: docker-diagnostics-playwright-${{ inputs.test_type }}-${{ inputs.server_edition }}-${{ matrix.worker_index }}
+          path: e2e-tests/docker-diagnostics/
+          retention-days: 7
+          if-no-files-found: ignore
       - name: ci/upload-results
         uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
         if: always()
@@ -250,69 +346,21 @@ jobs:
         id: record-end-time
         run: echo "end_time=$(date +%s)" >> $GITHUB_OUTPUT
 
-  run-failed-tests:
-    runs-on: ubuntu-24.04
-    timeout-minutes: 30
-    needs:
-      - run-tests
-      - calculate-results
-    if: >-
-      always() &&
-      needs.calculate-results.result == 'success' &&
-      needs.calculate-results.outputs.failed != '0' &&
-      fromJSON(needs.calculate-results.outputs.failed_specs_count) <= 20
-    defaults:
-      run:
-        working-directory: e2e-tests
-    env:
-      SERVER: "${{ inputs.server }}"
-      MM_LICENSE: "${{ secrets.MM_LICENSE }}"
-      ENABLED_DOCKER_SERVICES: "${{ inputs.enabled_docker_services }}"
-      TEST: playwright
-      BRANCH: "${{ inputs.branch }}-${{ inputs.test_type }}-retest"
-      BUILD_ID: "${{ inputs.build_id }}-retest"
-    steps:
-      - name: ci/checkout-repo
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          ref: ${{ inputs.commit_sha }}
-          fetch-depth: 0
-      - name: ci/setup-node
-        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
-        with:
-          node-version-file: ".nvmrc"
-          cache: npm
-          cache-dependency-path: "e2e-tests/playwright/package-lock.json"
-      - name: ci/get-webapp-node-modules
-        working-directory: webapp
-        run: make node_modules
-      - name: ci/run-failed-specs
-        env:
-          SPEC_FILES: ${{ needs.calculate-results.outputs.failed_specs }}
-        run: |
-          echo "Retesting failed specs: $SPEC_FILES"
-          make cloud-init
-          make start-server run-specs
-      - name: ci/cloud-teardown
-        if: always()
-        run: make cloud-teardown
-      - name: ci/upload-retest-results
-        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
-        if: always()
-        with:
-          name: playwright-${{ inputs.test_type }}-${{ inputs.server_edition }}-retest-results
-          path: |
-            e2e-tests/playwright/logs/
-            e2e-tests/playwright/results/
-          retention-days: 5
+  # NB: retries for failing specs happen INLINE inside each shard's
+  # `ci/run-tests` step (see e2e-tests/.ci/server.run_playwright.sh).
+  # That reuses the already-running server+docker stack instead of
+  # paying ~4-7 min to provision a fresh one here, and it correctly
+  # handles the chrome + chrome-serial project split. The old
+  # standalone `run-failed-tests` job was removed because it was
+  # invoking `--project=chrome` against specs that only exist in
+  # chrome-serial, causing the retest to run zero tests.
 
   report:
     runs-on: ubuntu-24.04
     needs:
       - generate-test-variables
       - run-tests
       - calculate-results
-      - run-failed-tests
     if: always() && needs.calculate-results.result == 'success'
     outputs:
       passed: "${{ steps.final-results.outputs.passed }}"
@@ -335,28 +383,23 @@ jobs:
           cache: npm
           cache-dependency-path: "e2e-tests/playwright/package-lock.json"
 
-      # Download merged results (uploaded by calculate-results)
+      # Download merged results (uploaded by calculate-results). These blob
+      # reports already include the inline per-shard retry results, so no
+      # separate retest download/merge is needed here.
       - name: ci/download-results
         uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0
         with:
           name: playwright-${{ inputs.test_type }}-${{ inputs.server_edition }}-results
           path: e2e-tests/playwright/results/
 
-      # Download retest results (only if retest ran)
-      - name: ci/download-retest-results
-        if: needs.run-failed-tests.result != 'skipped'
-        uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0
-        with:
-          name: playwright-${{ inputs.test_type }}-${{ inputs.server_edition }}-retest-results
-          path: e2e-tests/playwright/retest-results/
-
-      # Calculate results (with optional merge of retest results)
+      # Calculate final results. Tests that failed in the first pass but
+      # passed on inline retry are reported as `flaky`, not `failed`, so
+      # no retest-results-path is needed.
       - name: ci/calculate-results
         id: final-results
         uses: ./.github/actions/calculate-playwright-results
         with:
           original-results-path: e2e-tests/playwright/results/reporter/results.json
-          retest-results-path: ${{ needs.run-failed-tests.result != 'skipped' && 'e2e-tests/playwright/retest-results/results/reporter/results.json' || '' }}
 
       - name: ci/aws-configure
         uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6.0.0
@@ -394,9 +437,7 @@ jobs:
         id: duration
         env:
           START_TIME: ${{ needs.generate-test-variables.outputs.start_time }}
-          FIRST_PASS_END_TIME: ${{ needs.calculate-results.outputs.end_time }}
-          RETEST_RESULT: ${{ needs.run-failed-tests.result }}
-          RETEST_SPEC_COUNT: ${{ needs.calculate-results.outputs.failed_specs_count }}
+          FLAKY_COUNT: ${{ steps.final-results.outputs.flaky }}
           TEST_DURATION: ${{ steps.final-results.outputs.test_duration }}
         run: |
           NOW=$(date +%s)
@@ -405,33 +446,22 @@ jobs:
           SECONDS=$((ELAPSED % 60))
           DURATION="${MINUTES}m ${SECONDS}s"
 
-          # Compute first-pass and re-run durations
-          FIRST_PASS_ELAPSED=$((FIRST_PASS_END_TIME - START_TIME))
-          FP_MIN=$((FIRST_PASS_ELAPSED / 60))
-          FP_SEC=$((FIRST_PASS_ELAPSED % 60))
-          FIRST_PASS="${FP_MIN}m ${FP_SEC}s"
-
-          if [ "$RETEST_RESULT" != "skipped" ]; then
-            RERUN_ELAPSED=$((NOW - FIRST_PASS_END_TIME))
-            RR_MIN=$((RERUN_ELAPSED / 60))
-            RR_SEC=$((RERUN_ELAPSED % 60))
-            RUN_BREAKDOWN=" (first-pass: ${FIRST_PASS}, re-run: ${RR_MIN}m ${RR_SEC}s)"
-          else
-            RUN_BREAKDOWN=""
-          fi
-
-          # Duration icons: >20m high alert, >15m warning, otherwise clock
+          # Duration icons: >20m high alert, >15m warning, otherwise clock.
+          # Retries now happen inline per-shard, so there's no separate
+          # first-pass/re-run breakdown — the shard wall-clock already
+          # includes any retries it needed.
           if [ "$MINUTES" -ge 20 ]; then
-            DURATION_DISPLAY=":rotating_light: ${DURATION}${RUN_BREAKDOWN} | test: ${TEST_DURATION}"
+            DURATION_DISPLAY=":rotating_light: ${DURATION} | test: ${TEST_DURATION}"
           elif [ "$MINUTES" -ge 15 ]; then
-            DURATION_DISPLAY=":warning: ${DURATION}${RUN_BREAKDOWN} | test: ${TEST_DURATION}"
+            DURATION_DISPLAY=":warning: ${DURATION} | test: ${TEST_DURATION}"
           else
-            DURATION_DISPLAY=":clock3: ${DURATION}${RUN_BREAKDOWN} | test: ${TEST_DURATION}"
+            DURATION_DISPLAY=":clock3: ${DURATION} | test: ${TEST_DURATION}"
           fi
 
-          # Retest indicator with spec count
-          if [ "$RETEST_RESULT" != "skipped" ]; then
-            RETEST_DISPLAY=":repeat: re-run ${RETEST_SPEC_COUNT} spec(s)"
+          # Flaky indicator: tests that failed first pass but passed on
+          # inline retry. Signals retries did run.
+          if [ -n "$FLAKY_COUNT" ] && [ "$FLAKY_COUNT" -gt 0 ] 2>/dev/null; then
+            RETEST_DISPLAY=":repeat: ${FLAKY_COUNT} flaky"
           else
             RETEST_DISPLAY=""
           fi
@@ -505,7 +535,6 @@ jobs:
           COMMIT_STATUS_MESSAGE: ${{ steps.final-results.outputs.commit_status_message }}
           FAILED_TESTS: ${{ steps.final-results.outputs.failed_tests }}
           DURATION_DISPLAY: ${{ steps.duration.outputs.duration_display }}
-          RETEST_RESULT: ${{ needs.run-failed-tests.result }}
         run: |
           {
             echo "## E2E Test Results - Playwright ${TEST_TYPE}"
@@ -537,10 +566,9 @@ jobs:
             echo "| commit_status_message | ${COMMIT_STATUS_MESSAGE} |"
             echo "| failed_specs | ${FAILED_SPECS:-none} |"
             echo "| duration | ${DURATION_DISPLAY} |"
-            if [ "$RETEST_RESULT" != "skipped" ]; then
-              echo "| retested | Yes |"
-            else
-              echo "| retested | No |"
+            # Flaky > 0 means some tests needed the inline retry to pass.
+            if [ -n "$FLAKY" ] && [ "$FLAKY" -gt 0 ] 2>/dev/null; then
+              echo "| retried (flaky) | ${FLAKY} |"
             fi
 
             echo ""