From 37f81732b553b03a1950bb864e4cd9c6303af66f Mon Sep 17 00:00:00 2001
From: graysurf <10785178+graysurf@users.noreply.github.com>
Date: Thu, 26 Feb 2026 10:19:55 +0800
Subject: [PATCH] feat(ci): add strict stale-test audit guardrails

- Add A strict CI stale-test audit command with deterministic regression output.
- Add A workspace governance runbook for remove keep rewrite defer decisions.
- Wire A stale-test audit stage into the required-checks entrypoint.
---
 .../nils-cli-verify-required-checks/SKILL.md  |   8 +-
 .../nils-cli-verify-required-checks.sh        |   4 +-
 DEVELOPMENT.md                                |   4 +-
 docs/runbooks/test-cleanup-governance.md      |  52 +++++++
 scripts/ci/docs-placement-audit.sh            |   2 +-
 scripts/ci/test-stale-audit-baseline.tsv      |  15 ++
 scripts/ci/test-stale-audit.sh                | 140 ++++++++++++++++++
 7 files changed, 221 insertions(+), 4 deletions(-)
 create mode 100644 docs/runbooks/test-cleanup-governance.md
 create mode 100644 scripts/ci/test-stale-audit-baseline.tsv
 create mode 100644 scripts/ci/test-stale-audit.sh

diff --git a/.agents/skills/nils-cli-verify-required-checks/SKILL.md b/.agents/skills/nils-cli-verify-required-checks/SKILL.md
index ea2728aa..e4896bfa 100644
--- a/.agents/skills/nils-cli-verify-required-checks/SKILL.md
+++ b/.agents/skills/nils-cli-verify-required-checks/SKILL.md
@@ -20,12 +20,18 @@ Inputs:
 Outputs:
 
 - Runs the required pre-delivery checks from `DEVELOPMENT.md`:
+  - `bash scripts/ci/docs-placement-audit.sh --strict`
+  - `bash scripts/ci/docs-hygiene-audit.sh --strict`
+  - `bash scripts/ci/test-stale-audit.sh --strict`
+  - `bash scripts/ci/completion-asset-audit.sh --strict`
   - `cargo fmt --all -- --check`
   - `cargo clippy --all-targets --all-features -- -D warnings`
   - `cargo test --workspace`
+  - `bash scripts/ci/completion-flag-parity-audit.sh --strict`
   - `zsh -f tests/zsh/completion.test.zsh`
 - In `--docs-only` mode, runs only:
   - `bash scripts/ci/docs-placement-audit.sh --strict`
+  - `bash scripts/ci/docs-hygiene-audit.sh --strict`
 - Prints the failing command (if any) and exits non-zero on failure.
 
 Exit codes:
@@ -37,7 +43,7 @@ Exit codes:
 Failure modes:
 
 - Not in a git work tree (cannot resolve repo root).
-- Missing required tools on `PATH` (`git`, `cargo`, `zsh`).
+- Missing required tools on `PATH` (`git`, `cargo`, `zsh`, `rg`).
 - Any of the required lint/tests fail.
 
 ## Scripts (only entrypoints)
diff --git a/.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh b/.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh
index 208145c3..17062e66 100755
--- a/.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh
+++ b/.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh
@@ -9,6 +9,7 @@ Usage:
 Runs the required pre-delivery checks from DEVELOPMENT.md:
   - bash scripts/ci/docs-placement-audit.sh --strict
   - bash scripts/ci/docs-hygiene-audit.sh --strict
+  - bash scripts/ci/test-stale-audit.sh --strict
   - bash scripts/ci/completion-asset-audit.sh --strict
   - bash scripts/ci/completion-flag-parity-audit.sh --strict
   - cargo fmt --all -- --check
@@ -60,7 +61,7 @@ done
 
 required_cmds=(git)
 if [[ "$docs_only" -eq 0 ]]; then
-  required_cmds+=(cargo zsh)
+  required_cmds+=(cargo zsh rg)
 fi
 
 for cmd in "${required_cmds[@]}"; do
@@ -115,6 +116,7 @@ if [[ "$docs_only" -eq 1 ]]; then
   exit 0
 fi
 
+run bash scripts/ci/test-stale-audit.sh --strict
 coverage_dir="${NILS_CLI_COVERAGE_DIR:-target/coverage}"
 run mkdir -p "$coverage_dir"
 run bash scripts/ci/completion-asset-audit.sh --strict
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
index 2daf32b1..e9d1e61c 100644
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@@ -44,6 +44,7 @@
 ## Documentation placement
 
 - Canonical policy: `docs/specs/crate-docs-placement-policy.md`.
+- Stale-test lifecycle and reviewer checklist: `docs/runbooks/test-cleanup-governance.md`.
 - When Markdown files change, run: `bash scripts/ci/docs-placement-audit.sh --strict`.
 - For stale references, transient-doc cleanup, and cross-link hygiene, run:
   `bash scripts/ci/docs-hygiene-audit.sh --strict`.
@@ -64,11 +65,12 @@
 - `bash scripts/ci/completion-flag-parity-audit.sh --strict`
 - `bash scripts/ci/docs-placement-audit.sh --strict`
 - `bash scripts/ci/docs-hygiene-audit.sh --strict`
+- `bash scripts/ci/test-stale-audit.sh --strict`
 - Coverage must be **>= 85.00%** total line coverage:
   - `mkdir -p target/coverage`
   - `cargo llvm-cov nextest --profile ci --workspace --lcov --output-path target/coverage/lcov.info --fail-under-lines 85`
   - `scripts/ci/coverage-summary.sh target/coverage/lcov.info`
-- Or run the single entrypoint for fmt/clippy/tests: `./.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh` (it pre-creates `target/coverage`; still run coverage commands above)
+- Or run the single entrypoint for required checks: `./.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh` (includes docs/completion/stale-test audits plus fmt/clippy/tests; it pre-creates `target/coverage`, but still run coverage commands above)
 - Docs-only fast path: if every changed file is documentation-only (`*.md`, `docs/**`, `crates/*/docs/**`, plus root docs like `README.md`, `DEVELOPMENT.md`), run:
   - `./.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh --docs-only`
   - In this mode, docs placement and docs hygiene checks still run; full workspace lint/test/coverage checks may be skipped.
diff --git a/docs/runbooks/test-cleanup-governance.md b/docs/runbooks/test-cleanup-governance.md
new file mode 100644
index 00000000..1e0f19a3
--- /dev/null
+++ b/docs/runbooks/test-cleanup-governance.md
@@ -0,0 +1,52 @@
+# Test Cleanup Governance
+
+## Purpose
+
+This runbook defines the stale test lifecycle for workspace cleanup and ongoing maintenance.
+Use it to decide whether a candidate should be `remove`, `keep`, `rewrite`, or `defer`, and to keep CI guardrails deterministic.
+
+## Stale Test Lifecycle
+
+1. Discover candidates with `bash scripts/dev/workspace-test-stale-audit.sh`.
+2. Classify each candidate using one decision mode:
+   - `remove`: deterministic stale artifact with replacement coverage already present.
+   - `keep`: still protects active behavior, parity, JSON schema, warning text, color handling, or exit semantics.
+   - `rewrite`: behavior is still needed but the test/helper/fixture implementation is obsolete.
+   - `defer`: evidence is ambiguous (for example macro indirection or reflection risk) and requires manual review.
+3. Validate contract safety before merge.
+4. Update CI baseline only after reviewed cleanup PRs merge.
+
+## Evidence Rules
+
+Before marking a candidate `remove`, include all of the following:
+
+- Candidate path and symbol evidence from stale-test inventory output.
+- Confirmation that `contract-allowlist.tsv` does not protect the candidate path.
+- Replacement test evidence when user-visible behavior could change.
+- Explicit validation command outputs in the PR (`test-stale-audit`, required checks, and coverage gate).
+
+For `rewrite`, document:
+
+- Why the old test/helper is obsolete.
+- Which test now guards the behavior.
+- Which command(s) prove parity/contract behavior still pass.
+
+## CI Guardrails
+
+- `bash scripts/ci/test-stale-audit.sh --strict`
+  - Fails on new orphaned helper regressions relative to `scripts/ci/test-stale-audit-baseline.tsv`.
+  - Fails on deprecated-path leftovers (`deprecated_path_marker`) in the current inventory.
+- `./.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh`
+  - Must pass before delivery.
+- Coverage gate (non-docs changes):
+  - `cargo llvm-cov nextest --profile ci --workspace --lcov --output-path target/coverage/lcov.info --fail-under-lines 85`
+  - `scripts/ci/coverage-summary.sh target/coverage/lcov.info`
+
+## Reviewer Checklist
+
+- Decision mode selected: `remove`, `keep`, `rewrite`, or `defer`.
+- Evidence links include crate/file/symbol and replacement assertions when required.
+- `bash scripts/ci/test-stale-audit.sh --strict` output is clean.
+- Required checks entrypoint passes.
+- Coverage gate result is attached for non-doc changes.
+- Baseline updates are justified and limited to reviewed stale-helper removals.
diff --git a/scripts/ci/docs-placement-audit.sh b/scripts/ci/docs-placement-audit.sh
index 861a8523..2659d778 100755
--- a/scripts/ci/docs-placement-audit.sh
+++ b/scripts/ci/docs-placement-audit.sh
@@ -77,7 +77,7 @@ done
 runbook_is_approved_workspace_file() {
   local base="$1"
   case "$base" in
-    cli-completion-development-standard.md|crates-io-status-script-runbook.md|new-cli-crate-development-standard.md|wrappers-mode-usage.md)
+    cli-completion-development-standard.md|crates-io-status-script-runbook.md|new-cli-crate-development-standard.md|test-cleanup-governance.md|wrappers-mode-usage.md)
       return 0
       ;;
     *)
diff --git a/scripts/ci/test-stale-audit-baseline.tsv b/scripts/ci/test-stale-audit-baseline.tsv
new file mode 100644
index 00000000..cb80ee5d
--- /dev/null
+++ b/scripts/ci/test-stale-audit-baseline.tsv
@@ -0,0 +1,15 @@
+crate	path	symbol_or_test	signal	proposed_action
+agent-docs	crates/agent-docs/tests/common.rs	drop (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_commit_hash_missing_ref_errors (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_commit_hash_outputs_sha_for_head (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_commit_hash_resolves_annotated_tag (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_copy_staged_both_outputs_diff_and_status (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_copy_staged_help_prints_usage (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_copy_staged_no_changes_warns_and_exits_1 (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_copy_staged_rejects_conflicting_modes (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_copy_staged_rejects_unknown_arg (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_copy_staged_stdout_outputs_diff_only (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_root_not_in_repo_errors (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_root_prints_message (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_root_shell_outputs_cd_command (fanout=0)	helper_fanout	remove
+git-cli	crates/git-cli/tests/utils.rs	utils_zip_creates_backup_zip (fanout=0)	helper_fanout	remove
diff --git a/scripts/ci/test-stale-audit.sh b/scripts/ci/test-stale-audit.sh
new file mode 100644
index 00000000..55db6f5b
--- /dev/null
+++ b/scripts/ci/test-stale-audit.sh
@@ -0,0 +1,140 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+usage() {
+  cat <<'USAGE'
+Usage:
+  scripts/ci/test-stale-audit.sh [--strict]
+
+Runs workspace stale-test inventory and detects stale-test regressions:
+  - new orphaned helper candidates (`signal=helper_fanout`, `proposed_action=remove`)
+  - deprecated-path leftovers (`signal=deprecated_path_marker`)
+
+Policy baseline:
+  scripts/ci/test-stale-audit-baseline.tsv
+
+Options:
+  --strict   Treat regressions as hard failures (exit 1)
+  -h, --help Show this help
+USAGE
+}
+
+strict=0
+while [[ $# -gt 0 ]]; do
+  case "${1:-}" in
+    --strict)
+      strict=1
+      shift
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "error: unknown argument: ${1:-}" >&2
+      usage >&2
+      exit 2
+      ;;
+  esac
+done
+
+repo_root="$(git rev-parse --show-toplevel 2>/dev/null || true)"
+if [[ -z "$repo_root" || ! -d "$repo_root" ]]; then
+  echo "error: must run inside a git work tree" >&2
+  exit 2
+fi
+cd "$repo_root"
+
+if ! command -v rg >/dev/null 2>&1; then
+  echo "error: ripgrep (rg) is required" >&2
+  exit 2
+fi
+
+if [[ -z "${AGENT_HOME:-}" ]]; then
+  if [[ -z "${HOME:-}" ]]; then
+    echo "error: AGENT_HOME is not set and HOME is unavailable" >&2
+    exit 2
+  fi
+  AGENT_HOME="${HOME}/.agents"
+fi
+export AGENT_HOME
+
+baseline_file="scripts/ci/test-stale-audit-baseline.tsv"
+if [[ ! -f "$baseline_file" ]]; then
+  echo "error: missing baseline file: $baseline_file" >&2
+  exit 2
+fi
+
+audit_root="${AGENT_HOME}/out/workspace-test-cleanup"
+inventory_file="${audit_root}/stale-tests.ci.tsv"
+mkdir -p "$audit_root"
+
+tmp_dir="$(mktemp -d "${TMPDIR:-/tmp}/test-stale-audit.XXXXXX")"
+current_orphans="${tmp_dir}/current-orphans.tsv"
+baseline_orphans="${tmp_dir}/baseline-orphans.tsv"
+new_orphans="${tmp_dir}/new-orphans.tsv"
+deprecated_leftovers="${tmp_dir}/deprecated-leftovers.tsv"
+
+cleanup() {
+  rm -rf "$tmp_dir"
+}
+trap cleanup EXIT
+
+bash scripts/dev/workspace-test-stale-audit.sh --out "$inventory_file" >/dev/null
+
+awk -F '\t' '
+  NR > 1 && $5 == "helper_fanout" && $6 == "remove" {
+    print $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6
+  }
+' "$inventory_file" | LC_ALL=C sort -u >"$current_orphans"
+
+awk -F '\t' '
+  NR > 1 {
+    print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5
+  }
+' "$baseline_file" | LC_ALL=C sort -u >"$baseline_orphans"
+
+comm -23 "$current_orphans" "$baseline_orphans" >"$new_orphans"
+
+awk -F '\t' '
+  NR > 1 && $5 == "deprecated_path_marker" {
+    print $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6
+  }
+' "$inventory_file" | LC_ALL=C sort -u >"$deprecated_leftovers"
+
+current_count="$(wc -l <"$current_orphans" | tr -d ' ')"
+baseline_count="$(wc -l <"$baseline_orphans" | tr -d ' ')"
+new_count="$(wc -l <"$new_orphans" | tr -d ' ')"
+deprecated_count="$(wc -l <"$deprecated_leftovers" | tr -d ' ')"
+
+echo "INFO: stale-test audit inventory refreshed at $inventory_file"
+echo "INFO: orphaned helper candidates current=$current_count baseline=$baseline_count new=$new_count"
+echo "INFO: deprecated-path leftovers=$deprecated_count"
+
+report_regressions() {
+  local prefix="$1"
+
+  while IFS=$'\t' read -r crate rel_path symbol signal action; do
+    [[ -z "$crate" ]] && continue
+    echo "${prefix}: stale-test regression type=orphaned-helper crate=${crate} path=${rel_path} symbol=${symbol} signal=${signal} action=${action}"
+  done <"$new_orphans"
+
+  while IFS=$'\t' read -r crate rel_path symbol signal action; do
+    [[ -z "$crate" ]] && continue
+    echo "${prefix}: stale-test regression type=deprecated-path-leftover crate=${crate} path=${rel_path} symbol=${symbol} signal=${signal} action=${action}"
+  done <"$deprecated_leftovers"
+}
+
+if (( new_count > 0 || deprecated_count > 0 )); then
+  if [[ "$strict" -eq 1 ]]; then
+    report_regressions "FAIL"
+    echo "FAIL: stale-test audit (strict=$strict, regressions=$((new_count + deprecated_count)), new_orphans=$new_count, deprecated_leftovers=$deprecated_count)"
+    exit 1
+  fi
+
+  report_regressions "WARN"
+  echo "WARN: stale-test audit (strict=$strict, regressions=$((new_count + deprecated_count)), new_orphans=$new_count, deprecated_leftovers=$deprecated_count)"
+  exit 0
+fi
+
+echo "PASS: stale-test audit (strict=$strict, current_orphans=$current_count, baseline_orphans=$baseline_count, new_orphans=0, deprecated_leftovers=0)"