From 37f81732b553b03a1950bb864e4cd9c6303af66f Mon Sep 17 00:00:00 2001 From: graysurf <10785178+graysurf@users.noreply.github.com> Date: Thu, 26 Feb 2026 10:19:55 +0800 Subject: [PATCH] feat(ci): add strict stale-test audit guardrails - Add A strict CI stale-test audit command with deterministic regression output. - Add A workspace governance runbook for remove keep rewrite defer decisions. - Wire A stale-test audit stage into the required-checks entrypoint. --- .../nils-cli-verify-required-checks/SKILL.md | 8 +- .../nils-cli-verify-required-checks.sh | 4 +- DEVELOPMENT.md | 4 +- docs/runbooks/test-cleanup-governance.md | 52 +++++++ scripts/ci/docs-placement-audit.sh | 2 +- scripts/ci/test-stale-audit-baseline.tsv | 15 ++ scripts/ci/test-stale-audit.sh | 140 ++++++++++++++++++ 7 files changed, 221 insertions(+), 4 deletions(-) create mode 100644 docs/runbooks/test-cleanup-governance.md create mode 100644 scripts/ci/test-stale-audit-baseline.tsv create mode 100644 scripts/ci/test-stale-audit.sh diff --git a/.agents/skills/nils-cli-verify-required-checks/SKILL.md b/.agents/skills/nils-cli-verify-required-checks/SKILL.md index ea2728aa..e4896bfa 100644 --- a/.agents/skills/nils-cli-verify-required-checks/SKILL.md +++ b/.agents/skills/nils-cli-verify-required-checks/SKILL.md @@ -20,12 +20,18 @@ Inputs: Outputs: - Runs the required pre-delivery checks from `DEVELOPMENT.md`: + - `bash scripts/ci/docs-placement-audit.sh --strict` + - `bash scripts/ci/docs-hygiene-audit.sh --strict` + - `bash scripts/ci/test-stale-audit.sh --strict` + - `bash scripts/ci/completion-asset-audit.sh --strict` - `cargo fmt --all -- --check` - `cargo clippy --all-targets --all-features -- -D warnings` - `cargo test --workspace` + - `bash scripts/ci/completion-flag-parity-audit.sh --strict` - `zsh -f tests/zsh/completion.test.zsh` - In `--docs-only` mode, runs only: - `bash scripts/ci/docs-placement-audit.sh --strict` + - `bash scripts/ci/docs-hygiene-audit.sh --strict` - Prints the failing command (if any) and exits non-zero on failure. Exit codes: @@ -37,7 +43,7 @@ Exit codes: Failure modes: - Not in a git work tree (cannot resolve repo root). -- Missing required tools on `PATH` (`git`, `cargo`, `zsh`). +- Missing required tools on `PATH` (`git`, `cargo`, `zsh`, `rg`). - Any of the required lint/tests fail. ## Scripts (only entrypoints) diff --git a/.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh b/.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh index 208145c3..17062e66 100755 --- a/.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh +++ b/.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh @@ -9,6 +9,7 @@ Usage: Runs the required pre-delivery checks from DEVELOPMENT.md: - bash scripts/ci/docs-placement-audit.sh --strict - bash scripts/ci/docs-hygiene-audit.sh --strict + - bash scripts/ci/test-stale-audit.sh --strict - bash scripts/ci/completion-asset-audit.sh --strict - bash scripts/ci/completion-flag-parity-audit.sh --strict - cargo fmt --all -- --check @@ -60,7 +61,7 @@ done required_cmds=(git) if [[ "$docs_only" -eq 0 ]]; then - required_cmds+=(cargo zsh) + required_cmds+=(cargo zsh rg) fi for cmd in "${required_cmds[@]}"; do @@ -115,6 +116,7 @@ if [[ "$docs_only" -eq 1 ]]; then exit 0 fi +run bash scripts/ci/test-stale-audit.sh --strict coverage_dir="${NILS_CLI_COVERAGE_DIR:-target/coverage}" run mkdir -p "$coverage_dir" run bash scripts/ci/completion-asset-audit.sh --strict diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 2daf32b1..e9d1e61c 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -44,6 +44,7 @@ ## Documentation placement - Canonical policy: `docs/specs/crate-docs-placement-policy.md`. +- Stale-test lifecycle and reviewer checklist: `docs/runbooks/test-cleanup-governance.md`. - When Markdown files change, run: `bash scripts/ci/docs-placement-audit.sh --strict`. - For stale references, transient-doc cleanup, and cross-link hygiene, run: `bash scripts/ci/docs-hygiene-audit.sh --strict`. @@ -64,11 +65,12 @@ - `bash scripts/ci/completion-flag-parity-audit.sh --strict` - `bash scripts/ci/docs-placement-audit.sh --strict` - `bash scripts/ci/docs-hygiene-audit.sh --strict` +- `bash scripts/ci/test-stale-audit.sh --strict` - Coverage must be **>= 85.00%** total line coverage: - `mkdir -p target/coverage` - `cargo llvm-cov nextest --profile ci --workspace --lcov --output-path target/coverage/lcov.info --fail-under-lines 85` - `scripts/ci/coverage-summary.sh target/coverage/lcov.info` -- Or run the single entrypoint for fmt/clippy/tests: `./.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh` (it pre-creates `target/coverage`; still run coverage commands above) +- Or run the single entrypoint for required checks: `./.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh` (includes docs/completion/stale-test audits plus fmt/clippy/tests; it pre-creates `target/coverage`, but still run coverage commands above) - Docs-only fast path: if every changed file is documentation-only (`*.md`, `docs/**`, `crates/*/docs/**`, plus root docs like `README.md`, `DEVELOPMENT.md`), run: - `./.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh --docs-only` - In this mode, docs placement and docs hygiene checks still run; full workspace lint/test/coverage checks may be skipped. diff --git a/docs/runbooks/test-cleanup-governance.md b/docs/runbooks/test-cleanup-governance.md new file mode 100644 index 00000000..1e0f19a3 --- /dev/null +++ b/docs/runbooks/test-cleanup-governance.md @@ -0,0 +1,52 @@ +# Test Cleanup Governance + +## Purpose + +This runbook defines the stale test lifecycle for workspace cleanup and ongoing maintenance. +Use it to decide whether a candidate should be `remove`, `keep`, `rewrite`, or `defer`, and to keep CI guardrails deterministic. + +## Stale Test Lifecycle + +1. Discover candidates with `bash scripts/dev/workspace-test-stale-audit.sh`. +2. Classify each candidate using one decision mode: + - `remove`: deterministic stale artifact with replacement coverage already present. + - `keep`: still protects active behavior, parity, JSON schema, warning text, color handling, or exit semantics. + - `rewrite`: behavior is still needed but the test/helper/fixture implementation is obsolete. + - `defer`: evidence is ambiguous (for example macro indirection or reflection risk) and requires manual review. +3. Validate contract safety before merge. +4. Update CI baseline only after reviewed cleanup PRs merge. + +## Evidence Rules + +Before marking a candidate `remove`, include all of the following: + +- Candidate path and symbol evidence from stale-test inventory output. +- Confirmation that `contract-allowlist.tsv` does not protect the candidate path. +- Replacement test evidence when user-visible behavior could change. +- Explicit validation command outputs in the PR (`test-stale-audit`, required checks, and coverage gate). + +For `rewrite`, document: + +- Why the old test/helper is obsolete. +- Which test now guards the behavior. +- Which command(s) prove parity/contract behavior still pass. + +## CI Guardrails + +- `bash scripts/ci/test-stale-audit.sh --strict` + - Fails on new orphaned helper regressions relative to `scripts/ci/test-stale-audit-baseline.tsv`. + - Fails on deprecated-path leftovers (`deprecated_path_marker`) in the current inventory. +- `./.agents/skills/nils-cli-verify-required-checks/scripts/nils-cli-verify-required-checks.sh` + - Must pass before delivery. +- Coverage gate (non-docs changes): + - `cargo llvm-cov nextest --profile ci --workspace --lcov --output-path target/coverage/lcov.info --fail-under-lines 85` + - `scripts/ci/coverage-summary.sh target/coverage/lcov.info` + +## Reviewer Checklist + +- Decision mode selected: `remove`, `keep`, `rewrite`, or `defer`. +- Evidence links include crate/file/symbol and replacement assertions when required. +- `bash scripts/ci/test-stale-audit.sh --strict` output is clean. +- Required checks entrypoint passes. +- Coverage gate result is attached for non-doc changes. +- Baseline updates are justified and limited to reviewed stale-helper removals. diff --git a/scripts/ci/docs-placement-audit.sh b/scripts/ci/docs-placement-audit.sh index 861a8523..2659d778 100755 --- a/scripts/ci/docs-placement-audit.sh +++ b/scripts/ci/docs-placement-audit.sh @@ -77,7 +77,7 @@ done runbook_is_approved_workspace_file() { local base="$1" case "$base" in - cli-completion-development-standard.md|crates-io-status-script-runbook.md|new-cli-crate-development-standard.md|wrappers-mode-usage.md) + cli-completion-development-standard.md|crates-io-status-script-runbook.md|new-cli-crate-development-standard.md|test-cleanup-governance.md|wrappers-mode-usage.md) return 0 ;; *) diff --git a/scripts/ci/test-stale-audit-baseline.tsv b/scripts/ci/test-stale-audit-baseline.tsv new file mode 100644 index 00000000..cb80ee5d --- /dev/null +++ b/scripts/ci/test-stale-audit-baseline.tsv @@ -0,0 +1,15 @@ +crate path symbol_or_test signal proposed_action +agent-docs crates/agent-docs/tests/common.rs drop (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_commit_hash_missing_ref_errors (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_commit_hash_outputs_sha_for_head (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_commit_hash_resolves_annotated_tag (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_copy_staged_both_outputs_diff_and_status (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_copy_staged_help_prints_usage (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_copy_staged_no_changes_warns_and_exits_1 (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_copy_staged_rejects_conflicting_modes (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_copy_staged_rejects_unknown_arg (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_copy_staged_stdout_outputs_diff_only (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_root_not_in_repo_errors (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_root_prints_message (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_root_shell_outputs_cd_command (fanout=0) helper_fanout remove +git-cli crates/git-cli/tests/utils.rs utils_zip_creates_backup_zip (fanout=0) helper_fanout remove diff --git a/scripts/ci/test-stale-audit.sh b/scripts/ci/test-stale-audit.sh new file mode 100644 index 00000000..55db6f5b --- /dev/null +++ b/scripts/ci/test-stale-audit.sh @@ -0,0 +1,140 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'USAGE' +Usage: + scripts/ci/test-stale-audit.sh [--strict] + +Runs workspace stale-test inventory and detects stale-test regressions: + - new orphaned helper candidates (`signal=helper_fanout`, `proposed_action=remove`) + - deprecated-path leftovers (`signal=deprecated_path_marker`) + +Policy baseline: + scripts/ci/test-stale-audit-baseline.tsv + +Options: + --strict Treat regressions as hard failures (exit 1) + -h, --help Show this help +USAGE +} + +strict=0 +while [[ $# -gt 0 ]]; do + case "${1:-}" in + --strict) + strict=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "error: unknown argument: ${1:-}" >&2 + usage >&2 + exit 2 + ;; + esac +done + +repo_root="$(git rev-parse --show-toplevel 2>/dev/null || true)" +if [[ -z "$repo_root" || ! -d "$repo_root" ]]; then + echo "error: must run inside a git work tree" >&2 + exit 2 +fi +cd "$repo_root" + +if ! command -v rg >/dev/null 2>&1; then + echo "error: ripgrep (rg) is required" >&2 + exit 2 +fi + +if [[ -z "${AGENT_HOME:-}" ]]; then + if [[ -z "${HOME:-}" ]]; then + echo "error: AGENT_HOME is not set and HOME is unavailable" >&2 + exit 2 + fi + AGENT_HOME="${HOME}/.agents" +fi +export AGENT_HOME + +baseline_file="scripts/ci/test-stale-audit-baseline.tsv" +if [[ ! -f "$baseline_file" ]]; then + echo "error: missing baseline file: $baseline_file" >&2 + exit 2 +fi + +audit_root="${AGENT_HOME}/out/workspace-test-cleanup" +inventory_file="${audit_root}/stale-tests.ci.tsv" +mkdir -p "$audit_root" + +tmp_dir="$(mktemp -d "${TMPDIR:-/tmp}/test-stale-audit.XXXXXX")" +current_orphans="${tmp_dir}/current-orphans.tsv" +baseline_orphans="${tmp_dir}/baseline-orphans.tsv" +new_orphans="${tmp_dir}/new-orphans.tsv" +deprecated_leftovers="${tmp_dir}/deprecated-leftovers.tsv" + +cleanup() { + rm -rf "$tmp_dir" +} +trap cleanup EXIT + +bash scripts/dev/workspace-test-stale-audit.sh --out "$inventory_file" >/dev/null + +awk -F '\t' ' + NR > 1 && $5 == "helper_fanout" && $6 == "remove" { + print $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6 + } +' "$inventory_file" | LC_ALL=C sort -u >"$current_orphans" + +awk -F '\t' ' + NR > 1 { + print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 + } +' "$baseline_file" | LC_ALL=C sort -u >"$baseline_orphans" + +comm -23 "$current_orphans" "$baseline_orphans" >"$new_orphans" + +awk -F '\t' ' + NR > 1 && $5 == "deprecated_path_marker" { + print $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6 + } +' "$inventory_file" | LC_ALL=C sort -u >"$deprecated_leftovers" + +current_count="$(wc -l <"$current_orphans" | tr -d ' ')" +baseline_count="$(wc -l <"$baseline_orphans" | tr -d ' ')" +new_count="$(wc -l <"$new_orphans" | tr -d ' ')" +deprecated_count="$(wc -l <"$deprecated_leftovers" | tr -d ' ')" + +echo "INFO: stale-test audit inventory refreshed at $inventory_file" +echo "INFO: orphaned helper candidates current=$current_count baseline=$baseline_count new=$new_count" +echo "INFO: deprecated-path leftovers=$deprecated_count" + +report_regressions() { + local prefix="$1" + + while IFS=$'\t' read -r crate rel_path symbol signal action; do + [[ -z "$crate" ]] && continue + echo "${prefix}: stale-test regression type=orphaned-helper crate=${crate} path=${rel_path} symbol=${symbol} signal=${signal} action=${action}" + done <"$new_orphans" + + while IFS=$'\t' read -r crate rel_path symbol signal action; do + [[ -z "$crate" ]] && continue + echo "${prefix}: stale-test regression type=deprecated-path-leftover crate=${crate} path=${rel_path} symbol=${symbol} signal=${signal} action=${action}" + done <"$deprecated_leftovers" +} + +if (( new_count > 0 || deprecated_count > 0 )); then + if [[ "$strict" -eq 1 ]]; then + report_regressions "FAIL" + echo "FAIL: stale-test audit (strict=$strict, regressions=$((new_count + deprecated_count)), new_orphans=$new_count, deprecated_leftovers=$deprecated_count)" + exit 1 + fi + + report_regressions "WARN" + echo "WARN: stale-test audit (strict=$strict, regressions=$((new_count + deprecated_count)), new_orphans=$new_count, deprecated_leftovers=$deprecated_count)" + exit 0 +fi + +echo "PASS: stale-test audit (strict=$strict, current_orphans=$current_count, baseline_orphans=$baseline_count, new_orphans=0, deprecated_leftovers=0)"