From 72ea87e47498f4c0fd4e7b8250e9692b7f3cbba3 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 8 May 2026 15:10:21 -0500 Subject: [PATCH] fix(ci): isolate coreutils drift external execution --- .github/workflows/coreutils-args-drift.yml | 162 +++++++++++------- .../bashkit/tests/workflow_security_tests.rs | 85 +++++++++ specs/coreutils-args-port.md | 32 ++-- 3 files changed, 201 insertions(+), 78 deletions(-) create mode 100644 crates/bashkit/tests/workflow_security_tests.rs diff --git a/.github/workflows/coreutils-args-drift.yml b/.github/workflows/coreutils-args-drift.yml index 22f90be8..37d6a4bc 100644 --- a/.github/workflows/coreutils-args-drift.yml +++ b/.github/workflows/coreutils-args-drift.yml @@ -38,11 +38,11 @@ jobs: permissions: contents: read outputs: - drifted: ${{ steps.drift.outputs.drifted }} - pinned: ${{ steps.rev.outputs.pinned }} + drifted: ${{ steps.diff.outputs.drifted }} + modules: ${{ steps.regen_modules.outputs.modules }} + old_rev: ${{ steps.rev.outputs.pinned }} rev: ${{ steps.regen.outputs.rev }} utils: ${{ steps.regen.outputs.utils }} - modules: ${{ steps.regen_modules.outputs.modules }} steps: - name: Checkout bashkit uses: actions/checkout@v6 @@ -84,9 +84,11 @@ jobs: echo "::error::could not parse UUTILS_REVISION pin" exit 1 fi + upstream_full="$(git -C "$GITHUB_WORKSPACE/uutils" rev-parse HEAD)" upstream="$(git -C "$GITHUB_WORKSPACE/uutils" rev-parse --short HEAD)" echo "pinned=$pinned" >> "$GITHUB_OUTPUT" echo "upstream=$upstream" >> "$GITHUB_OUTPUT" + echo "upstream_full=$upstream_full" >> "$GITHUB_OUTPUT" echo "Pinned: $pinned Upstream HEAD: $upstream" # Drift workflow's job is to track upstream — codegen + harness @@ -94,7 +96,7 @@ jobs: # auto-PR bumps the pin AND the regenerated files atomically. - name: Check out uutils at upstream HEAD for codegen working-directory: uutils - run: git checkout "${{ steps.rev.outputs.upstream }}" + run: git checkout --detach "${{ steps.rev.outputs.upstream_full }}" - name: Regenerate argument surfaces id: regen @@ -187,27 +189,31 @@ jobs: BASHKIT_RUN_COREUTILS_DIFF: '1' run: cargo test -p bashkit --test coreutils_differential_tests - - name: Check for generated drift - id: drift + - name: Capture drift patch + id: diff working-directory: bashkit run: | set -euo pipefail - if [ -z "$(git status --porcelain -- crates/bashkit/src/builtins/generated)" ]; then + patch="$RUNNER_TEMP/coreutils-drift.patch" + if git diff --quiet -- crates/bashkit/src/builtins/generated; then echo "drifted=false" >> "$GITHUB_OUTPUT" - else - echo "drifted=true" >> "$GITHUB_OUTPUT" + : > "$patch" + exit 0 fi + git diff --binary -- crates/bashkit/src/builtins/generated > "$patch" + echo "drifted=true" >> "$GITHUB_OUTPUT" - - name: Upload generated drift - if: steps.drift.outputs.drifted == 'true' + - name: Upload drift patch + if: steps.diff.outputs.drifted == 'true' uses: actions/upload-artifact@v4 with: - name: coreutils-generated-drift - path: bashkit/crates/bashkit/src/builtins/generated/ + name: coreutils-drift-patch + path: ${{ runner.temp }}/coreutils-drift.patch if-no-files-found: error + retention-days: 1 open-pr: - name: Open drift pull request + name: Open pull request if drifted needs: regenerate if: needs.regenerate.outputs.drifted == 'true' runs-on: ubuntu-latest @@ -222,54 +228,86 @@ jobs: path: bashkit persist-credentials: false - - name: Download generated drift + - name: Download drift patch uses: actions/download-artifact@v4 with: - name: coreutils-generated-drift - path: bashkit/crates/bashkit/src/builtins/generated/ + name: coreutils-drift-patch + path: ${{ runner.temp }}/coreutils-drift - - name: Open pull request if drifted - uses: peter-evans/create-pull-request@v7 - with: - path: bashkit - branch: chore/coreutils-args-drift - delete-branch: true - # Bump the pin AND the regenerated args/module files in one atomic PR. - add-paths: crates/bashkit/src/builtins/generated/ - title: 'chore: sync uutils/coreutils argument surfaces and vendored modules' - commit-message: | - chore(builtins): sync uutils/coreutils argument surfaces and vendored modules - - Regenerated by `bashkit-coreutils-port` against - uutils/coreutils@${{ needs.regenerate.outputs.rev }}. - - Utilities: ${{ needs.regenerate.outputs.utils }} - Vendored modules: ${{ needs.regenerate.outputs.modules }} - body: | - Automated regeneration of argument surfaces and vendored - modules ported from - [uutils/coreutils](https://github.com/uutils/coreutils). - - **Source revision:** `${{ needs.regenerate.outputs.rev }}` - (was `${{ needs.regenerate.outputs.pinned }}`) - **Utilities:** ${{ needs.regenerate.outputs.utils }} - **Vendored modules:** ${{ needs.regenerate.outputs.modules }} - **Trigger:** `.github/workflows/coreutils-args-drift.yml` - - This PR bumps `UUTILS_REVISION` in - `crates/bashkit/src/builtins/generated/mod.rs`, the matching - `_args.rs` files, and any vendored module trees - atomically — args codegen, module-vendor mode, and the - body-drift harness all stay aligned to the same rev. - - Review the diff to confirm whether upstream: - - Added flags we should now expose - - Removed/renamed flags we should drop or migrate - - Reworded help text in user-visible ways - - Changed body code in vendored modules (e.g. `format/`) - - **Squash-merge as a human** so the merge commit is attributed - correctly per `AGENTS.md`. Intermediate bot commits in this PR - are discarded by squash. - - Design: [`specs/coreutils-args-port.md`](../tree/main/specs/coreutils-args-port.md). + - name: Apply drift patch + working-directory: bashkit + run: | + set -euo pipefail + branch="chore/coreutils-args-drift" + git checkout -B "$branch" + git apply --index "$RUNNER_TEMP/coreutils-drift/coreutils-drift.patch" + bad_paths="$(git diff --cached --name-only \ + | grep -v '^crates/bashkit/src/builtins/generated/' || true)" + if [ -n "$bad_paths" ]; then + echo "::error::drift patch touched paths outside generated coreutils files" + echo "$bad_paths" + exit 1 + fi + + - name: Commit and open PR + working-directory: bashkit + env: + GH_TOKEN: ${{ github.token }} + NEW_REV: ${{ needs.regenerate.outputs.rev }} + OLD_REV: ${{ needs.regenerate.outputs.old_rev }} + UTILS: ${{ needs.regenerate.outputs.utils }} + MODULES: ${{ needs.regenerate.outputs.modules }} + run: | + set -euo pipefail + branch="chore/coreutils-args-drift" + title="chore: sync uutils/coreutils argument surfaces and vendored modules" + body_file="$(mktemp)" + + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + gh auth setup-git + git commit -m "chore(builtins): sync uutils/coreutils argument surfaces and vendored modules" \ + -m "Regenerated by \`bashkit-coreutils-port\` against uutils/coreutils@$NEW_REV." \ + -m "Utilities: $UTILS" \ + -m "Vendored modules: $MODULES" + git push --force-with-lease --set-upstream origin "$branch" + + cat > "$body_file" <_args.rs\` files, and any vendored module trees + atomically — args codegen, module-vendor mode, and the + body-drift harness all stay aligned to the same rev. + + Review the diff to confirm whether upstream: + - Added flags we should now expose + - Removed/renamed flags we should drop or migrate + - Reworded help text in user-visible ways + - Changed body code in vendored modules (e.g. \`format/\`) + + **Squash-merge as a human** so the merge commit is attributed + correctly per \`AGENTS.md\`. Intermediate bot commits in this PR + are discarded by squash. + + Design: [\`specs/coreutils-args-port.md\`](../tree/main/specs/coreutils-args-port.md). + EOF + + if gh pr view "$branch" --repo "$GITHUB_REPOSITORY" >/dev/null 2>&1; then + gh pr edit "$branch" --repo "$GITHUB_REPOSITORY" \ + --title "$title" \ + --body-file "$body_file" + else + gh pr create --repo "$GITHUB_REPOSITORY" \ + --base main \ + --head "$branch" \ + --title "$title" \ + --body-file "$body_file" + fi diff --git a/crates/bashkit/tests/workflow_security_tests.rs b/crates/bashkit/tests/workflow_security_tests.rs new file mode 100644 index 00000000..9f34e146 --- /dev/null +++ b/crates/bashkit/tests/workflow_security_tests.rs @@ -0,0 +1,85 @@ +//! Security regression tests for repository workflows. +//! +//! These tests keep high-impact CI credential boundaries explicit. The +//! coreutils drift workflow intentionally executes code from uutils/coreutils, +//! so that work must stay in a read-only job with persisted checkout +//! credentials disabled. + +use std::fs; +use std::path::{Path, PathBuf}; + +fn repo_root() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .ancestors() + .nth(2) + .expect("crate must live under repo/crates/bashkit") + .to_path_buf() +} + +fn workflow(name: &str) -> String { + let path = repo_root().join(".github/workflows").join(name); + fs::read_to_string(path).expect("read workflow") +} + +fn section_between<'a>(text: &'a str, start: &str, end: &str) -> &'a str { + let start_idx = text.find(start).expect("section start"); + let rest = &text[start_idx..]; + let end_idx = rest.find(end).expect("section end"); + &rest[..end_idx] +} + +#[test] +fn coreutils_drift_executes_external_code_only_in_read_only_job() { + let wf = workflow("coreutils-args-drift.yml"); + let regenerate = section_between(&wf, " regenerate:", " open-pr:"); + + assert!( + regenerate.contains("permissions:\n contents: read"), + "uutils checkout/build/test job must not have repository write permissions" + ); + assert!( + !regenerate.contains("contents: write") && !regenerate.contains("pull-requests: write"), + "external-code job must not request write scopes" + ); + assert!( + regenerate.matches("persist-credentials: false").count() >= 2, + "both bashkit and uutils checkouts must disable persisted credentials" + ); + assert!( + regenerate.contains("cargo build --release --bin coreutils"), + "test must cover the job that executes the external uutils build" + ); + assert!( + regenerate.contains("BASHKIT_RUN_COREUTILS_DIFF: '1'"), + "test must cover the job that executes the external uutils binary" + ); +} + +#[test] +fn coreutils_drift_write_job_does_not_execute_uutils_or_third_party_pr_action() { + let wf = workflow("coreutils-args-drift.yml"); + let open_pr = wf + .split_once(" open-pr:") + .map(|(_, section)| section) + .expect("open-pr job"); + + assert!( + open_pr.contains("permissions:\n contents: write\n pull-requests: write"), + "PR publication job owns the minimal write scopes" + ); + assert!( + !open_pr.contains("repository: uutils/coreutils") + && !open_pr.contains("working-directory: uutils") + && !open_pr.contains("cargo build") + && !open_pr.contains("BASHKIT_RUN_COREUTILS_DIFF"), + "write-scoped job must not checkout, build, or execute uutils" + ); + assert!( + !wf.contains("peter-evans/create-pull-request"), + "write-scoped PR creation must not depend on a third-party action" + ); + assert!( + open_pr.contains("gh pr create") || open_pr.contains("gh pr edit"), + "write-scoped job should publish with GitHub CLI" + ); +} diff --git a/specs/coreutils-args-port.md b/specs/coreutils-args-port.md index 27cd396c..07f63566 100644 --- a/specs/coreutils-args-port.md +++ b/specs/coreutils-args-port.md @@ -285,35 +285,35 @@ match_pinned_uutils_revision` asserts every `_args.rs` header references the same rev as the constant. Manual partial-regenerations that forget to bump (or mis-bump) the pin fail in CI. -The drift workflow always runs against upstream HEAD and bumps -`UUTILS_REVISION` together with the regenerated files in one PR — the -two never diverge across an auto-PR boundary. +The drift workflow always resolves upstream HEAD to a concrete commit, +checks out that commit detached, and bumps `UUTILS_REVISION` together +with the regenerated files in one PR — the two never diverge across an +auto-PR boundary. ## CI guard `.github/workflows/coreutils-args-drift.yml` runs weekly (Mondays 05:00 UTC) and on `workflow_dispatch`. It: -1. Checks out bashkit and `uutils/coreutils` side-by-side. +1. Checks out bashkit and `uutils/coreutils` side-by-side in a read-only + job with checkout credential persistence disabled. 2. Reads the current pin from `generated/mod.rs` and checks out uutils - at upstream HEAD for the regen. + at the resolved upstream HEAD commit for the regen. 3. Runs `bashkit-coreutils-port` against every `pub mod _args;` line in `crates/bashkit/src/builtins/generated/mod.rs` and bumps `UUTILS_REVISION` to the rev it just generated against. 4. Verifies bashkit still builds and the cat/tac spec tests pass. 5. Builds the uutils multicall from the same checkout and runs the differential harness with `BASHKIT_RUN_COREUTILS_DIFF=1`. -6. Uploads generated files as an artifact if `git diff` is non-empty. -7. A separate `open-pr` job downloads that artifact and opens a PR with - the regenerated files + bumped pin. - -Security boundary: the regeneration/build/test job treats uutils as -third-party input. It has only `contents: read` permission and both -checkouts set `persist-credentials: false`, so generated Rust can be -compiled and exercised without a repository write token. The later -`open-pr` job has `contents: write`/`pull-requests: write`, but it does -not build or execute the generated code; it only commits the already -tested generated files. +6. Uploads a binary git patch for `crates/bashkit/src/builtins/generated/` + if `git diff` is non-empty. +7. Runs a separate write-scoped PR job that checks out bashkit without + persisted credentials, applies only generated-file changes from that + patch, commits them, and opens or updates the drift PR with `gh`. + +The read-only job is the only job that builds or executes code from +`uutils/coreutils`. The write-scoped PR job must not checkout, build, or +execute uutils code, and must not use third-party PR creation actions. The PR's intermediate commits are bot-authored (this is automated drift detection, not a code change). Maintainers must **squash-merge as a human**