Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 64 additions & 9 deletions .github/workflows/coreutils-args-drift.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ name: Coreutils Args Drift
# The PR's intermediate commits are bot-authored (this is automated drift
# detection, not a code change). Squash-merge it as a human so the merge
# commit lands under a real identity per AGENTS.md commit-attribution rules.
#
# Security boundary: uutils source is third-party input. Regeneration,
# build, and tests run without repository write permission and without
# persisted checkout credentials. Only the later PR job gets write
# permission, and it only commits generated files after tests pass.

on:
schedule:
Expand All @@ -19,8 +24,7 @@ on:
workflow_dispatch:

permissions:
contents: write
pull-requests: write
contents: read

env:
CARGO_TERM_COLOR: always
Expand All @@ -31,17 +35,27 @@ jobs:
name: Regenerate uutils argument surfaces
runs-on: ubuntu-latest
timeout-minutes: 20
permissions:
contents: read
outputs:
drifted: ${{ steps.drift.outputs.drifted }}
pinned: ${{ steps.rev.outputs.pinned }}
rev: ${{ steps.regen.outputs.rev }}
utils: ${{ steps.regen.outputs.utils }}
modules: ${{ steps.regen_modules.outputs.modules }}
steps:
- name: Checkout bashkit
uses: actions/checkout@v6
with:
path: bashkit
persist-credentials: false

- name: Checkout uutils/coreutils
uses: actions/checkout@v6
with:
repository: uutils/coreutils
path: uutils
persist-credentials: false
# Full history so we can checkout the pinned rev below; depth=1
# would only have HEAD.
fetch-depth: 0
Expand Down Expand Up @@ -173,6 +187,47 @@ jobs:
BASHKIT_RUN_COREUTILS_DIFF: '1'
run: cargo test -p bashkit --test coreutils_differential_tests

- name: Check for generated drift
id: drift
working-directory: bashkit
run: |
set -euo pipefail
if [ -z "$(git status --porcelain -- crates/bashkit/src/builtins/generated)" ]; then
echo "drifted=false" >> "$GITHUB_OUTPUT"
else
echo "drifted=true" >> "$GITHUB_OUTPUT"
fi

- name: Upload generated drift
if: steps.drift.outputs.drifted == 'true'
uses: actions/upload-artifact@v4
with:
name: coreutils-generated-drift
path: bashkit/crates/bashkit/src/builtins/generated/
if-no-files-found: error

open-pr:
name: Open drift pull request
needs: regenerate
if: needs.regenerate.outputs.drifted == 'true'
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout bashkit
uses: actions/checkout@v6
with:
path: bashkit
persist-credentials: false

- name: Download generated drift
uses: actions/download-artifact@v4
with:
name: coreutils-generated-drift
path: bashkit/crates/bashkit/src/builtins/generated/

- name: Open pull request if drifted
uses: peter-evans/create-pull-request@v7
with:
Expand All @@ -186,19 +241,19 @@ jobs:
chore(builtins): sync uutils/coreutils argument surfaces and vendored modules

Regenerated by `bashkit-coreutils-port` against
uutils/coreutils@${{ steps.regen.outputs.rev }}.
uutils/coreutils@${{ needs.regenerate.outputs.rev }}.

Utilities: ${{ steps.regen.outputs.utils }}
Vendored modules: ${{ steps.regen_modules.outputs.modules }}
Utilities: ${{ needs.regenerate.outputs.utils }}
Vendored modules: ${{ needs.regenerate.outputs.modules }}
body: |
Automated regeneration of argument surfaces and vendored
modules ported from
[uutils/coreutils](https://github.com/uutils/coreutils).

**Source revision:** `${{ steps.regen.outputs.rev }}`
(was `${{ steps.rev.outputs.pinned }}`)
**Utilities:** ${{ steps.regen.outputs.utils }}
**Vendored modules:** ${{ steps.regen_modules.outputs.modules }}
**Source revision:** `${{ needs.regenerate.outputs.rev }}`
(was `${{ needs.regenerate.outputs.pinned }}`)
**Utilities:** ${{ needs.regenerate.outputs.utils }}
**Vendored modules:** ${{ needs.regenerate.outputs.modules }}
**Trigger:** `.github/workflows/coreutils-args-drift.yml`

This PR bumps `UUTILS_REVISION` in
Expand Down
198 changes: 197 additions & 1 deletion crates/bashkit-coreutils-port/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ use anyhow::{Context, Result, anyhow, bail};
use proc_macro2::TokenStream;
use quote::quote;
use syn::visit_mut::{self, VisitMut};
use syn::{Expr, ExprCall, ExprMacro, Item, ItemFn, ItemImpl, ItemMod, LitStr, Type, parse_quote};
use syn::{
Expr, ExprCall, ExprMacro, Item, ItemFn, ItemImpl, ItemMod, LitStr, Stmt, Type, parse_quote,
};

pub fn run(uutils_dir: &Path, util: &str, rev: &str) -> Result<String> {
let src_path = uutils_dir
Expand Down Expand Up @@ -136,6 +138,8 @@ pub fn run(uutils_dir: &Path, util: &str, rev: &str) -> Result<String> {
);
}

validate_uu_app_body(&uu_app)?;

let cmd_fn_name = syn::Ident::new(&format!("{util}_command"), proc_macro2::Span::call_site());
uu_app.sig.ident = cmd_fn_name.clone();
let uu_app_block = &uu_app.block;
Expand Down Expand Up @@ -638,6 +642,109 @@ fn collect_option_constants(file: &syn::File) -> Vec<Item> {
.collect()
}

// THREAT[TM-INF-025]: args-mode consumes third-party Rust from uutils.
// Only a single clap builder expression may become executable bashkit
// code; prefix statements would run during CI/parser construction.
fn validate_uu_app_body(uu_app: &ItemFn) -> Result<()> {
let command_expr = match uu_app.block.stmts.as_slice() {
[Stmt::Expr(expr, None)] => expr,
stmts => bail!(
"unsafe uu_app body: expected a single clap Command builder expression, found {} statements",
stmts.len()
),
};

let mut validator = UuAppExprValidator { errors: vec![] };
syn::visit::Visit::visit_expr(&mut validator, command_expr);
if !validator.errors.is_empty() {
bail!("unsafe uu_app body: {}", validator.errors.join("; "));
}
if !chain_roots_at_command_new(command_expr) {
bail!("unsafe uu_app body: tail expression must be a clap Command::new(...) builder chain");
}
Ok(())
}

struct UuAppExprValidator {
errors: Vec<String>,
}

impl<'ast> syn::visit::Visit<'ast> for UuAppExprValidator {
fn visit_expr_block(&mut self, node: &'ast syn::ExprBlock) {
self.errors.push(format!(
"block expression is not allowed in command builder: {}",
quote::quote!(#node)
));
}

fn visit_expr_unsafe(&mut self, node: &'ast syn::ExprUnsafe) {
self.errors.push(format!(
"unsafe block is not allowed in command builder: {}",
quote::quote!(#node)
));
}

fn visit_expr_async(&mut self, node: &'ast syn::ExprAsync) {
self.errors.push(format!(
"async block is not allowed in command builder: {}",
quote::quote!(#node)
));
}

fn visit_expr_loop(&mut self, node: &'ast syn::ExprLoop) {
self.errors.push(format!(
"loop is not allowed in command builder: {}",
quote::quote!(#node)
));
}

fn visit_expr_while(&mut self, node: &'ast syn::ExprWhile) {
self.errors.push(format!(
"while is not allowed in command builder: {}",
quote::quote!(#node)
));
}

fn visit_expr_for_loop(&mut self, node: &'ast syn::ExprForLoop) {
self.errors.push(format!(
"for loop is not allowed in command builder: {}",
quote::quote!(#node)
));
}

fn visit_expr_match(&mut self, node: &'ast syn::ExprMatch) {
self.errors.push(format!(
"match is not allowed in command builder: {}",
quote::quote!(#node)
));
}
}

fn chain_roots_at_command_new(mut expr: &Expr) -> bool {
while let Expr::MethodCall(mc) = expr {
expr = &mc.receiver;
}

let Expr::Call(call) = expr else {
return false;
};
path_ends_with_command_new(&call.func)
}

fn path_ends_with_command_new(func: &Expr) -> bool {
let Expr::Path(p) = func else {
return false;
};
let segs = path_segments(&p.path);
let last_two: Vec<&str> = segs
.iter()
.rev()
.take(2)
.map(String::as_str)
.collect::<Vec<_>>();
matches!(last_two.as_slice(), ["new", "Command"])
}

fn find_fn<'a>(file: &'a syn::File, name: &str) -> Option<&'a ItemFn> {
file.items.iter().find_map(|it| match it {
Item::Fn(f) if f.sig.ident == name => Some(f),
Expand Down Expand Up @@ -876,3 +983,92 @@ fn matches_shortcut_value_parser(segs: &[String]) -> bool {
.collect::<Vec<_>>();
matches!(last_two.as_slice(), ["new", "ShortcutValueParser"])
}

#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;

fn fixture(files: &[(&str, &str)]) -> (TempDir, PathBuf) {
let tmp = tempfile::tempdir().unwrap();
let uutils = tmp.path().join("uutils");
for (rel, content) in files {
let path = uutils.join(rel);
fs::create_dir_all(path.parent().unwrap()).unwrap();
fs::write(path, content).unwrap();
}
(tmp, uutils)
}

#[test]
fn rejects_executable_statements_in_uu_app() {
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
std::fs::write("coreutils-port-poc", b"owned").unwrap();
std::process::abort();
Command::new("cat").arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let err = run(&uutils, "cat", "poc").unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("unsafe uu_app"), "got: {msg}");
}

#[test]
fn accepts_single_command_builder_expression() {
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
Command::new("cat").arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let body = run(&uutils, "cat", "poc").unwrap();
assert!(body.contains("pub fn cat_command() -> clap::Command"));
assert!(body.contains("Command::new(\"cat\")"));
}

#[test]
fn rejects_non_command_tail_expression() {
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
std::process::abort()
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let err = run(&uutils, "cat", "poc").unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("Command::new"), "got: {msg}");
}
}
21 changes: 18 additions & 3 deletions specs/coreutils-args-port.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,13 @@ codegen**, not by depending on `uu_*` crates at runtime.
empty). Per-builtin opt-in: a builtin chooses whether to wire
through the shim — if it does, every uutils env-default
auto-lights as that option's bashkit support lands.
5. Emits a generated file under
5. Validates the rewritten `uu_app()` before emission: args mode accepts
only a single tail clap `Command` builder expression. Prefix
statements, block expressions, loops, matches, async blocks, and
unsafe blocks are rejected before any generated Rust is written.
This keeps third-party uutils source from smuggling arbitrary
executable statements into `<util>_command()` (TM-INF-025).
6. Emits a generated file under
`crates/bashkit/src/builtins/generated/<util>_args.rs` with a clean
`pub fn <util>_command() -> clap::Command`.

Expand Down Expand Up @@ -297,8 +303,17 @@ and on `workflow_dispatch`. It:
4. Verifies bashkit still builds and the cat/tac spec tests pass.
5. Builds the uutils multicall from the same checkout and runs the
differential harness with `BASHKIT_RUN_COREUTILS_DIFF=1`.
6. Opens a PR with the regenerated files + bumped pin if `git diff` is
non-empty.
6. Uploads generated files as an artifact if `git diff` is non-empty.
7. A separate `open-pr` job downloads that artifact and opens a PR with
the regenerated files + bumped pin.

Security boundary: the regeneration/build/test job treats uutils as
third-party input. It has only `contents: read` permission and both
checkouts set `persist-credentials: false`, so generated Rust can be
compiled and exercised without a repository write token. The later
`open-pr` job has `contents: write`/`pull-requests: write`, but it does
not build or execute the generated code; it only commits the already
tested generated files.

The PR's intermediate commits are bot-authored (this is automated drift
detection, not a code change). Maintainers must **squash-merge as a human**
Expand Down
1 change: 1 addition & 0 deletions specs/threat-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,7 @@ All execution stays within the virtual interpreter — no OS subprocess is spawn
| TM-INF-022 | Library Debug shapes leak via stderr | Builtins wrapping external libs (jaq, regex, serde_json, semver, chrono, …) format errors with `format!("{:?}", e)` and dump internal struct shapes into stderr — e.g. jq printed `(File { code: "<800 chars of prepended compat-defs source>", path: () }, [("@tsv", Filter(0))])` to LLM agents | Three-layer enforcement, all run by `cargo test`: (1) static — `builtins::tests::no_debug_fmt_in_builtin_source` walks every `crates/bashkit/src/builtins/*.rs` and forbids `{:?}` / `{:#?}` / `{name:?}` (per-line opt-out: `// debug-ok: <reason>`); (2) dynamic per-tool — each tool's `mod tests` calls `bashkit::testing::assert_no_leak` with malformed inputs; (3) fuzz — every cargo-fuzz target plus 5 proptest cases call `bashkit::testing::assert_fuzz_invariants` against random input and check the same invariants on stderr/stdout | **FIXED** |
| TM-INF-023 | jq `halt` / `halt_error` terminate the host process | jaq-std 3.0's `halt(N)` native calls `std::process::exit(exit_code)`. `defs.jq` wraps it with `def halt: halt(0);` and `def halt_error(...): ..., halt(...);`. Untrusted jq filters can invoke `halt` / `halt_error` to exit the entire embedding process, escaping the `ExecResult`-based sandbox boundary and causing process-wide DoS for any caller hosting bashkit. | Strip the upstream `halt` native from `jaq_std::funs::<D>()` and chain in a safe replacement that pops the variable argument and returns `Error::str("halt is disabled in the bashkit sandbox")`. The wrapper defs (`halt`, `halt_error`) still resolve, so callers see a normal jq runtime error (exit 5) instead of the host being killed. Regression tests in `builtins::jq::tests`: `halt_does_not_terminate_host_process`, `halt_with_arg_does_not_terminate_host_process`, `halt_error_does_not_terminate_host_process`. | **FIXED** |
| TM-INF-024 | Host env side-channel via clap `Arg::env(...)` | Clap-based builtins ported from uutils (currently `ls`, with `.env("TABSIZE")` and `.env("TIME_STYLE")` on `uu_app()`) resolve those defaults from `std::env`, not bashkit's `ctx.env`. This breaks the sandbox boundary two ways: (1) presence-probe — a script can tell whether the host process has `TIME_STYLE`/`TABSIZE` set by observing `ls`'s behaviour; (2) availability — a host- or container-set `TIME_STYLE` materialises as a clap value source for an option bashkit hasn't implemented yet, so the unsupported-option gate fires on every plain `ls` and breaks the builtin for unrelated tenants | Four-layer fix: (1) **codegen strips runtime `.env(...)` and harvests it** — `bashkit-coreutils-port` rewrites `.env("FOO")` chained method calls out of every Arg builder it ports AND emits a sidecar `<UTIL>_ENV_DEFAULTS: &[EnvDefault]` table recording each stripped annotation; (2) **virtual-env shim** — `crate::builtins::clap_env::apply_env_defaults` rewrites argv from `ctx.env` (never `std::env`) before `try_get_matches_from`, emulating clap's "argv > env > default" precedence so uutils' env-default UX is preserved without re-opening the host channel; (3) **static guards** — `no_clap_env_in_generated_parsers` forbids runtime `.env(` calls in `generated/*.rs`, and `every_generated_parser_emits_env_defaults_table` enforces every util emits the sidecar (uniform surface, no per-util conditionals); (4) **defence-in-depth** — workspace `clap` dep drops the `env` cargo feature, so a re-introduced `.env(...)` fails to compile rather than silently re-opening the channel. Coverage: ls runtime tests pin both directions — `ls_ignores_host_time_style_and_tabsize` (host `std::env` ignored) and `ls_honors_virtual_env_time_style` (virtual `ctx.env` honoured) | **FIXED** |
| TM-INF-025 | Untrusted generated Rust runs in drift CI with repository write token | The coreutils argument-drift workflow consumes third-party `uutils/coreutils` source, regenerates Rust from `uu_app()`, then builds/tests bashkit. If the generator preserved arbitrary upstream statements and the same job held `contents: write`/`pull-requests: write`, malicious upstream code could execute during parser construction with repository write impact. | Two-layer fix: (1) `bashkit-coreutils-port` validates args-mode `uu_app()` before emission and rejects anything other than a single tail clap `Command` builder expression; regression test `rejects_executable_statements_in_uu_app` proves write+abort payloads fail codegen. (2) `.github/workflows/coreutils-args-drift.yml` separates privilege: regeneration/build/test has only `contents: read` and `persist-credentials: false` on both checkouts; the later `open-pr` job gets write permission but does not build or execute generated Rust, only commits the tested artifact. | **FIXED** |

**TM-INF-022**: Generalizes TM-INF-016 to cover the whole builtin surface.
The originating bug was `builtins/jq/` formatting jaq's compile/parse
Expand Down
Loading