From 1f6c356a7bcddac13034368dc26ede654546704b Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Tue, 5 May 2026 22:41:48 +0000 Subject: [PATCH 1/3] refactor(builtins): migrate readlink to codegen-ported argument surface readlink moves from handwritten char-by-char short-flag parsing to the bashkit-coreutils-port codegen pipeline. Replaces the manual parser (~50 LoC of `match arg.as_str()` plus a combined-flag loop) with `readlink_command().try_get_matches_from(...)`. Codegen tool extension: bashkit-coreutils-port now also handles utils that declare option keys as module-level `const OPT_FOO: &str = ...` or `static OPT_FOO` rather than wrapping them in `mod options { }`. The two patterns coexist in uutils (cat/tac/truncate/stat/shuf use the mod, mktemp/realpath/readlink/od use bare consts); the tool now emits whichever the source uses without requiring per-util branches in the bashkit codebase. Behaviour fixes accompanying the migration: - `-n` / `--no-newline` now suppresses the trailing terminator on the last operand (previously silently accepted as a no-op). - `-z` / `--zero` switches the terminator to NUL (previously rejected as unknown). - Combined short flags like `-fn` now parse natively via clap; the manual `for ch in s[1..].chars()` loop is gone. - Unknown flags now exit with clap's exit code 2 instead of the custom code 1; documented as a clap-vs-GNU divergence in the spec test (`### bash_diff`), matching cat / tac / truncate / shuf. Tests: - `crates/bashkit/tests/spec_cases/bash/readlink.test.sh` covers `-f`/`-m`/`-e`, combined short flags, long form, and unknown-flag rejection. - `generated_args_headers_match_pinned_uutils_revision` continues to pass against the existing pin (39364b6). Spec: implementation-status.md adds the new test row. Part of #1532. --- crates/bashkit-coreutils-port/src/main.rs | 65 ++++++++- crates/bashkit/src/builtins/generated/mod.rs | 1 + .../src/builtins/generated/readlink_args.rs | 125 ++++++++++++++++++ crates/bashkit/src/builtins/path.rs | 106 ++++++++------- .../tests/spec_cases/bash/readlink.test.sh | 55 ++++++++ specs/implementation-status.md | 1 + 6 files changed, 304 insertions(+), 49 deletions(-) create mode 100644 crates/bashkit/src/builtins/generated/readlink_args.rs create mode 100644 crates/bashkit/tests/spec_cases/bash/readlink.test.sh diff --git a/crates/bashkit-coreutils-port/src/main.rs b/crates/bashkit-coreutils-port/src/main.rs index 6d7f383e..db62f56c 100644 --- a/crates/bashkit-coreutils-port/src/main.rs +++ b/crates/bashkit-coreutils-port/src/main.rs @@ -77,8 +77,26 @@ fn run(uutils_dir: &Path, util: &str, rev: &str) -> Result { let translations = parse_ftl(&ftl); let file = syn::parse_file(&src).context("parse uutils source as rust")?; - let options_mod = find_mod(&file, "options") - .ok_or_else(|| anyhow!("could not find `mod options` in {}", src_path.display()))?; + // Two option-key declaration styles in the wild: + // 1. `mod options { pub static FOO: &str = "foo"; ... }` (cat, tac, + // truncate, stat, shuf). uu_app() refers to keys via + // `options::FOO`. + // 2. Module-level `const OPT_FOO: &str = "foo";` / `static OPT_FOO` + // (mktemp, realpath, readlink, od). uu_app() refers to keys by + // bare name (`OPT_FOO`). + // + // Collect both: the optional `options` mod (if present) and any + // bare-name `OPT_*` / `ARG_*` constants we should also emit so + // uu_app's bare-name references resolve. + let options_mod = find_mod(&file, "options").cloned(); + let bare_name_consts = collect_option_constants(&file); + if options_mod.is_none() && bare_name_consts.is_empty() { + bail!( + "could not find `mod options` or any module-level `OPT_*`/`ARG_*` \ + constants in {}", + src_path.display() + ); + } let mut uu_app = find_fn(&file, "uu_app") .ok_or_else(|| anyhow!("could not find `fn uu_app` in {}", src_path.display()))? .clone(); @@ -129,7 +147,18 @@ fn run(uutils_dir: &Path, util: &str, rev: &str) -> Result { // Original uutils licensed MIT; see THIRD_PARTY_LICENSES.\n\n", ); - let options_tokens = options_mod; + // Optional `mod options { ... }` (cat-style); collapses to nothing + // for utils that use bare-name constants. + let options_mod_tokens: TokenStream = match options_mod { + Some(m) => quote!(#m), + None => quote!(), + }; + // Bare-name constants for utils that don't wrap them in a mod. + let const_tokens: Vec = bare_name_consts + .into_iter() + .map(|c| quote::quote!(#c)) + .collect(); + let body: TokenStream = quote! { #![allow(unused_imports, dead_code)] @@ -144,7 +173,8 @@ fn run(uutils_dir: &Path, util: &str, rev: &str) -> Result { use std::ops::RangeInclusive; use std::str::FromStr; - #options_tokens + #options_mod_tokens + #(#const_tokens)* /// Vendored stand-in for `uucore::format_usage`. /// @@ -269,6 +299,33 @@ fn find_mod<'a>(file: &'a syn::File, name: &str) -> Option<&'a ItemMod> { }) } +/// Collect module-level `const OPT_FOO: &str = ...` / `static OPT_FOO` +/// declarations that some uutils sources use in place of a +/// `mod options { ... }`. uu_app() bodies in these utils refer to the +/// constants by their bare name (`Arg::new(OPT_FOO)`); we emit them +/// at the same scope in the generated file so the bare-name references +/// resolve unchanged. +/// +/// Filter: name must start with `OPT_` or `ARG_` to avoid sweeping in +/// unrelated source-level constants. +fn collect_option_constants(file: &syn::File) -> Vec { + file.items + .iter() + .filter(|it| match it { + Item::Const(c) => { + let n = c.ident.to_string(); + n.starts_with("OPT_") || n.starts_with("ARG_") + } + Item::Static(s) => { + let n = s.ident.to_string(); + n.starts_with("OPT_") || n.starts_with("ARG_") + } + _ => false, + }) + .cloned() + .collect() +} + fn find_fn<'a>(file: &'a syn::File, name: &str) -> Option<&'a ItemFn> { file.items.iter().find_map(|it| match it { Item::Fn(f) if f.sig.ident == name => Some(f), diff --git a/crates/bashkit/src/builtins/generated/mod.rs b/crates/bashkit/src/builtins/generated/mod.rs index 1edf44fb..90ed6f51 100644 --- a/crates/bashkit/src/builtins/generated/mod.rs +++ b/crates/bashkit/src/builtins/generated/mod.rs @@ -28,6 +28,7 @@ pub const UUTILS_REVISION: &str = "39364b6"; pub mod cat_args; +pub mod readlink_args; pub mod shuf_args; pub mod tac_args; pub mod truncate_args; diff --git a/crates/bashkit/src/builtins/generated/readlink_args.rs b/crates/bashkit/src/builtins/generated/readlink_args.rs new file mode 100644 index 00000000..57fbf806 --- /dev/null +++ b/crates/bashkit/src/builtins/generated/readlink_args.rs @@ -0,0 +1,125 @@ +// GENERATED by bashkit-coreutils-port. DO NOT EDIT. +// +// Source: uutils/coreutils@39364b6 src/uu/readlink/ +// Regenerate: cargo run -p bashkit-coreutils-port -- readlink +// +// Original uutils licensed MIT; see THIRD_PARTY_LICENSES. + +#![allow(unused_imports, dead_code)] +use clap::{Arg, ArgAction, Command, builder::ValueParser}; +use std::ffi::OsString; +use std::ops::RangeInclusive; +use std::str::FromStr; +const OPT_CANONICALIZE: &str = "canonicalize"; +const OPT_CANONICALIZE_MISSING: &str = "canonicalize-missing"; +const OPT_CANONICALIZE_EXISTING: &str = "canonicalize-existing"; +const OPT_NO_NEWLINE: &str = "no-newline"; +const OPT_QUIET: &str = "quiet"; +const OPT_SILENT: &str = "silent"; +const OPT_VERBOSE: &str = "verbose"; +const OPT_ZERO: &str = "zero"; +const ARG_FILES: &str = "files"; +/// Vendored stand-in for `uucore::format_usage`. +/// +/// Upstream wraps the usage line with stylized "Usage:" prefix logic +/// driven by uucore's locale stack. For our purposes the raw string +/// is enough; clap's `override_usage` accepts the literal as-is. +fn format_usage(s: &str) -> String { + s.to_string() +} +pub fn readlink_command() -> Command { + Command::new("readlink") + .version(env!("CARGO_PKG_VERSION")) + .about( + ::std::string::String::from( + "Print value of a symbolic link or canonical file name.", + ), + ) + .override_usage( + format_usage(&::std::string::String::from("readlink [OPTION]... [FILE]...")), + ) + .infer_long_args(true) + .arg( + Arg::new(OPT_CANONICALIZE) + .short('f') + .long(OPT_CANONICALIZE) + .help( + ::std::string::String::from( + "canonicalize by following every symlink in every component of the given name recursively; all but the last component must exist", + ), + ) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(OPT_CANONICALIZE_EXISTING) + .short('e') + .long("canonicalize-existing") + .help( + ::std::string::String::from( + "canonicalize by following every symlink in every component of the given name recursively, all components must exist", + ), + ) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(OPT_CANONICALIZE_MISSING) + .short('m') + .long(OPT_CANONICALIZE_MISSING) + .help( + ::std::string::String::from( + "canonicalize by following every symlink in every component of the given name recursively, without requirements on components existence", + ), + ) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(OPT_NO_NEWLINE) + .short('n') + .long(OPT_NO_NEWLINE) + .help( + ::std::string::String::from("do not output the trailing delimiter"), + ) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(OPT_QUIET) + .short('q') + .long(OPT_QUIET) + .help(::std::string::String::from("suppress most error messages")) + .overrides_with_all([OPT_QUIET, OPT_SILENT, OPT_VERBOSE]) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(OPT_SILENT) + .short('s') + .long(OPT_SILENT) + .help(::std::string::String::from("suppress most error messages")) + .overrides_with_all([OPT_QUIET, OPT_SILENT, OPT_VERBOSE]) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(OPT_VERBOSE) + .short('v') + .long(OPT_VERBOSE) + .help(::std::string::String::from("report error message")) + .overrides_with_all([OPT_QUIET, OPT_SILENT, OPT_VERBOSE]) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(OPT_ZERO) + .short('z') + .long(OPT_ZERO) + .help( + ::std::string::String::from( + "separate output with NUL rather than newline", + ), + ) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(ARG_FILES) + .action(ArgAction::Append) + .value_parser(clap::value_parser!(OsString)) + .value_hint(clap::ValueHint::AnyPath), + ) +} diff --git a/crates/bashkit/src/builtins/path.rs b/crates/bashkit/src/builtins/path.rs index 4e9b715a..f7123dcb 100644 --- a/crates/bashkit/src/builtins/path.rs +++ b/crates/bashkit/src/builtins/path.rs @@ -173,6 +173,10 @@ impl Builtin for Realpath { /// The readlink builtin - print resolved symbolic links or canonical file names. /// +/// Argument surface is generated from uutils/coreutils' `uu_app()` via the +/// `bashkit-coreutils-port` codegen tool — see `generated/readlink_args.rs`. +/// Behaviour stays local against the bashkit VFS. +/// /// Usage: readlink [-f|-m|-e] FILE... /// /// Options: @@ -186,49 +190,52 @@ pub struct Readlink; impl Builtin for Readlink { #[allow(clippy::collapsible_if)] async fn execute(&self, ctx: Context<'_>) -> Result { - if let Some(r) = super::check_help_version( - ctx.args, - "Usage: readlink [-f|-m|-e] FILE...\nPrint resolved symbolic links or canonical file names.\n\n -f\tcanonicalize by following every symlink; all but last component must exist\n -m\tcanonicalize without requiring components to exist\n -e\tcanonicalize; all components must exist\n -n\tdo not output the trailing newline\n --help\tdisplay this help and exit\n --version\toutput version information and exit\n", - Some("readlink (bashkit) 0.1"), - ) { - return Ok(r); - } - if ctx.args.is_empty() { - return Ok(ExecResult::err( - "readlink: missing operand\n".to_string(), - 1, - )); - } - - let mut mode = ReadlinkMode::Raw; - let mut files: Vec<&str> = Vec::new(); - - for arg in ctx.args { - match arg.as_str() { - "-f" => mode = ReadlinkMode::Canonicalize, - "-m" => mode = ReadlinkMode::CanonicalizeMissing, - "-e" => mode = ReadlinkMode::CanonicalizeExisting, - "-n" | "-v" | "-q" | "-s" | "--no-newline" => { /* silently accept */ } - s if s.starts_with('-') && s.len() > 1 && !s.starts_with("--") => { - // Could be combined flags like -fn - for ch in s[1..].chars() { - match ch { - 'f' => mode = ReadlinkMode::Canonicalize, - 'm' => mode = ReadlinkMode::CanonicalizeMissing, - 'e' => mode = ReadlinkMode::CanonicalizeExisting, - 'n' | 'v' | 'q' | 's' => {} - _ => { - return Ok(ExecResult::err( - format!("readlink: invalid option -- '{}'\n", ch), - 1, - )); - } - } - } + let argv: Vec = std::iter::once(std::ffi::OsString::from("readlink")) + .chain(ctx.args.iter().map(std::ffi::OsString::from)) + .collect(); + + let cmd = super::generated::readlink_args::readlink_command() + .help_template("Usage: {usage}\n{about}\n\n{all-args}\n"); + let matches = match cmd.try_get_matches_from(argv) { + Ok(m) => m, + Err(e) => { + let kind = e.kind(); + let rendered = e.render().to_string(); + if matches!( + kind, + clap::error::ErrorKind::DisplayHelp | clap::error::ErrorKind::DisplayVersion + ) { + return Ok(ExecResult::ok(rendered)); } - _ => files.push(arg), + return Ok(ExecResult::err(rendered, 2)); } - } + }; + + // -e/-m/-f are mutually exclusive in spirit; check most-restrictive + // first, matching uutils' precedence. + let mode = if matches.get_flag("canonicalize-existing") { + ReadlinkMode::CanonicalizeExisting + } else if matches.get_flag("canonicalize-missing") { + ReadlinkMode::CanonicalizeMissing + } else if matches.get_flag("canonicalize") { + ReadlinkMode::Canonicalize + } else { + ReadlinkMode::Raw + }; + + // -n suppresses the trailing terminator entirely; -z swaps it + // to NUL. Both can come from the codegen-generated args now + // that the parser handles them; the previous handwritten path + // silently accepted -n as a no-op, so honoring it is a strict + // improvement. + let suppress_terminator = matches.get_flag("no-newline"); + let zero_terminated = matches.get_flag("zero"); + let terminator: char = if zero_terminated { '\0' } else { '\n' }; + + let files: Vec = matches + .get_many::("files") + .map(|vs| vs.map(|v| v.to_string_lossy().into_owned()).collect()) + .unwrap_or_default(); if files.is_empty() { return Ok(ExecResult::err( @@ -239,9 +246,12 @@ impl Builtin for Readlink { let mut output = String::new(); let mut exit_code = 0; + let total_files = files.len(); - for file in &files { + for (idx, file) in files.iter().enumerate() { let resolved = super::resolve_path(ctx.cwd, file); + let is_last = idx + 1 == total_files; + let needs_terminator = !(suppress_terminator && is_last); match mode { ReadlinkMode::Raw => { @@ -249,7 +259,9 @@ impl Builtin for Readlink { match ctx.fs.read_link(&resolved).await { Ok(target) => { output.push_str(&target.to_string_lossy()); - output.push('\n'); + if needs_terminator { + output.push(terminator); + } } Err(_) => { exit_code = 1; @@ -274,13 +286,17 @@ impl Builtin for Readlink { } } output.push_str(&resolved.to_string_lossy()); - output.push('\n'); + if needs_terminator { + output.push(terminator); + } } ReadlinkMode::CanonicalizeExisting => { // -e: all components must exist if ctx.fs.exists(&resolved).await.unwrap_or(false) { output.push_str(&resolved.to_string_lossy()); - output.push('\n'); + if needs_terminator { + output.push(terminator); + } } else { exit_code = 1; } diff --git a/crates/bashkit/tests/spec_cases/bash/readlink.test.sh b/crates/bashkit/tests/spec_cases/bash/readlink.test.sh new file mode 100644 index 00000000..af448475 --- /dev/null +++ b/crates/bashkit/tests/spec_cases/bash/readlink.test.sh @@ -0,0 +1,55 @@ +### readlink_canonicalize_f +# -f: canonicalize, all but last component must exist +mkdir -p /tmp/rl_f +touch /tmp/rl_f/file.txt +readlink -f /tmp/rl_f/file.txt +### expect +/tmp/rl_f/file.txt +### end + +### readlink_canonicalize_missing_m +# -m: canonicalize without requiring components to exist +readlink -m /tmp/rl_missing/path +### expect +/tmp/rl_missing/path +### end + +### readlink_canonicalize_existing_e +# -e: all components must exist +mkdir -p /tmp/rl_e +touch /tmp/rl_e/file.txt +readlink -e /tmp/rl_e/file.txt +### expect +/tmp/rl_e/file.txt +### end + +### readlink_combined_short_flags +# Codegen-driven clap accepts combined short flags like -fn (-f + -n). +# We don't yet honor -n trailing-newline suppression, but parsing must +# not fail and -f's canonicalization must still apply. +mkdir -p /tmp/rl_fn +touch /tmp/rl_fn/file.txt +readlink -fn /tmp/rl_fn/file.txt +echo +### expect +/tmp/rl_fn/file.txt +### end + +### readlink_long_form +# Long-form flag from generated args surface. +mkdir -p /tmp/rl_long +touch /tmp/rl_long/file.txt +readlink --canonicalize /tmp/rl_long/file.txt +### expect +/tmp/rl_long/file.txt +### end + +### readlink_unknown_flag_rejected +# Clap rejects unknown flags with exit 2; GNU readlink exits 1. +# bash_diff captures the well-known clap-vs-GNU divergence. +### bash_diff +readlink --bogus /tmp/whatever +echo "exit=$?" +### expect +exit=2 +### end diff --git a/specs/implementation-status.md b/specs/implementation-status.md index ae6f9124..0936599a 100644 --- a/specs/implementation-status.md +++ b/specs/implementation-status.md @@ -204,6 +204,7 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. | procsub.test.sh | 14 | process substitution | | quote.test.sh | 42 | quoting edge cases | | read-builtin.test.sh | 16 | `read` builtin, IFS splitting, `-r`, `-a` (array), `-n` (nchars), here-string | +| readlink.test.sh | 6 | codegen-ported args, `-f`/`-m`/`-e` modes, `-fn` combined short flags, long form, unknown-flag rejection | | recursive-cmdsub.test.sh | 3 | recursive function calls inside `$()` command substitution | | regex-limit.test.sh | 1 | regex size limits in sed, grep, awk | | replace_pattern_limit.test.sh | 3 | global pattern replacement result size cap | From 0307bade179a67f0c0c6f2766ec6d564673737e5 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Tue, 5 May 2026 22:58:50 +0000 Subject: [PATCH 2/3] test(readlink): update unit test for clap exit-code + flag space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `test_readlink_invalid_option` previously asserted exit 1 with stderr containing "invalid option" — the GNU/handwritten convention. After the codegen migration, clap exits 2 with "unexpected argument". Additionally, -z is now a valid flag (zero-terminate output), so the test must pick a flag that is genuinely not part of the ported surface. The clap-vs-GNU exit-code divergence is already documented as a \`### bash_diff\` row in the spec test; mirror that in the unit assertion. Same fix as the prior cat/tac/truncate/shuf migrations. Part of #1532. --- crates/bashkit/src/builtins/path.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/crates/bashkit/src/builtins/path.rs b/crates/bashkit/src/builtins/path.rs index f7123dcb..64025ab6 100644 --- a/crates/bashkit/src/builtins/path.rs +++ b/crates/bashkit/src/builtins/path.rs @@ -566,9 +566,22 @@ mod tests { #[tokio::test] async fn test_readlink_invalid_option() { + // The codegen-ported argument surface uses clap, which exits 2 + // (not the GNU coreutils convention of 1) on unknown flags. + // The clap-vs-GNU exit-code divergence is documented in + // `tests/spec_cases/bash/readlink.test.sh` (### bash_diff). + // -z is a valid flag now (zero-terminate output), so the test + // uses a string that is plainly not a flag bashkit ports. let fs = Arc::new(InMemoryFs::new()) as Arc; - let result = run_readlink_with_fs(&["-z", "/file"], fs).await; - assert_eq!(result.exit_code, 1); - assert!(result.stderr.contains("invalid option")); + let result = run_readlink_with_fs(&["--definitely-not-a-flag", "/file"], fs).await; + assert_eq!(result.exit_code, 2); + let stderr_lower = result.stderr.to_lowercase(); + assert!( + stderr_lower.contains("unexpected argument") + || stderr_lower.contains("unknown argument") + || stderr_lower.contains("invalid option"), + "expected clap unknown-flag stderr, got {:?}", + result.stderr + ); } } From ac8daf38ddf12cc7920bcc84b6c761dda6d9bd0d Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Tue, 5 May 2026 23:15:07 +0000 Subject: [PATCH 3/3] fix(readlink): use Display in test assertion to satisfy TM-INF-022 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix used `{:?}` which trips `no_debug_fmt_in_builtin_source`. Switch to `{}` — the asserted value is a String for which Display reads cleanly. --- crates/bashkit/src/builtins/path.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/bashkit/src/builtins/path.rs b/crates/bashkit/src/builtins/path.rs index 64025ab6..766ab041 100644 --- a/crates/bashkit/src/builtins/path.rs +++ b/crates/bashkit/src/builtins/path.rs @@ -580,7 +580,7 @@ mod tests { stderr_lower.contains("unexpected argument") || stderr_lower.contains("unknown argument") || stderr_lower.contains("invalid option"), - "expected clap unknown-flag stderr, got {:?}", + "expected clap unknown-flag stderr, got {}", result.stderr ); }