Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 147 additions & 3 deletions crates/bashkit/src/fs/limits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
//! - **TM-DOS-013**: Long filenames → `max_filename_length`, `max_path_length`
//! - **TM-DOS-014**: Many directory entries → `max_file_count`
//! - **TM-DOS-015**: Unicode path attacks → `validate_path()` control char rejection
//! - **TM-UNI-003**: Zero-width chars in paths → `find_unsafe_path_char()` rejection
//! - **TM-UNI-011**: Tag-block chars in paths → `find_unsafe_path_char()` rejection
//! - **TM-UNI-012**: Interlinear annotations in paths → `find_unsafe_path_char()` rejection
//! - **TM-UNI-013**: Deprecated format chars in paths → `find_unsafe_path_char()` rejection

use std::fmt;
use std::path::Path;
Expand Down Expand Up @@ -343,12 +347,23 @@ impl FsLimits {
}

// THREAT[TM-DOS-015]: Unicode control chars and bidi overrides can cause path confusion
// Mitigation: Reject paths containing these characters
// THREAT[TM-UNI-003]: Zero-width chars create visually-identical filenames
// THREAT[TM-UNI-011]: Tag block chars (U+E0000-U+E007F) hide content invisibly
// THREAT[TM-UNI-012]: Interlinear annotations (U+FFF9-U+FFFB) hide text
// THREAT[TM-UNI-013]: Deprecated format chars (U+206A-U+206F) cause display confusion
// Mitigation: Reject path components containing any of these invisible/confusable chars
/// Check if a path component contains unsafe characters.
///
/// Returns `Some(description)` for the first unsafe character found.
/// Rejects: ASCII control chars (0x00-0x1F, 0x7F), C1 controls (0x80-0x9F),
/// and Unicode bidi override characters (U+202A-U+202E, U+2066-U+2069).
///
/// Rejects:
/// - ASCII control chars (0x00-0x1F, 0x7F)
/// - C1 control characters (U+0080-U+009F)
/// - Bidi override characters (U+202A-U+202E, U+2066-U+2069)
/// - Zero-width characters: U+200B-U+200D, U+2060, U+FEFF, U+180E
/// - Deprecated format characters (U+206A-U+206F)
/// - Interlinear annotation markers (U+FFF9-U+FFFB)
/// - Tag block (U+E0000-U+E007F)
fn find_unsafe_path_char(name: &str) -> Option<String> {
for ch in name.chars() {
// ASCII control characters (except we allow nothing - null is already
Expand All @@ -364,6 +379,28 @@ fn find_unsafe_path_char(name: &str) -> Option<String> {
if ('\u{202A}'..='\u{202E}').contains(&ch) || ('\u{2066}'..='\u{2069}').contains(&ch) {
return Some(format!("U+{:04X} (bidi override)", ch as u32));
}
// TM-UNI-003: Zero-width characters - invisible, create confusable names
// U+200B ZWSP, U+200C ZWNJ, U+200D ZWJ, U+2060 Word Joiner,
// U+FEFF BOM/Zero Width No-Break Space, U+180E Mongolian Vowel Separator
if matches!(
ch,
'\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{2060}' | '\u{FEFF}' | '\u{180E}'
) {
return Some(format!("U+{:04X} (zero-width)", ch as u32));
}
// TM-UNI-013: Deprecated format characters - display confusion
if ('\u{206A}'..='\u{206F}').contains(&ch) {
return Some(format!("U+{:04X} (deprecated format)", ch as u32));
}
// TM-UNI-012: Interlinear annotation markers - hide text
if ('\u{FFF9}'..='\u{FFFB}').contains(&ch) {
return Some(format!("U+{:04X} (interlinear annotation)", ch as u32));
}
// TM-UNI-011: Tag block - invisible chars (deprecated in Unicode 5.0)
// Range covers U+E0000-U+E007F, which includes U+E0001 LANGUAGE TAG,
// the tag characters U+E0020-U+E007E, and U+E007F CANCEL TAG
if ('\u{E0000}'..='\u{E007F}').contains(&ch) {
return Some(format!("U+{:04X} (tag char)", ch as u32));
}
}
None
}
Expand Down Expand Up @@ -661,4 +698,111 @@ mod tests {
assert!(limits.validate_path(Path::new("/tmp/café")).is_ok());
assert!(limits.validate_path(Path::new("/tmp/文件")).is_ok());
}

// TM-UNI-003: Zero-width characters in filenames must be rejected
#[test]
fn test_validate_path_zwsp_rejected() {
    // A ZERO WIDTH SPACE (U+200B) sits between "file" and "name"; validation
    // must fail and the error text must mention both the category and the
    // offending code point.
    let limits = FsLimits::new();
    let zwsp_path = PathBuf::from("/tmp/file\u{200B}name.txt");
    let msg = limits.validate_path(&zwsp_path).unwrap_err().to_string();
    for needle in ["zero-width", "U+200B"] {
        assert!(msg.contains(needle), "got: {msg}");
    }
}

#[test]
fn test_validate_path_zwnj_rejected() {
    // U+200C ZERO WIDTH NON-JOINER placed between two visible letters.
    let candidate = Path::new("/tmp/a\u{200C}b");
    let limits = FsLimits::new();
    assert!(limits.validate_path(candidate).is_err());
}

#[test]
fn test_validate_path_zwj_rejected() {
    // U+200D ZERO WIDTH JOINER placed between two visible letters.
    let limits = FsLimits::new();
    let verdict = limits.validate_path(Path::new("/tmp/a\u{200D}b"));
    assert!(verdict.is_err());
}

#[test]
fn test_validate_path_word_joiner_rejected() {
    // U+2060 WORD JOINER placed between two visible letters.
    let limits = FsLimits::new();
    let candidate = Path::new("/tmp/a\u{2060}b");
    assert!(matches!(limits.validate_path(candidate), Err(_)));
}

#[test]
fn test_validate_path_bom_rejected() {
    // U+FEFF (BOM / zero width no-break space) as the first character of the
    // final path component.
    let limits = FsLimits::new();
    let bom_path = Path::new("/tmp/\u{FEFF}file");
    let rejected = limits.validate_path(bom_path).is_err();
    assert!(rejected);
}

#[test]
fn test_validate_path_mongolian_vowel_separator_rejected() {
    // U+180E MONGOLIAN VOWEL SEPARATOR placed between two visible letters.
    let mvs_path = Path::new("/tmp/a\u{180E}b");
    let limits = FsLimits::new();
    assert!(limits.validate_path(mvs_path).is_err());
}

// TM-UNI-013: Deprecated format characters (U+206A-U+206F) must be rejected
#[test]
fn test_validate_path_deprecated_format_rejected() {
    let limits = FsLimits::new();
    // Try every code point in U+206A..=U+206F; each one must be rejected with
    // an error that names the "deprecated format" category.
    ('\u{206A}'..='\u{206F}').for_each(|deprecated| {
        let candidate = PathBuf::from(format!("/tmp/a{deprecated}b"));
        let failure = limits.validate_path(&candidate).unwrap_err();
        let rendered = failure.to_string();
        assert!(
            rendered.contains("deprecated format"),
            "U+{:04X}: {rendered}",
            deprecated as u32
        );
    });
}

// TM-UNI-012: Interlinear annotation markers (U+FFF9-U+FFFB) must be rejected
#[test]
fn test_validate_path_interlinear_annotation_rejected() {
    let limits = FsLimits::new();
    // U+FFF9, U+FFFA and U+FFFB are contiguous, so an inclusive range visits
    // exactly the three annotation markers, in order.
    for marker in '\u{FFF9}'..='\u{FFFB}' {
        let candidate = PathBuf::from(format!("/tmp/a{marker}b"));
        let failure = limits.validate_path(&candidate).unwrap_err();
        let rendered = failure.to_string();
        assert!(
            rendered.contains("interlinear annotation"),
            "U+{:04X}: {rendered}",
            marker as u32
        );
    }
}

// TM-UNI-011: Tag block characters (U+E0000-U+E007F) must be rejected
#[test]
fn test_validate_path_tag_chars_rejected() {
    // Samples: both ends of the U+E0000-U+E007F range, plus U+E0001 and
    // U+E0041 from its interior.
    const SAMPLES: [char; 4] = ['\u{E0000}', '\u{E0001}', '\u{E0041}', '\u{E007F}'];
    let limits = FsLimits::new();
    for tag in SAMPLES {
        let candidate = PathBuf::from(format!("/tmp/a{tag}b"));
        let failure = limits.validate_path(&candidate).unwrap_err();
        let rendered = failure.to_string();
        assert!(
            rendered.contains("tag char"),
            "U+{:04X}: {rendered}",
            tag as u32
        );
    }
}

// Adjacent chars to the new ranges must NOT be over-blocked.
#[test]
fn test_validate_path_adjacent_chars_allowed() {
    let limits = FsLimits::new();
    // True when the given path string passes validation.
    let accepts = |raw: &str| limits.validate_path(Path::new(raw)).is_ok();

    // U+200A HAIR SPACE: the code point immediately before U+200B.
    assert!(accepts("/tmp/a\u{200A}b"));
    // U+200E LRM and U+200F RLM: bidi marks, not bidi overrides.
    assert!(accepts("/tmp/a\u{200E}b"));
    assert!(accepts("/tmp/a\u{200F}b"));
    // U+2070 SUPERSCRIPT ZERO: the code point immediately after U+206F.
    assert!(accepts("/tmp/a\u{2070}b"));
}
}
129 changes: 63 additions & 66 deletions crates/bashkit/tests/unicode_security_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,59 +166,66 @@ mod byte_boundary_safety {
mod zero_width_chars {
use super::*;

/// TM-UNI-003: Zero-width space in filename — documents current behavior.
/// Currently UNMITIGATED: find_unsafe_path_char() does not reject ZWSP.
/// TM-UNI-003: Zero-width space in filename — MITIGATED.
/// `find_unsafe_path_char()` rejects ZWSP, so the visually-identical
/// confusable filename can never reach the filesystem.
#[tokio::test]
async fn unicode_zwsp_in_filename_current_behavior() {
async fn unicode_zwsp_in_filename_rejected() {
let fs = InMemoryFs::new();

// Zero Width Space (U+200B) in filename
let result = fs
let err = fs
.write_file(Path::new("/tmp/file\u{200B}name.txt"), b"data")
.await;
.await
.expect_err("ZWSP must be rejected");
let msg = err.to_string();
assert!(
msg.contains("U+200B") || msg.contains("zero-width") || msg.contains("unsafe"),
"got: {msg}"
);
}

// Currently this succeeds — documents the gap.
// When TM-UNI-003 is fixed, this should return an error.
if result.is_ok() {
// Gap confirmed: zero-width chars pass validation
// Also verify the file is distinguishable from "filename.txt"
let normal = fs
.write_file(Path::new("/tmp/filename.txt"), b"other")
.await;
assert!(normal.is_ok());
// Two distinct files exist with visually identical names
let content1 = fs
.read_file(Path::new("/tmp/file\u{200B}name.txt"))
.await
.unwrap();
let content2 = fs.read_file(Path::new("/tmp/filename.txt")).await.unwrap();
assert_ne!(
content1, content2,
"ZWSP creates distinct file (TM-UNI-003 gap)"
);
}
// If it fails, the mitigation has been implemented
/// TM-UNI-003: BOM (U+FEFF) in filename — MITIGATED.
#[tokio::test]
async fn unicode_bom_in_filename_rejected() {
    // The file name starts with U+FEFF; the write must come back as an error.
    let memfs = InMemoryFs::new();
    let bom_path = Path::new("/tmp/\u{FEFF}file.txt");
    let outcome = memfs.write_file(bom_path, b"data").await;
    outcome.expect_err("BOM must be rejected");
}

/// TM-UNI-003: BOM (U+FEFF) in filename — documents current behavior
/// TM-UNI-003: ZWJ (U+200D) in filename — MITIGATED.
#[tokio::test]
async fn unicode_bom_in_filename_current_behavior() {
async fn unicode_zwj_in_filename_rejected() {
let fs = InMemoryFs::new();
let result = fs
.write_file(Path::new("/tmp/\u{FEFF}file.txt"), b"data")
.await;
// Documents whether BOM is caught or not
let _ = result;
fs.write_file(Path::new("/tmp/file\u{200D}name.txt"), b"data")
.await
.expect_err("ZWJ must be rejected");
}

/// TM-UNI-003: ZWJ (U+200D) in filename — documents current behavior
/// TM-UNI-011: Tag block char (U+E0041 = TAG LATIN A) — MITIGATED.
#[tokio::test]
async fn unicode_zwj_in_filename_current_behavior() {
async fn unicode_tag_char_in_filename_rejected() {
let fs = InMemoryFs::new();
let result = fs
.write_file(Path::new("/tmp/file\u{200D}name.txt"), b"data")
.await;
let _ = result;
fs.write_file(Path::new("/tmp/file\u{E0041}name.txt"), b"data")
.await
.expect_err("tag char must be rejected");
}

/// TM-UNI-012: Interlinear annotation marker (U+FFF9) — MITIGATED.
#[tokio::test]
async fn unicode_interlinear_annotation_in_filename_rejected() {
    // U+FFF9 is embedded in the middle of the file name; the write must come
    // back as an error.
    let memfs = InMemoryFs::new();
    let annotated = Path::new("/tmp/file\u{FFF9}name.txt");
    let outcome = memfs.write_file(annotated, b"data").await;
    outcome.expect_err("interlinear annotation must be rejected");
}

/// TM-UNI-013: Deprecated format char (U+206C) — MITIGATED.
#[tokio::test]
async fn unicode_deprecated_format_in_filename_rejected() {
    // U+206C is embedded in the middle of the file name; the write must come
    // back as an error.
    let memfs = InMemoryFs::new();
    let tainted = Path::new("/tmp/file\u{206C}name.txt");
    let outcome = memfs.write_file(tainted, b"data").await;
    outcome.expect_err("deprecated format char must be rejected");
}

/// TM-UNI-004: Zero-width chars in variable names — pass-through is correct
Expand Down Expand Up @@ -391,41 +398,31 @@ mod combining_char_tests {
mod invisible_char_tests {
use super::*;

/// TM-UNI-011: Tag characters in filename — documents current behavior
/// TM-UNI-011: U+E0001 LANGUAGE TAG — MITIGATED.
#[tokio::test]
async fn unicode_tag_chars_in_filename_current_behavior() {
async fn unicode_tag_chars_in_filename_rejected() {
let fs = InMemoryFs::new();

// U+E0001 (Language Tag) — invisible, deprecated since Unicode 5.0
let result = fs
.write_file(Path::new("/tmp/file\u{E0001}name.txt"), b"data")
.await;
// Currently UNMITIGATED — documents the gap
let _ = result;
fs.write_file(Path::new("/tmp/file\u{E0001}name.txt"), b"data")
.await
.expect_err("U+E0001 LANGUAGE TAG must be rejected");
}

/// TM-UNI-012: Interlinear annotation chars in filename — documents current behavior
/// TM-UNI-012: U+FFF9 INTERLINEAR ANNOTATION ANCHOR — MITIGATED.
#[tokio::test]
async fn unicode_interlinear_annotation_in_filename() {
async fn unicode_interlinear_annotation_in_filename_rejected() {
let fs = InMemoryFs::new();

// U+FFF9 (Interlinear Annotation Anchor)
let result = fs
.write_file(Path::new("/tmp/file\u{FFF9}name.txt"), b"data")
.await;
let _ = result;
fs.write_file(Path::new("/tmp/file\u{FFF9}name.txt"), b"data")
.await
.expect_err("U+FFF9 INTERLINEAR ANNOTATION ANCHOR must be rejected");
}

/// TM-UNI-013: Deprecated format chars in filename — documents current behavior
/// TM-UNI-013: U+206A INHIBIT SYMMETRIC SWAPPING — MITIGATED.
#[tokio::test]
async fn unicode_deprecated_format_chars_in_filename() {
async fn unicode_deprecated_format_chars_in_filename_rejected() {
let fs = InMemoryFs::new();

// U+206A (Inhibit Symmetric Swapping) — deprecated
let result = fs
.write_file(Path::new("/tmp/file\u{206A}name.txt"), b"data")
.await;
let _ = result;
fs.write_file(Path::new("/tmp/file\u{206A}name.txt"), b"data")
.await
.expect_err("U+206A deprecated format must be rejected");
}
}

Expand Down
Loading
Loading