From 19fcf68834c800053abea1779b50c1612dcc68ac Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Wed, 15 Apr 2026 09:25:46 -0500 Subject: [PATCH] fix(file-saver): block symlink escapes on save --- crates/fetchkit/src/file_saver.rs | 128 ++++++++++++++++++++- crates/fetchkit/src/tool.rs | 7 +- crates/fetchkit/tests/file_saver_safety.rs | 51 ++++++++ specs/threat-model.md | 18 +-- 4 files changed, 184 insertions(+), 20 deletions(-) diff --git a/crates/fetchkit/src/file_saver.rs b/crates/fetchkit/src/file_saver.rs index b20484b..6ebc71f 100644 --- a/crates/fetchkit/src/file_saver.rs +++ b/crates/fetchkit/src/file_saver.rs @@ -90,6 +90,12 @@ impl LocalFileSaver { /// Resolve and validate a path, returning the normalized absolute path. fn resolve_path(&self, path: &str) -> Result { + if path.is_empty() { + return Err(FileSaveError::PathNotAllowed( + "Path must name a file".into(), + )); + } + let input = PathBuf::from(path); if let Some(base) = &self.base_dir { @@ -117,22 +123,134 @@ impl LocalFileSaver { Ok(normalize_path(&input)) } } + + async fn canonicalize_base_dir(&self, base: &Path) -> Result { + tokio::fs::create_dir_all(base).await?; + + let meta = tokio::fs::symlink_metadata(base).await?; + if meta.file_type().is_symlink() { + return Err(FileSaveError::PathNotAllowed( + "Base directory must not be a symlink".into(), + )); + } + if !meta.is_dir() { + return Err(FileSaveError::PathNotAllowed( + "Base directory must be a directory".into(), + )); + } + + Ok(tokio::fs::canonicalize(base).await?) + } + + async fn prepare_parent_dir(&self, resolved: &Path) -> Result { + let Some(base) = &self.base_dir else { + return Ok(resolved + .parent() + .ok_or_else(|| FileSaveError::PathNotAllowed("Path must name a file".into()))? + .to_path_buf()); + }; + + let normalized_base = normalize_path(base); + let relative = resolved + .strip_prefix(&normalized_base) + .map_err(|_| FileSaveError::PathNotAllowed("Path escapes base directory".into()))?; + let canonical_base = self.canonicalize_base_dir(base).await?; + let mut current = canonical_base.clone(); + + for component in relative + .parent() + .unwrap_or_else(|| Path::new("")) + .components() + { + let Component::Normal(name) = component else { + return Err(FileSaveError::PathNotAllowed(format!( + "Unsupported path component in save path: {}", + resolved.display() + ))); + }; + + let candidate = current.join(name); + let meta = match tokio::fs::symlink_metadata(&candidate).await { + Ok(meta) => meta, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + if let Err(create_err) = tokio::fs::create_dir(&candidate).await { + if create_err.kind() != std::io::ErrorKind::AlreadyExists { + return Err(create_err.into()); + } + } + tokio::fs::symlink_metadata(&candidate).await? + } + Err(err) => return Err(err.into()), + }; + + if meta.file_type().is_symlink() { + return Err(FileSaveError::PathNotAllowed(format!( + "Path traverses symlink: {}", + candidate.display() + ))); + } + if !meta.is_dir() { + return Err(FileSaveError::PathNotAllowed(format!( + "Parent path is not a directory: {}", + candidate.display() + ))); + } + + let canonical_candidate = tokio::fs::canonicalize(&candidate).await?; + if !canonical_candidate.starts_with(&canonical_base) { + return Err(FileSaveError::PathNotAllowed(format!( + "Path escapes base directory via symlink: {}", + candidate.display() + ))); + } + current = canonical_candidate; + } + + Ok(current) + } } #[async_trait] impl FileSaver for LocalFileSaver { async fn save(&self, path: &str, bytes: &[u8]) -> Result { let resolved = self.resolve_path(path)?; + if let Some(base_dir) = &self.base_dir { + if resolved == normalize_path(base_dir) { + return Err(FileSaveError::PathNotAllowed( + "Path must name a file".into(), + )); + } + } + let file_name = resolved + .file_name() + .ok_or_else(|| FileSaveError::PathNotAllowed("Path must name a file".into()))?; + let parent_dir = self.prepare_parent_dir(&resolved).await?; + let final_path = parent_dir.join(file_name); + + if self.base_dir.is_none() { + if let Some(parent) = final_path.parent() { + tokio::fs::create_dir_all(parent).await?; + } + } - // Create parent directories - if let Some(parent) = resolved.parent() { - tokio::fs::create_dir_all(parent).await?; + match tokio::fs::symlink_metadata(&final_path).await { + Ok(meta) if meta.file_type().is_symlink() => { + return Err(FileSaveError::PathNotAllowed(format!( + "Refusing to write through symlink: {}", + final_path.display() + ))); + } + Ok(_) => {} + Err(err) if err.kind() == std::io::ErrorKind::NotFound => {} + Err(err) => return Err(err.into()), } - tokio::fs::write(&resolved, bytes).await?; + // THREAT[TM-INPUT-008]: Validate and create the final path during save, + // so symlink checks happen at write time rather than in a separate preflight step. + tokio::fs::write(&final_path, bytes).await?; Ok(SaveResult { - path: resolved.to_string_lossy().to_string(), + path: final_path.to_string_lossy().to_string(), bytes_written: bytes.len() as u64, }) } diff --git a/crates/fetchkit/src/tool.rs b/crates/fetchkit/src/tool.rs index 76e2b1e..ca9b26b 100644 --- a/crates/fetchkit/src/tool.rs +++ b/crates/fetchkit/src/tool.rs @@ -532,18 +532,13 @@ impl Tool { req: FetchRequest, saver: Option<&dyn FileSaver>, ) -> Result { - if let Some(path) = &req.save_to_file { + if req.save_to_file.is_some() { if !self.enable_save_to_file { return Err(FetchError::SaverNotAvailable); } let saver = saver.ok_or(FetchError::SaverNotAvailable)?; - saver - .validate_path(path) - .await - .map_err(|e| FetchError::SaveError(e.to_string()))?; - let options = self.build_options(); let registry = FetcherRegistry::with_defaults(); registry.fetch_to_file(req, options, saver).await diff --git a/crates/fetchkit/tests/file_saver_safety.rs b/crates/fetchkit/tests/file_saver_safety.rs index 56aa6f4..1d6817c 100644 --- a/crates/fetchkit/tests/file_saver_safety.rs +++ b/crates/fetchkit/tests/file_saver_safety.rs @@ -23,6 +23,16 @@ fn saver_in(dir: &std::path::Path) -> LocalFileSaver { LocalFileSaver::new(Some(dir.to_path_buf())) } +#[cfg(unix)] +fn symlink_dir(src: &std::path::Path, dst: &std::path::Path) { + std::os::unix::fs::symlink(src, dst).unwrap(); +} + +#[cfg(windows)] +fn symlink_dir(src: &std::path::Path, dst: &std::path::Path) { + std::os::windows::fs::symlink_dir(src, dst).unwrap(); +} + // --------------------------------------------------------------------------- // Path traversal attacks // --------------------------------------------------------------------------- @@ -117,6 +127,47 @@ async fn test_no_base_dir_requires_absolute() { ); } +#[tokio::test] +async fn test_path_traversal_symlink_escape_rejected() { + use fetchkit::file_saver::FileSaver; + + let base = tempfile::tempdir().unwrap(); + let outside = tempfile::tempdir().unwrap(); + let saver = saver_in(base.path()); + + symlink_dir(outside.path(), &base.path().join("pivot")); + + let result = saver.save("pivot/escape.txt", b"pwned").await; + assert!(result.is_err(), "symlink escape should be rejected"); + assert!(!outside.path().join("escape.txt").exists()); +} + +#[tokio::test] +async fn test_execute_with_saver_rejects_symlink_escape() { + let base = tempfile::tempdir().unwrap(); + let outside = tempfile::tempdir().unwrap(); + let saver = saver_in(base.path()); + let tool = tool_with_save(); + + symlink_dir(outside.path(), &base.path().join("pivot")); + + let mock = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_string("pwned")) + .mount(&mock) + .await; + + let req = FetchRequest::new(format!("{}/", mock.uri())).save_to_file("pivot/escape.txt"); + let result = tool.execute_with_saver(req, Some(&saver)).await; + + assert!( + result.is_err(), + "execute_with_saver should reject symlink escape" + ); + assert!(!outside.path().join("escape.txt").exists()); +} + // --------------------------------------------------------------------------- // Server errors and edge cases // --------------------------------------------------------------------------- diff --git a/specs/threat-model.md b/specs/threat-model.md index 216dc5b..3147147 100644 --- a/specs/threat-model.md +++ b/specs/threat-model.md @@ -218,7 +218,7 @@ expected outbound path. | TM-INPUT-005 | URL with fragment/query manipulation | Low | Fragments and queries are part of the URL; no special handling needed | **BY DESIGN** | | TM-INPUT-006 | Prefix bypass via URL authority (http://evil.com@127.0.0.1) | Medium | `url` crate parses authority correctly; resolve-then-check validates the actual host | MITIGATED | | TM-INPUT-007 | Block prefix matching is string-based, not URL-aware | Medium | URL-aware prefix matching compares parsed components (scheme, host, path) | MITIGATED | -| TM-INPUT-008 | Symlink-based path traversal in LocalFileSaver | Medium | Lexical path normalization; symlinks within base_dir can escape | **ACCEPTED** | +| TM-INPUT-008 | Symlink-based path traversal in LocalFileSaver | Medium | Save-time parent-directory walk rejects symlinks and re-checks canonical path under base_dir | MITIGATED | | TM-INPUT-009 | LocalFileSaver without base_dir allows arbitrary writes | Medium | Documented limitation; callers should always set base_dir in untrusted contexts | **ACCEPTED** | ### Mitigation Details @@ -249,13 +249,13 @@ then compares scheme, host (exact match), port, and path (segment-boundary matching). `http://internal.example.com` correctly does NOT match `http://internal.example.com.evil.com` since hosts differ after parsing. -**TM-INPUT-008 — Symlink-based path traversal (ACCEPTED):** -`LocalFileSaver` uses lexical normalization (not `canonicalize()`) to prevent `..` -traversal. If a symlink exists within `base_dir` pointing outside it, the lexical -check is bypassed. Accepted because: -- The base_dir is operator-controlled, not user-controlled -- In containerized deployments, the save directory is freshly created per session -- Adding `canonicalize()` introduces TOCTOU races and requires the path to exist +**TM-INPUT-008 — Symlink-based path traversal (MITIGATED):** +`LocalFileSaver` still performs lexical normalization to block `..` traversal, but +save-time enforcement now walks each parent directory component under `base_dir`, +rejects symlinks, canonicalizes each directory after creation/use, and verifies +the canonical path stays under the canonical base directory. `execute_with_saver()` +no longer performs a separate `validate_path()` preflight, so path checks now +happen at write time instead of in a validate-then-write split. **TM-INPUT-009 — No base_dir allows arbitrary writes (ACCEPTED):** `LocalFileSaver::new(None)` only requires absolute paths, with no directory restriction. @@ -430,7 +430,7 @@ None — all previously open threats have been mitigated. | New client per request | TM-NET | No connection pool state leakage | | Fetcher API URL hardcoding | TM-SSRF | Specialized fetchers (GitHub, Twitter) connect to hardcoded API hosts, not user-controlled URLs; DNS validation applied on initial connect | | Proxy env isolation | TM-NET | `reqwest::ClientBuilder::no_proxy()` by default | -| Path traversal prevention | TM-INPUT | Lexical path normalization in `LocalFileSaver` | +| Path traversal prevention | TM-INPUT | Lexical normalization plus save-time parent-directory symlink rejection in `LocalFileSaver` | | Save feature gating | TM-INPUT | `enable_save_to_file` disabled by default; schema gated | | Bot-auth feature gating | TM-AUTH | `bot-auth` Cargo feature disabled by default; no crypto deps unless opted in | | Signature nonce + timestamps | TM-AUTH | 32-byte random nonce + created/expires per signature prevents replay |