Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 123 additions & 5 deletions crates/fetchkit/src/file_saver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ impl LocalFileSaver {

/// Resolve and validate a path, returning the normalized absolute path.
fn resolve_path(&self, path: &str) -> Result<PathBuf, FileSaveError> {
if path.is_empty() {
return Err(FileSaveError::PathNotAllowed(
"Path must name a file".into(),
));
}

let input = PathBuf::from(path);

if let Some(base) = &self.base_dir {
Expand Down Expand Up @@ -117,22 +123,134 @@ impl LocalFileSaver {
Ok(normalize_path(&input))
}
}

async fn canonicalize_base_dir(&self, base: &Path) -> Result<PathBuf, FileSaveError> {
tokio::fs::create_dir_all(base).await?;

let meta = tokio::fs::symlink_metadata(base).await?;
if meta.file_type().is_symlink() {
return Err(FileSaveError::PathNotAllowed(
"Base directory must not be a symlink".into(),
));
}
if !meta.is_dir() {
return Err(FileSaveError::PathNotAllowed(
"Base directory must be a directory".into(),
));
}

Ok(tokio::fs::canonicalize(base).await?)
}

async fn prepare_parent_dir(&self, resolved: &Path) -> Result<PathBuf, FileSaveError> {
let Some(base) = &self.base_dir else {
return Ok(resolved
.parent()
.ok_or_else(|| FileSaveError::PathNotAllowed("Path must name a file".into()))?
.to_path_buf());
};

let normalized_base = normalize_path(base);
let relative = resolved
.strip_prefix(&normalized_base)
.map_err(|_| FileSaveError::PathNotAllowed("Path escapes base directory".into()))?;
let canonical_base = self.canonicalize_base_dir(base).await?;
let mut current = canonical_base.clone();

for component in relative
.parent()
.unwrap_or_else(|| Path::new(""))
.components()
{
let Component::Normal(name) = component else {
return Err(FileSaveError::PathNotAllowed(format!(
"Unsupported path component in save path: {}",
resolved.display()
)));
};

let candidate = current.join(name);
let meta = match tokio::fs::symlink_metadata(&candidate).await {
Ok(meta) => meta,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
if let Err(create_err) = tokio::fs::create_dir(&candidate).await {
if create_err.kind() != std::io::ErrorKind::AlreadyExists {
return Err(create_err.into());
}
}
tokio::fs::symlink_metadata(&candidate).await?
}
Err(err) => return Err(err.into()),
};

if meta.file_type().is_symlink() {
return Err(FileSaveError::PathNotAllowed(format!(
"Path traverses symlink: {}",
candidate.display()
)));
}
if !meta.is_dir() {
return Err(FileSaveError::PathNotAllowed(format!(
"Parent path is not a directory: {}",
candidate.display()
)));
}

let canonical_candidate = tokio::fs::canonicalize(&candidate).await?;
if !canonical_candidate.starts_with(&canonical_base) {
return Err(FileSaveError::PathNotAllowed(format!(
"Path escapes base directory via symlink: {}",
candidate.display()
)));
}
current = canonical_candidate;
}

Ok(current)
}
}

#[async_trait]
impl FileSaver for LocalFileSaver {
async fn save(&self, path: &str, bytes: &[u8]) -> Result<SaveResult, FileSaveError> {
let resolved = self.resolve_path(path)?;
if let Some(base_dir) = &self.base_dir {
if resolved == normalize_path(base_dir) {
return Err(FileSaveError::PathNotAllowed(
"Path must name a file".into(),
));
}
}
let file_name = resolved
.file_name()
.ok_or_else(|| FileSaveError::PathNotAllowed("Path must name a file".into()))?;
let parent_dir = self.prepare_parent_dir(&resolved).await?;
let final_path = parent_dir.join(file_name);

if self.base_dir.is_none() {
if let Some(parent) = final_path.parent() {
tokio::fs::create_dir_all(parent).await?;
}
}

// Create parent directories
if let Some(parent) = resolved.parent() {
tokio::fs::create_dir_all(parent).await?;
match tokio::fs::symlink_metadata(&final_path).await {
Ok(meta) if meta.file_type().is_symlink() => {
return Err(FileSaveError::PathNotAllowed(format!(
"Refusing to write through symlink: {}",
final_path.display()
)));
}
Ok(_) => {}
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
Err(err) => return Err(err.into()),
}

tokio::fs::write(&resolved, bytes).await?;
// THREAT[TM-INPUT-008]: Validate and create the final path during save,
// so symlink checks happen at write time rather than in a separate preflight step.
tokio::fs::write(&final_path, bytes).await?;

Ok(SaveResult {
path: resolved.to_string_lossy().to_string(),
path: final_path.to_string_lossy().to_string(),
bytes_written: bytes.len() as u64,
})
}
Expand Down
7 changes: 1 addition & 6 deletions crates/fetchkit/src/tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -532,18 +532,13 @@ impl Tool {
req: FetchRequest,
saver: Option<&dyn FileSaver>,
) -> Result<FetchResponse, FetchError> {
if let Some(path) = &req.save_to_file {
if req.save_to_file.is_some() {
if !self.enable_save_to_file {
return Err(FetchError::SaverNotAvailable);
}

let saver = saver.ok_or(FetchError::SaverNotAvailable)?;

saver
.validate_path(path)
.await
.map_err(|e| FetchError::SaveError(e.to_string()))?;

let options = self.build_options();
let registry = FetcherRegistry::with_defaults();
registry.fetch_to_file(req, options, saver).await
Expand Down
51 changes: 51 additions & 0 deletions crates/fetchkit/tests/file_saver_safety.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ fn saver_in(dir: &std::path::Path) -> LocalFileSaver {
LocalFileSaver::new(Some(dir.to_path_buf()))
}

#[cfg(unix)]
fn symlink_dir(src: &std::path::Path, dst: &std::path::Path) {
std::os::unix::fs::symlink(src, dst).unwrap();
}

#[cfg(windows)]
fn symlink_dir(src: &std::path::Path, dst: &std::path::Path) {
std::os::windows::fs::symlink_dir(src, dst).unwrap();
}

// ---------------------------------------------------------------------------
// Path traversal attacks
// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -117,6 +127,47 @@ async fn test_no_base_dir_requires_absolute() {
);
}

#[tokio::test]
async fn test_path_traversal_symlink_escape_rejected() {
use fetchkit::file_saver::FileSaver;

let base = tempfile::tempdir().unwrap();
let outside = tempfile::tempdir().unwrap();
let saver = saver_in(base.path());

symlink_dir(outside.path(), &base.path().join("pivot"));

let result = saver.save("pivot/escape.txt", b"pwned").await;
assert!(result.is_err(), "symlink escape should be rejected");
assert!(!outside.path().join("escape.txt").exists());
}

#[tokio::test]
async fn test_execute_with_saver_rejects_symlink_escape() {
let base = tempfile::tempdir().unwrap();
let outside = tempfile::tempdir().unwrap();
let saver = saver_in(base.path());
let tool = tool_with_save();

symlink_dir(outside.path(), &base.path().join("pivot"));

let mock = MockServer::start().await;
Mock::given(method("GET"))
.and(path("/"))
.respond_with(ResponseTemplate::new(200).set_body_string("pwned"))
.mount(&mock)
.await;

let req = FetchRequest::new(format!("{}/", mock.uri())).save_to_file("pivot/escape.txt");
let result = tool.execute_with_saver(req, Some(&saver)).await;

assert!(
result.is_err(),
"execute_with_saver should reject symlink escape"
);
assert!(!outside.path().join("escape.txt").exists());
}

// ---------------------------------------------------------------------------
// Server errors and edge cases
// ---------------------------------------------------------------------------
Expand Down
18 changes: 9 additions & 9 deletions specs/threat-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ expected outbound path.
| TM-INPUT-005 | URL with fragment/query manipulation | Low | Fragments and queries are part of the URL; no special handling needed | **BY DESIGN** |
| TM-INPUT-006 | Prefix bypass via URL authority (http://evil.com@127.0.0.1) | Medium | `url` crate parses authority correctly; resolve-then-check validates the actual host | MITIGATED |
| TM-INPUT-007 | Block prefix matching is string-based, not URL-aware | Medium | URL-aware prefix matching compares parsed components (scheme, host, path) | MITIGATED |
| TM-INPUT-008 | Symlink-based path traversal in LocalFileSaver | Medium | Lexical path normalization; symlinks within base_dir can escape | **ACCEPTED** |
| TM-INPUT-008 | Symlink-based path traversal in LocalFileSaver | Medium | Save-time parent-directory walk rejects symlinks and re-checks canonical path under base_dir | MITIGATED |
| TM-INPUT-009 | LocalFileSaver without base_dir allows arbitrary writes | Medium | Documented limitation; callers should always set base_dir in untrusted contexts | **ACCEPTED** |

### Mitigation Details
Expand Down Expand Up @@ -249,13 +249,13 @@ then compares scheme, host (exact match), port, and path (segment-boundary
matching). `http://internal.example.com` correctly does NOT match
`http://internal.example.com.evil.com` since hosts differ after parsing.

**TM-INPUT-008 — Symlink-based path traversal (ACCEPTED):**
`LocalFileSaver` uses lexical normalization (not `canonicalize()`) to prevent `..`
traversal. If a symlink exists within `base_dir` pointing outside it, the lexical
check is bypassed. Accepted because:
- The base_dir is operator-controlled, not user-controlled
- In containerized deployments, the save directory is freshly created per session
- Adding `canonicalize()` introduces TOCTOU races and requires the path to exist
**TM-INPUT-008 — Symlink-based path traversal (MITIGATED):**
`LocalFileSaver` still performs lexical normalization to block `..` traversal, but
save-time enforcement now walks each parent directory component under `base_dir`,
rejects symlinks, canonicalizes each directory after creation/use, and verifies
the canonical path stays under the canonical base directory. `execute_with_saver()`
no longer performs a separate `validate_path()` preflight, so path checks now
happen at write time instead of in a validate-then-write split.

**TM-INPUT-009 — No base_dir allows arbitrary writes (ACCEPTED):**
`LocalFileSaver::new(None)` only requires absolute paths, with no directory restriction.
Expand Down Expand Up @@ -430,7 +430,7 @@ None — all previously open threats have been mitigated.
| New client per request | TM-NET | No connection pool state leakage |
| Fetcher API URL hardcoding | TM-SSRF | Specialized fetchers (GitHub, Twitter) connect to hardcoded API hosts, not user-controlled URLs; DNS validation applied on initial connect |
| Proxy env isolation | TM-NET | `reqwest::ClientBuilder::no_proxy()` by default |
| Path traversal prevention | TM-INPUT | Lexical path normalization in `LocalFileSaver` |
| Path traversal prevention | TM-INPUT | Lexical normalization plus save-time parent-directory symlink rejection in `LocalFileSaver` |
| Save feature gating | TM-INPUT | `enable_save_to_file` disabled by default; schema gated |
| Bot-auth feature gating | TM-AUTH | `bot-auth` Cargo feature disabled by default; no crypto deps unless opted in |
| Signature nonce + timestamps | TM-AUTH | 32-byte random nonce + created/expires per signature prevents replay |
Expand Down
Loading