Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions crates/fetchkit/src/fetchers/default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,17 @@ const BINARY_PREFIXES: &[&str] = &[
const FIRST_BYTE_TIMEOUT: Duration = Duration::from_secs(1);

// THREAT[TM-DOS-002]: Body timeout caps total request duration
const BODY_TIMEOUT: Duration = Duration::from_secs(30);
pub(crate) const BODY_TIMEOUT: Duration = Duration::from_secs(30);

/// Truncation message appended when body is cut short (timeout or size limit)
const TRUNCATION_MESSAGE: &str = "\n\n[..content truncated...]";
pub(crate) const TRUNCATION_MESSAGE: &str = "\n\n[..content truncated...]";

// THREAT[TM-SSRF-010]: Maximum redirects to follow with IP validation at each hop
const MAX_REDIRECTS: usize = 10;

// THREAT[TM-DOS-001]: Default max body size (10 MB) to prevent memory exhaustion
// THREAT[TM-DOS-003]: Also protects against compressed content bombs (gzip bombs)
const DEFAULT_MAX_BODY_SIZE: usize = 10 * 1024 * 1024;
pub(crate) const DEFAULT_MAX_BODY_SIZE: usize = 10 * 1024 * 1024;

/// Default HTTP fetcher
///
Expand Down Expand Up @@ -640,7 +640,7 @@ fn parse_content_disposition_filename(value: &str) -> Option<String> {
/// due to timeout or exceeding `max_size`.
// THREAT[TM-DOS-001]: Configurable max body size prevents unbounded memory usage
// THREAT[TM-DOS-003]: Decompressed size is checked, catching gzip/brotli bombs
async fn read_body_with_timeout(
pub(crate) async fn read_body_with_timeout(
response: reqwest::Response,
timeout: Duration,
max_size: usize,
Expand Down
28 changes: 21 additions & 7 deletions crates/fetchkit/src/fetchers/docs_site.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@

use crate::client::FetchOptions;
use crate::error::FetchError;
use crate::fetchers::default::{apply_bot_auth_if_enabled, send_request_following_redirects};
use crate::fetchers::default::{
apply_bot_auth_if_enabled, read_body_with_timeout, send_request_following_redirects,
BODY_TIMEOUT, DEFAULT_MAX_BODY_SIZE, TRUNCATION_MESSAGE,
};
use crate::fetchers::Fetcher;
use crate::types::{FetchRequest, FetchResponse};
use crate::DEFAULT_USER_AGENT;
Expand Down Expand Up @@ -219,6 +222,11 @@ async fn fetch_llms_txt_direct(

let status_code = response.status().as_u16();
let final_url = response.url().to_string();
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.map(|s| s.to_string());

if !response.status().is_success() {
return Ok(FetchResponse {
Expand All @@ -230,17 +238,23 @@ async fn fetch_llms_txt_direct(
});
}

let body = response
.text()
.await
.map_err(|e| FetchError::RequestError(e.to_string()))?;
let max_body_size = options.max_body_size.unwrap_or(DEFAULT_MAX_BODY_SIZE);
let (body, truncated) = read_body_with_timeout(response, BODY_TIMEOUT, max_body_size).await;
let size = body.len() as u64;
let mut content = String::from_utf8_lossy(&body).to_string();

if truncated {
content.push_str(TRUNCATION_MESSAGE);
}

Ok(FetchResponse {
url: final_url,
status_code: 200,
content_type: Some("text/plain".to_string()),
content_type,
format: Some("documentation".to_string()),
content: Some(body),
content: Some(content),
size: Some(size),
truncated: if truncated { Some(true) } else { None },
redirect_chain,
..Default::default()
})
Expand Down
27 changes: 27 additions & 0 deletions crates/fetchkit/tests/ssrf_security.rs
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,33 @@ async fn test_dos_001_body_within_limit_not_truncated() {
assert!(resp.content.unwrap().contains("small body"));
}

/// TM-DOS-001 regression test: a `/llms.txt` response larger than the
/// configured `max_body_size` must come back truncated.
///
/// The mock serves a 2000-byte body while the tool is limited to 1000 bytes,
/// so the response is expected to be flagged as truncated, report a size no
/// larger than the limit, and carry the truncation marker in its content.
#[tokio::test]
async fn test_dos_001_direct_llms_txt_honors_body_size_limit() {
    let server = MockServer::start().await;
    // Twice the limit configured below, so truncation is unambiguous.
    let oversized = "x".repeat(2000);

    let template = ResponseTemplate::new(200)
        .set_body_string(&oversized)
        .insert_header("content-type", "text/plain");
    Mock::given(method("GET"))
        .and(path("/llms.txt"))
        .respond_with(template)
        .mount(&server)
        .await;

    // Private-IP blocking is switched off for this request, which targets
    // the mock server's own URI.
    let fetcher = Tool::builder()
        .block_private_ips(false)
        .max_body_size(1000)
        .build();
    let target = format!("{}/llms.txt", server.uri());
    let outcome = fetcher.execute(FetchRequest::new(target)).await.unwrap();

    assert_eq!(outcome.truncated, Some(true));
    assert!(outcome.size.unwrap() <= 1000);
    assert!(outcome.content.unwrap().contains("[..content truncated...]"));
}

// ============================================================================
// TM-INPUT-007: URL-aware prefix matching
// ============================================================================
Expand Down
Loading