diff --git a/crates/forge_app/src/dto/anthropic/request.rs b/crates/forge_app/src/dto/anthropic/request.rs index 7527e5f3da..72aa4496ec 100644 --- a/crates/forge_app/src/dto/anthropic/request.rs +++ b/crates/forge_app/src/dto/anthropic/request.rs @@ -60,27 +60,41 @@ impl SystemMessage { } } -#[derive(Serialize, Default, Debug, PartialEq, Eq)] -pub struct Thinking { - pub r#type: ThinkingType, - pub budget_tokens: u64, +/// Anthropic's `thinking` request field. Opus 4.7 rejects the `Enabled` shape +/// and the orchestrator applies model-specific reasoning normalization before +/// request conversion. +#[derive(Serialize, Debug, PartialEq, Eq)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum Thinking { + Enabled { + budget_tokens: u64, + }, + Adaptive { + #[serde(skip_serializing_if = "Option::is_none")] + display: Option, + }, + Disabled, +} + +/// On Opus 4.7 adaptive thinking content is omitted from responses unless +/// `Summarized` is requested explicitly. +#[derive(Serialize, Debug, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum ThinkingDisplay { + Summarized, + Omitted, } -/// Effort level for Anthropic's `output_config` API. -/// -/// Only the variants officially supported by Anthropic's `output_config.effort` -/// field. Mutually exclusive with the `thinking` object. #[derive(Serialize, Debug, Clone, Copy, PartialEq, Eq)] #[serde(rename_all = "lowercase")] pub enum OutputEffort { Low, Medium, High, + XHigh, Max, } -/// Output configuration for newer Anthropic models that support effort-based -/// reasoning (e.g. `claude-opus-4-6`). Mutually exclusive with `thinking`. #[derive(Serialize, Debug, PartialEq, Eq)] pub struct OutputConfig { pub effort: OutputEffort, @@ -93,14 +107,6 @@ pub enum OutputFormat { JsonSchema { schema: schemars::Schema }, } -#[derive(Serialize, Default, Debug, Clone, Copy, PartialEq, Eq)] -#[serde(rename_all = "lowercase")] -pub enum ThinkingType { - #[default] - Enabled, - Disabled, -} - impl TryFrom for Request { type Error = anyhow::Error; fn try_from(request: forge_domain::Context) -> std::result::Result { @@ -119,55 +125,40 @@ impl TryFrom for Request { }) .collect::>(); - // Route reasoning config to the correct Anthropic serialization. - // All paths require enabled == Some(true); without it nothing is emitted. - // - // • enabled + max_tokens → thinking object (older models, e.g. - // claude-3-7-sonnet). An explicit reasoning budget unambiguously - // selects the extended-thinking API. effort (which may arrive from - // embedded defaults) is ignored in this branch. - // - // • enabled + effort, no max_tokens → output_config.effort (newer models, e.g. - // claude-opus-4-6). No token budget means the caller chose the effort-based - // API. - // - // • enabled only (no effort, no max_tokens) → thinking with a default budget. - let (thinking, output_config) = if let Some(reasoning) = request.reasoning { - if reasoning.enabled == Some(true) { - if let Some(budget) = reasoning.max_tokens { - // Explicit budget → thinking object regardless of effort. - ( - Some(Thinking { - r#type: ThinkingType::Enabled, - budget_tokens: budget as u64, - }), - None, - ) - } else if let Some(effort) = reasoning.effort { - // Effort without budget → newer output_config API. - let output_effort = match effort { - forge_domain::Effort::Low => OutputEffort::Low, - forge_domain::Effort::High => OutputEffort::High, - forge_domain::Effort::Max => OutputEffort::Max, - // Map unsupported variants to the nearest Anthropic-valid effort. - forge_domain::Effort::None | forge_domain::Effort::Minimal => { - OutputEffort::Low - } - forge_domain::Effort::Medium => OutputEffort::Medium, - forge_domain::Effort::XHigh => OutputEffort::Max, - }; - (None, Some(OutputConfig { effort: output_effort })) - } else { - // Enabled-only → thinking with default budget. - ( - Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 10000 }), - None, - ) - } + // Gate on the domain rule so inherited configs with `enabled: None` but + // a positive effort / `max_tokens` still emit reasoning on the wire. + let reasoning_on = request.is_reasoning_supported(); + let (thinking, output_config) = if reasoning_on && let Some(reasoning) = request.reasoning { + // Adaptive thinking on 4.7 hides reasoning content by default; opting + // into reasoning should surface it unless the caller set `exclude`. + let adaptive_display = if reasoning.exclude == Some(true) { + Some(ThinkingDisplay::Omitted) } else { - // enabled=false or enabled=None → no reasoning emitted. - (None, None) - } + Some(ThinkingDisplay::Summarized) + }; + + let thinking = if let Some(budget) = reasoning.max_tokens { + Thinking::Enabled { budget_tokens: budget as u64 } + } else { + Thinking::Adaptive { display: adaptive_display } + }; + + // `Effort::None` is an explicit opt-out; `is_reasoning_supported` + // already filters it, but guard here so it can never become a stray + // `output_config.effort`. + let output_config = reasoning.effort.and_then(|effort| { + let output_effort = match effort { + forge_domain::Effort::None => return None, + forge_domain::Effort::Minimal | forge_domain::Effort::Low => OutputEffort::Low, + forge_domain::Effort::Medium => OutputEffort::Medium, + forge_domain::Effort::High => OutputEffort::High, + forge_domain::Effort::XHigh => OutputEffort::XHigh, + forge_domain::Effort::Max => OutputEffort::Max, + }; + Some(OutputConfig { effort: output_effort }) + }); + + (Some(thinking), output_config) } else { (None, None) }; @@ -540,37 +531,49 @@ mod tests { use super::*; #[test] - fn test_thinking_type_serializes_to_enabled() { - let thinking_type = ThinkingType::Enabled; - let actual = serde_json::to_string(&thinking_type).unwrap(); - let expected = r#""enabled""#; + fn test_thinking_enabled_serializes_with_budget() { + let thinking = Thinking::Enabled { budget_tokens: 5000 }; + let actual = serde_json::to_value(&thinking).unwrap(); + let expected = serde_json::json!({ + "type": "enabled", + "budget_tokens": 5000 + }); assert_eq!(actual, expected); } #[test] - fn test_thinking_type_serializes_to_disabled() { - let thinking_type = ThinkingType::Disabled; - let actual = serde_json::to_string(&thinking_type).unwrap(); - let expected = r#""disabled""#; + fn test_thinking_adaptive_serializes_without_display_when_none() { + let thinking = Thinking::Adaptive { display: None }; + let actual = serde_json::to_value(&thinking).unwrap(); + let expected = serde_json::json!({"type": "adaptive"}); assert_eq!(actual, expected); } #[test] - fn test_thinking_struct_serializes_correctly() { - let thinking = Thinking { r#type: ThinkingType::Enabled, budget_tokens: 5000 }; + fn test_thinking_adaptive_serializes_with_summarized_display() { + let thinking = Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }; let actual = serde_json::to_value(&thinking).unwrap(); let expected = serde_json::json!({ - "type": "enabled", - "budget_tokens": 5000 + "type": "adaptive", + "display": "summarized" }); assert_eq!(actual, expected); } #[test] - fn test_reasoning_enabled_with_max_tokens_creates_thinking() { + fn test_thinking_disabled_serializes() { + let thinking = Thinking::Disabled; + let actual = serde_json::to_value(&thinking).unwrap(); + let expected = serde_json::json!({"type": "disabled"}); + + assert_eq!(actual, expected); + } + + #[test] + fn test_reasoning_enabled_with_max_tokens_creates_enabled_thinking() { let fixture = Context::default().reasoning(ReasoningConfig { enabled: Some(true), max_tokens: Some(8000), @@ -582,15 +585,14 @@ mod tests { assert_eq!( actual.thinking, - Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 8000 }) + Some(Thinking::Enabled { budget_tokens: 8000 }) ); assert_eq!(actual.output_config, None); } #[test] - fn test_reasoning_max_tokens_takes_priority_over_effort() { - // When both max_tokens and effort are set, max_tokens triggers the thinking - // path because an explicit budget means the caller wants the older API. + fn test_reasoning_max_tokens_and_effort_emit_both() { + // Effort and budget are independent knobs — neither should hide the other. let fixture = Context::default().reasoning(ReasoningConfig { effort: Some(forge_domain::Effort::Low), enabled: Some(true), @@ -602,14 +604,34 @@ mod tests { assert_eq!( actual.thinking, - Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 8000 }) + Some(Thinking::Enabled { budget_tokens: 8000 }) + ); + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::Low }) + ); + } + + #[test] + fn test_reasoning_max_tokens_alone_emits_enabled_only() { + let fixture = Context::default().reasoning(ReasoningConfig { + effort: None, + enabled: Some(true), + max_tokens: Some(8000), + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!( + actual.thinking, + Some(Thinking::Enabled { budget_tokens: 8000 }) ); assert_eq!(actual.output_config, None); } #[test] - fn test_reasoning_effort_without_budget_creates_output_config() { - // Effort with no max_tokens routes to output_config (newer model path). + fn test_reasoning_effort_without_budget_creates_output_config_and_adaptive() { let fixture = Context::default().reasoning(ReasoningConfig { effort: Some(forge_domain::Effort::Low), enabled: Some(true), @@ -623,11 +645,48 @@ mod tests { actual.output_config, Some(OutputConfig { effort: OutputEffort::Low }) ); - assert_eq!(actual.thinking, None); + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) + ); + } + + #[test] + fn test_reasoning_effort_with_exclude_emits_adaptive_omitted() { + let fixture = Context::default().reasoning(ReasoningConfig { + effort: Some(forge_domain::Effort::High), + enabled: Some(true), + max_tokens: None, + exclude: Some(true), + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) }) + ); } #[test] - fn test_reasoning_enabled_without_max_tokens_uses_default_budget() { + fn test_reasoning_xhigh_effort_maps_to_xhigh() { + let fixture = Context::default().reasoning(ReasoningConfig { + effort: Some(forge_domain::Effort::XHigh), + enabled: Some(true), + max_tokens: None, + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::XHigh }) + ); + } + + #[test] + fn test_reasoning_enabled_without_budget_or_effort_defaults_to_adaptive_summarized() { let fixture = Context::default().reasoning(ReasoningConfig { enabled: Some(true), max_tokens: None, @@ -639,7 +698,24 @@ mod tests { assert_eq!( actual.thinking, - Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 10000 }) + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) + ); + } + + #[test] + fn test_reasoning_enabled_with_exclude_uses_omitted_display() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: None, + exclude: Some(true), + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) }) ); } @@ -658,7 +734,10 @@ mod tests { } #[test] - fn test_reasoning_enabled_none_does_not_create_thinking() { + fn test_reasoning_enabled_none_with_max_tokens_still_emits_thinking() { + // Matches the domain's `is_reasoning_supported` rule: enabled: None with a + // positive budget counts as on, so inherited/merged configs don't silently + // disable reasoning on the wire. let fixture = Context::default().reasoning(ReasoningConfig { enabled: None, max_tokens: Some(8000), @@ -668,7 +747,94 @@ mod tests { let actual = Request::try_from(fixture).unwrap(); + assert_eq!( + actual.thinking, + Some(Thinking::Enabled { budget_tokens: 8000 }) + ); + } + + #[test] + fn test_reasoning_enabled_none_with_effort_still_emits_output_config() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: None, + max_tokens: None, + effort: Some(forge_domain::Effort::High), + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::High }) + ); + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) + ); + } + + #[test] + fn test_reasoning_enabled_none_with_zero_max_tokens_does_not_emit() { + // Matches `is_reasoning_supported`: max_tokens > 0 is required. + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: None, + max_tokens: Some(0), + effort: None, + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + assert_eq!(actual.thinking, None); + assert_eq!(actual.output_config, None); + } + + #[test] + fn test_reasoning_effort_none_does_not_emit_anything() { + // Effort::None is an explicit opt-out — no thinking, no output_config. + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: None, + max_tokens: None, + effort: Some(forge_domain::Effort::None), + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!(actual.thinking, None); + assert_eq!(actual.output_config, None); + } + + #[test] + fn test_reasoning_effort_none_overrides_enabled_and_max_tokens() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(forge_domain::Effort::None), + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!(actual.thinking, None); + assert_eq!(actual.output_config, None); + } + + #[test] + fn test_reasoning_enabled_false_overrides_effort() { + // Explicit opt-out beats inferred enablement. + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(false), + max_tokens: None, + effort: Some(forge_domain::Effort::High), + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!(actual.thinking, None); + assert_eq!(actual.output_config, None); } #[test] diff --git a/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs b/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs index cfff041f18..1c82876a1c 100644 --- a/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs +++ b/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs @@ -5,10 +5,10 @@ pub struct ReasoningTransform; impl Transformer for ReasoningTransform { type Value = Context; fn transform(&mut self, mut context: Self::Value) -> Self::Value { - if let Some(reasoning) = context.reasoning.as_ref() - && reasoning.enabled.unwrap_or(false) - { - // if reasoning is enabled then we've to drop top_k and top_p + // Must stay in lockstep with the Anthropic request builder, which gates + // on the same predicate — otherwise `thinking`/`output_config` ship + // alongside sampling params that Anthropic rejects. + if context.is_reasoning_supported() { context.top_k = None; context.top_p = None; } @@ -85,4 +85,51 @@ mod tests { assert_eq!(actual, expected); } + + #[test] + fn test_enabled_none_with_effort_still_strips_top_k_and_top_p() { + // `enabled: None` + effort is treated as reasoning-on (domain rule). + let fixture = create_context_fixture().reasoning(ReasoningConfig { + enabled: None, + max_tokens: None, + effort: Some(forge_domain::Effort::High), + exclude: None, + }); + let mut transformer = ReasoningTransform; + let actual = transformer.transform(fixture); + + assert_eq!(actual.top_k, None); + assert_eq!(actual.top_p, None); + } + + #[test] + fn test_enabled_none_with_positive_max_tokens_still_strips_top_k_and_top_p() { + let fixture = create_context_fixture().reasoning(ReasoningConfig { + enabled: None, + max_tokens: Some(8000), + effort: None, + exclude: None, + }); + let mut transformer = ReasoningTransform; + let actual = transformer.transform(fixture); + + assert_eq!(actual.top_k, None); + assert_eq!(actual.top_p, None); + } + + #[test] + fn test_enabled_none_with_zero_max_tokens_preserves_top_k_and_top_p() { + // Matches `is_reasoning_supported`: max_tokens == 0 is treated as off. + let fixture = create_context_fixture().reasoning(ReasoningConfig { + enabled: None, + max_tokens: Some(0), + effort: None, + exclude: None, + }); + let mut transformer = ReasoningTransform; + let actual = transformer.transform(fixture.clone()); + + assert_eq!(actual.top_k, fixture.top_k); + assert_eq!(actual.top_p, fixture.top_p); + } } diff --git a/crates/forge_app/src/orch.rs b/crates/forge_app/src/orch.rs index 86157c24e2..a81145ac80 100644 --- a/crates/forge_app/src/orch.rs +++ b/crates/forge_app/src/orch.rs @@ -11,6 +11,7 @@ use tokio::sync::Notify; use tracing::warn; use crate::agent::AgentService; +use crate::transformers::ModelSpecificReasoning; use crate::{EnvironmentInfra, TemplateEngine}; #[derive(Clone, Setters)] @@ -208,7 +209,12 @@ impl> Orc .pipe(DropReasoningDetails.when(|_| !reasoning_supported)) // Strip all reasoning from messages when the model has changed (signatures are // model-specific and invalid across models). No-op when model is unchanged. - .pipe(ReasoningNormalizer::new(model_id.clone())); + .pipe(ReasoningNormalizer::new(model_id.clone())) + // Normalize Anthropic reasoning knobs per model family before provider conversion. + .pipe( + ModelSpecificReasoning::new(model_id.as_str()) + .when(|_| model_id.as_str().to_lowercase().contains("claude")), + ); let response = self .services .chat_agent( diff --git a/crates/forge_app/src/transformers/mod.rs b/crates/forge_app/src/transformers/mod.rs index ef434b62dd..a8b84543ea 100644 --- a/crates/forge_app/src/transformers/mod.rs +++ b/crates/forge_app/src/transformers/mod.rs @@ -1,7 +1,9 @@ mod compaction; mod dedupe_role; mod drop_role; +mod model_specific_reasoning; mod strip_working_dir; mod trim_context_summary; pub use compaction::SummaryTransformer; +pub(crate) use model_specific_reasoning::ModelSpecificReasoning; diff --git a/crates/forge_app/src/transformers/model_specific_reasoning.rs b/crates/forge_app/src/transformers/model_specific_reasoning.rs new file mode 100644 index 0000000000..7c39c2228c --- /dev/null +++ b/crates/forge_app/src/transformers/model_specific_reasoning.rs @@ -0,0 +1,390 @@ +use forge_domain::{Context, Effort, ReasoningConfig, Transformer}; +use tracing::warn; + +/// Default budget applied when converting adaptive-style reasoning into legacy +/// budget-based reasoning for pre-4.6 Anthropic model families. +const DEFAULT_LEGACY_BUDGET_TOKENS: usize = 10000; + +#[derive(Debug, PartialEq, Eq)] +enum AnthropicModelFamily { + AdaptiveOnly, + AdaptiveFriendly, + LegacyWithEffort, + LegacyNoEffort, +} + +/// Normalizes reasoning knobs for Anthropic model families before provider +/// conversion. +pub(crate) struct ModelSpecificReasoning { + model_id: String, +} + +impl ModelSpecificReasoning { + /// Creates a model-specific reasoning normalizer for the given model id. + pub(crate) fn new(model_id: impl Into) -> Self { + Self { model_id: model_id.into() } + } + + fn family(&self) -> AnthropicModelFamily { + let id = self.model_id.to_lowercase(); + if id.contains("opus-4-7") { + AnthropicModelFamily::AdaptiveOnly + } else if id.contains("opus-4-6") || id.contains("sonnet-4-6") { + AnthropicModelFamily::AdaptiveFriendly + } else if id.contains("opus-4-5") { + AnthropicModelFamily::LegacyWithEffort + } else { + AnthropicModelFamily::LegacyNoEffort + } + } +} + +fn replace_xhigh_with_max(reasoning: &mut Option) { + if let Some(reasoning) = reasoning.as_mut() + && reasoning.effort == Some(Effort::XHigh) + { + reasoning.effort = Some(Effort::Max); + } +} + +fn clamp_effort_to_high(reasoning: &mut Option) { + if let Some(reasoning) = reasoning.as_mut() + && matches!(reasoning.effort, Some(Effort::XHigh | Effort::Max)) + { + reasoning.effort = Some(Effort::High); + } +} + +fn set_default_legacy_budget(reasoning: &mut Option) { + if let Some(reasoning) = reasoning.as_mut() + && reasoning.max_tokens.is_none() + { + reasoning.max_tokens = Some(DEFAULT_LEGACY_BUDGET_TOKENS); + } +} + +impl Transformer for ModelSpecificReasoning { + type Value = Context; + + fn transform(&mut self, mut context: Self::Value) -> Self::Value { + let reasoning_on = context.is_reasoning_supported(); + + match self.family() { + AnthropicModelFamily::AdaptiveOnly => { + if reasoning_on + && let Some(reasoning) = context.reasoning.as_mut() + && let Some(max_tokens) = reasoning.max_tokens.take() + { + warn!( + model = %self.model_id, + dropped_max_tokens = max_tokens, + "Dropping `reasoning.max_tokens` for Opus 4.7: extended thinking budgets are unsupported. Use `reasoning.effort` to control thinking depth instead." + ); + } + context.temperature = None; + context.top_p = None; + context.top_k = None; + } + AnthropicModelFamily::AdaptiveFriendly => { + if reasoning_on { + replace_xhigh_with_max(&mut context.reasoning); + } + } + AnthropicModelFamily::LegacyWithEffort => { + if reasoning_on { + set_default_legacy_budget(&mut context.reasoning); + clamp_effort_to_high(&mut context.reasoning); + } + } + AnthropicModelFamily::LegacyNoEffort => { + if reasoning_on { + set_default_legacy_budget(&mut context.reasoning); + if let Some(reasoning) = context.reasoning.as_mut() + && reasoning.effort.is_some() + { + warn!( + model = %self.model_id, + "Dropping `reasoning.effort`: the effort parameter is only supported on Opus 4.5, Opus 4.6, Sonnet 4.6, and Opus 4.7." + ); + reasoning.effort = None; + } + } + } + } + + context + } +} + +#[cfg(test)] +mod tests { + use forge_domain::{Context, Effort, ReasoningConfig, Temperature, TopK, TopP, Transformer}; + use pretty_assertions::assert_eq; + + use super::*; + + fn fixture_context_with_sampling() -> Context { + Context::default() + .temperature(Temperature::new(0.5).unwrap()) + .top_p(TopP::new(0.9).unwrap()) + .top_k(TopK::new(40).unwrap()) + } + + #[test] + fn test_opus_4_7_drops_max_tokens_and_sampling_params() { + let fixture = fixture_context_with_sampling().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::XHigh), + exclude: Some(true), + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-7").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::XHigh), + exclude: Some(true), + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_7_strips_sampling_even_without_reasoning() { + let fixture = fixture_context_with_sampling(); + + let actual = ModelSpecificReasoning::new("claude-opus-4-7").transform(fixture); + + let expected = Context::default(); + + assert_eq!(actual, expected); + } + + #[test] + fn test_adaptive_friendly_replaces_xhigh_with_max() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::XHigh), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-6").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::Max), + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_legacy_with_effort_backfills_budget_and_clamps_effort() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::Max), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(DEFAULT_LEGACY_BUDGET_TOKENS), + effort: Some(Effort::High), + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_legacy_no_effort_backfills_budget_and_drops_effort() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::High), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-3-7-sonnet-20250219").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(DEFAULT_LEGACY_BUDGET_TOKENS), + effort: None, + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_7_bedrock_prefix_still_matches() { + // Bedrock region prefixes (`us.anthropic.claude-...`) must still be + // classified as AdaptiveOnly so sampling params are stripped and + // `max_tokens` is dropped. + let fixture = fixture_context_with_sampling().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::XHigh), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("us.anthropic.claude-opus-4-7").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::XHigh), + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_7_preserves_effort_when_dropping_max_tokens() { + // When both knobs are set on 4.7, only `max_tokens` should be dropped; + // `effort` is the remaining depth knob and must survive. + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::XHigh), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-7").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::XHigh), + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_5_clamps_max_to_high() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::Max), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::High), + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_5_preserves_supported_effort_levels() { + for level in [Effort::Low, Effort::Medium, Effort::High] { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(level.clone()), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(level.clone()), + exclude: None, + }); + + assert_eq!(actual, expected, "effort level {:?}", level); + } + } + + #[test] + fn test_legacy_no_effort_drops_effort_for_all_pre_4_5_ids() { + // All pre-Opus-4.5 Claude ids (plus the newer non-effort family members + // Sonnet 4.5 and Haiku 4.5) should land in LegacyNoEffort and have their + // effort stripped. + for model in [ + "claude-sonnet-4-5-20250929", + "claude-haiku-4-5-20251001", + "claude-opus-4-1-20250805", + "claude-opus-4-20250514", + "claude-3-7-sonnet-20250219", + ] { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::High), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new(model).transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: None, + exclude: None, + }); + + assert_eq!(actual, expected, "model {}", model); + } + } + + #[test] + fn test_no_reasoning_is_preserved_everywhere() { + // A context without `reasoning` must pass through unchanged for every + // family except AdaptiveOnly, which still strips sampling params. + for model in [ + "claude-opus-4-6", + "claude-sonnet-4-6", + "claude-opus-4-5-20251101", + "claude-3-7-sonnet-20250219", + ] { + let fixture = Context::default(); + let actual = ModelSpecificReasoning::new(model).transform(fixture); + let expected = Context::default(); + assert_eq!(actual, expected, "model {}", model); + } + } + + #[test] + fn test_adaptive_friendly_preserves_non_xhigh_effort() { + for level in [Effort::Low, Effort::Medium, Effort::High, Effort::Max] { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(level.clone()), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-6").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(level.clone()), + exclude: None, + }); + + assert_eq!(actual, expected, "effort level {:?}", level); + } + } +} diff --git a/crates/forge_domain/src/context.rs b/crates/forge_domain/src/context.rs index 664000e1eb..13e6c3398b 100644 --- a/crates/forge_domain/src/context.rs +++ b/crates/forge_domain/src/context.rs @@ -640,6 +640,12 @@ impl Context { /// Checks if reasoning is enabled by user or not. pub fn is_reasoning_supported(&self) -> bool { self.reasoning.as_ref().is_some_and(|reasoning| { + // `Effort::None` is a strong opt-out that wins over `enabled` and + // `max_tokens`. + if matches!(reasoning.effort, Some(crate::Effort::None)) { + return false; + } + // When enabled parameter is defined then return it's value directly. if reasoning.enabled.is_some() { return reasoning.enabled.unwrap_or_default(); @@ -1193,6 +1199,37 @@ mod tests { ); } + #[test] + fn test_context_is_reasoning_not_supported_when_effort_is_none() { + // `Effort::None` is documented as "skips the thinking step entirely" and + // must act as an explicit opt-out regardless of other fields. + let fixture = Context::default().reasoning(crate::ReasoningConfig { + effort: Some(crate::Effort::None), + ..Default::default() + }); + + let actual = fixture.is_reasoning_supported(); + + assert!(!actual); + } + + #[test] + fn test_context_is_reasoning_not_supported_when_effort_none_overrides_enabled_true() { + let fixture = Context::default().reasoning(crate::ReasoningConfig { + enabled: Some(true), + effort: Some(crate::Effort::None), + max_tokens: Some(8000), + ..Default::default() + }); + + let actual = fixture.is_reasoning_supported(); + + assert!( + !actual, + "Effort::None must win over enabled: true and max_tokens" + ); + } + #[test] fn test_add_attachments_file_content_is_droppable() { let fixture_attachments = vec![Attachment { diff --git a/crates/forge_repo/src/provider/anthropic.rs b/crates/forge_repo/src/provider/anthropic.rs index 3292f5ab9f..236dc45b7e 100644 --- a/crates/forge_repo/src/provider/anthropic.rs +++ b/crates/forge_repo/src/provider/anthropic.rs @@ -34,7 +34,7 @@ impl Anthropic { Self { http, provider, anthropic_version: version, use_oauth } } - fn get_headers(&self) -> Vec<(String, String)> { + fn get_headers(&self, model: Option<&ModelId>) -> Vec<(String, String)> { let mut headers = vec![( "anthropic-version".to_string(), self.anthropic_version.clone(), @@ -66,25 +66,36 @@ impl Anthropic { // Add beta flags (not needed for Vertex AI) if self.provider.id != ProviderId::VERTEX_AI_ANTHROPIC { + let mut betas: Vec<&'static str> = Vec::new(); if self.use_oauth { - // OAuth requires multiple beta flags including structured outputs - headers.push(( - "anthropic-beta".to_string(), - "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,structured-outputs-2025-11-13".to_string(), - )); - } else { - // API key auth also needs beta flags for structured outputs and thinking - headers.push(( - "anthropic-beta".to_string(), - "interleaved-thinking-2025-05-14,structured-outputs-2025-11-13".to_string(), - )); + betas.push("claude-code-20250219"); + betas.push("oauth-2025-04-20"); + } + // Adaptive thinking auto-enables interleaved thinking on Opus 4.7, + // Opus 4.6, and Sonnet 4.6 — the beta header is redundant there per + // the Opus 4.7 migration guide. Keep it for older models so manual + // `extended-thinking` requests still get interleaved turns. + if interleaved_thinking_required(model) { + betas.push("interleaved-thinking-2025-05-14"); } + betas.push("structured-outputs-2025-11-13"); + headers.push(("anthropic-beta".to_string(), betas.join(","))); } headers } } +/// Returns false when the model auto-enables interleaved thinking through +/// adaptive thinking (Opus 4.7, Opus 4.6, Sonnet 4.6). When the model is +/// unknown (e.g., listing endpoints), the flag is included because it is +/// harmless on non-chat endpoints and necessary on older chat models. +fn interleaved_thinking_required(model: Option<&ModelId>) -> bool { + let Some(model) = model else { return true }; + let id = model.as_str().to_lowercase(); + !(id.contains("opus-4-7") || id.contains("opus-4-6") || id.contains("sonnet-4-6")) +} + impl Anthropic { /// Determines whether this provider should bypass reqwest-eventsource /// content-type validation and parse SSE from raw bytes instead. @@ -145,7 +156,7 @@ impl Anthropic { serde_json::to_vec(&request).with_context(|| "Failed to serialize request")?; let parsed_url = Url::parse(&url).with_context(|| format!("Invalid URL: {}", url))?; - let headers = create_headers(self.get_headers()); + let headers = create_headers(self.get_headers(Some(model))); if self.should_use_raw_sse() { return self.chat_raw_sse(&parsed_url, headers, json_bytes).await; @@ -236,7 +247,7 @@ impl Anthropic { let response = self .http - .http_get(url, Some(create_headers(self.get_headers()))) + .http_get(url, Some(create_headers(self.get_headers(None)))) .await .with_context(|| format_http_context(None, "GET", url)) .with_context(|| "Failed to fetch models")?; @@ -663,7 +674,7 @@ mod tests { false, // API key auth (not OAuth) ); - let actual = fixture.get_headers(); + let actual = fixture.get_headers(None); // Should contain anthropic-version header assert!( @@ -691,9 +702,12 @@ mod tests { beta_value.contains("structured-outputs-2025-11-13"), "Beta header should include structured-outputs flag" ); + // When the model is unknown (e.g., model listing), keep the + // interleaved-thinking header since it is harmless on non-chat + // endpoints and still required for older chat models. assert!( beta_value.contains("interleaved-thinking-2025-05-14"), - "Beta header should include interleaved-thinking flag" + "Beta header should include interleaved-thinking flag when model is unknown" ); } @@ -742,7 +756,7 @@ mod tests { true, // OAuth auth ); - let actual = fixture.get_headers(); + let actual = fixture.get_headers(None); // Should contain anthropic-version header assert!( @@ -776,6 +790,115 @@ mod tests { ); } + #[test] + fn test_get_headers_drops_interleaved_thinking_for_4_6_plus_models() { + // Adaptive thinking auto-enables interleaved thinking on Opus 4.7, + // Opus 4.6, and Sonnet 4.6; the beta header is redundant there. + let chat_url = Url::parse("https://api.anthropic.com/v1/messages").unwrap(); + let model_url = Url::parse("https://api.anthropic.com/v1/models").unwrap(); + + let provider = Provider { + id: forge_app::domain::ProviderId::ANTHROPIC, + provider_type: forge_domain::ProviderType::Llm, + response: Some(forge_app::domain::ProviderResponse::Anthropic), + url: chat_url, + credential: Some(forge_domain::AuthCredential { + id: forge_app::domain::ProviderId::ANTHROPIC, + auth_details: forge_domain::AuthDetails::ApiKey(forge_domain::ApiKey::from( + "sk-test-key".to_string(), + )), + url_params: std::collections::HashMap::new(), + }), + auth_methods: vec![forge_domain::AuthMethod::ApiKey], + url_params: vec![], + models: Some(forge_domain::ModelSource::Url(model_url)), + custom_headers: None, + }; + + let fixture = Anthropic::new( + Arc::new(MockHttpClient::new()), + provider, + "2023-06-01".to_string(), + false, + ); + + for model_id in [ + "claude-opus-4-7", + "claude-opus-4-6", + "claude-sonnet-4-6", + "us.anthropic.claude-opus-4-7", + "global.anthropic.claude-sonnet-4-6", + ] { + let model = ModelId::new(model_id); + let actual = fixture.get_headers(Some(&model)); + let (_, beta_value) = actual + .iter() + .find(|(k, _)| k == "anthropic-beta") + .expect("anthropic-beta header should be present"); + assert!( + !beta_value.contains("interleaved-thinking-2025-05-14"), + "Beta header should NOT include interleaved-thinking flag for {} (auto-enabled by adaptive thinking)", + model_id + ); + assert!( + beta_value.contains("structured-outputs-2025-11-13"), + "structured-outputs flag must still be present for {}", + model_id + ); + } + } + + #[test] + fn test_get_headers_keeps_interleaved_thinking_for_pre_4_6_models() { + let chat_url = Url::parse("https://api.anthropic.com/v1/messages").unwrap(); + let model_url = Url::parse("https://api.anthropic.com/v1/models").unwrap(); + + let provider = Provider { + id: forge_app::domain::ProviderId::ANTHROPIC, + provider_type: forge_domain::ProviderType::Llm, + response: Some(forge_app::domain::ProviderResponse::Anthropic), + url: chat_url, + credential: Some(forge_domain::AuthCredential { + id: forge_app::domain::ProviderId::ANTHROPIC, + auth_details: forge_domain::AuthDetails::ApiKey(forge_domain::ApiKey::from( + "sk-test-key".to_string(), + )), + url_params: std::collections::HashMap::new(), + }), + auth_methods: vec![forge_domain::AuthMethod::ApiKey], + url_params: vec![], + models: Some(forge_domain::ModelSource::Url(model_url)), + custom_headers: None, + }; + + let fixture = Anthropic::new( + Arc::new(MockHttpClient::new()), + provider, + "2023-06-01".to_string(), + false, + ); + + for model_id in [ + "claude-opus-4-5-20251101", + "claude-sonnet-4-5-20250929", + "claude-haiku-4-5-20251001", + "claude-opus-4-1-20250805", + "claude-3-7-sonnet-20250219", + ] { + let model = ModelId::new(model_id); + let actual = fixture.get_headers(Some(&model)); + let (_, beta_value) = actual + .iter() + .find(|(k, _)| k == "anthropic-beta") + .expect("anthropic-beta header should be present"); + assert!( + beta_value.contains("interleaved-thinking-2025-05-14"), + "Beta header should include interleaved-thinking flag for pre-4.6 model {}", + model_id + ); + } + } + #[test] fn test_vertex_ai_removes_output_format() { use forge_domain::ResponseFormat; diff --git a/crates/forge_repo/src/provider/bedrock.rs b/crates/forge_repo/src/provider/bedrock.rs index c5e9653167..1901044f77 100644 --- a/crates/forge_repo/src/provider/bedrock.rs +++ b/crates/forge_repo/src/provider/bedrock.rs @@ -439,6 +439,20 @@ impl FromDomain use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; use aws_sdk_bedrockruntime::types::{InferenceConfiguration, Message, SystemContentBlock}; + // Capture reasoning-related flags before `context.messages` / other fields + // are consumed below. `ModelSpecificReasoning` runs earlier in the pipeline + // and has already normalized `reasoning` per model family, so here we just + // branch on the shape it produced: + // - `max_tokens.is_some()` -> legacy `thinking.enabled` budget shape + // - otherwise -> `thinking.adaptive` (Opus 4.7 / 4.6 / Sonnet 4.6) + let reasoning_on = context.is_reasoning_supported(); + let emits_legacy_thinking = reasoning_on + && context + .reasoning + .as_ref() + .and_then(|r| r.max_tokens) + .is_some(); + // Convert system messages let system: Vec = context .messages @@ -522,18 +536,17 @@ impl FromDomain }; // Convert inference configuration - // When extended thinking is enabled, top_p must be >= 0.95 or unset - let has_thinking = context - .reasoning - .as_ref() - .and_then(|r| r.enabled) - .unwrap_or(false); - let adjusted_top_p = if has_thinking { - // If thinking is enabled and top_p is set, ensure it's at least 0.95 + // When `thinking.enabled` (legacy budget shape) is being emitted below, + // Anthropic-on-Bedrock requires `top_p >= 0.95` or unset. `thinking.adaptive` + // (Opus 4.7 / Opus 4.6 / Sonnet 4.6) has no such constraint, and + // `ModelSpecificReasoning` already strips `top_p` entirely for Opus 4.7. + let adjusted_top_p = if emits_legacy_thinking { + // If legacy thinking is emitted and top_p is set, ensure it's at least 0.95 context.top_p.map(|p| { let value = p.value(); if value < 0.95 { - forge_domain::TopP::new(0.95).unwrap() + // SAFETY: 0.95 is a valid TopP value (between 0.0 and 1.0) + forge_domain::TopP::new(0.95).expect("0.95 is valid TopP") } else { p } @@ -558,29 +571,59 @@ impl FromDomain None }; - // Convert reasoning configuration to additional model request fields - // For Claude models with extended thinking support - // Based on AWS Bedrock docs: additionalModelRequestFields for Claude extended - // thinking https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html + // Convert reasoning configuration to `additional_model_request_fields` + // for Anthropic-on-Bedrock. Two thinking shapes are emitted based on + // `reasoning.max_tokens`, which `ModelSpecificReasoning` has already + // normalized per family: + // + // - `max_tokens: Some(N)` → `{type: "enabled", budget_tokens: N}` (Opus 4.5 + // and older; budget is backfilled to 10k when absent.) + // - `max_tokens: None` → `{type: "adaptive", display: ...}` (Opus 4.7 + // rejects the legacy shape with 400; Opus 4.6 / Sonnet 4.6 accept adaptive + // natively.) + // + // When present, `reasoning.effort` is emitted as `output_config.effort` + // for families that support it (`ModelSpecificReasoning` drops effort + // on LegacyNoEffort, so the Option is already correctly shaped here). + // + // AWS Bedrock passes `additional_model_request_fields` through verbatim + // to Anthropic for Claude models. See + // https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html let additional_model_fields = if let Some(reasoning_config) = &context.reasoning { - if reasoning_config.enabled.unwrap_or(false) { + if !reasoning_on { + None + } else { let mut thinking_config = std::collections::HashMap::new(); - thinking_config.insert( - "type".to_string(), - aws_smithy_types::Document::String("enabled".to_string()), - ); - - // Set budget_tokens (REQUIRED when thinking is enabled) - // The budget_tokens parameter determines the maximum number of tokens - // Claude is allowed to use for its internal reasoning process - // Default to 4000 if not specified (AWS recommendation for good quality) - let budget_tokens = reasoning_config.max_tokens.unwrap_or(4000); - thinking_config.insert( - "budget_tokens".to_string(), - aws_smithy_types::Document::Number(aws_smithy_types::Number::PosInt( - budget_tokens as u64, - )), - ); + if let Some(budget) = reasoning_config.max_tokens { + thinking_config.insert( + "type".to_string(), + aws_smithy_types::Document::String("enabled".to_string()), + ); + thinking_config.insert( + "budget_tokens".to_string(), + aws_smithy_types::Document::Number(aws_smithy_types::Number::PosInt( + budget as u64, + )), + ); + } else { + thinking_config.insert( + "type".to_string(), + aws_smithy_types::Document::String("adaptive".to_string()), + ); + // Opus 4.7 changed the default to `omitted`; preserve the + // caller's `exclude` preference so `exclude: true` stays + // `omitted` and every other case surfaces `summarized` + // (matching the legacy pre-4.7 visible-thinking behavior). + let display = if reasoning_config.exclude == Some(true) { + "omitted" + } else { + "summarized" + }; + thinking_config.insert( + "display".to_string(), + aws_smithy_types::Document::String(display.to_string()), + ); + } let mut fields = std::collections::HashMap::new(); fields.insert( @@ -588,9 +631,29 @@ impl FromDomain aws_smithy_types::Document::Object(thinking_config), ); + if let Some(effort) = reasoning_config.effort.as_ref() { + let effort_str = match effort { + forge_domain::Effort::None => None, + forge_domain::Effort::Minimal | forge_domain::Effort::Low => Some("low"), + forge_domain::Effort::Medium => Some("medium"), + forge_domain::Effort::High => Some("high"), + forge_domain::Effort::XHigh => Some("xhigh"), + forge_domain::Effort::Max => Some("max"), + }; + if let Some(effort_str) = effort_str { + let mut output_config = std::collections::HashMap::new(); + output_config.insert( + "effort".to_string(), + aws_smithy_types::Document::String(effort_str.to_string()), + ); + fields.insert( + "output_config".to_string(), + aws_smithy_types::Document::Object(output_config), + ); + } + } + Some(aws_smithy_types::Document::Object(fields)) - } else { - None } } else { None @@ -1788,6 +1851,184 @@ mod tests { assert!(actual.additional_model_request_fields().is_some()); } + /// Opus 4.7 / Opus 4.6 / Sonnet 4.6 path: `ModelSpecificReasoning` strips + /// `max_tokens`, so Bedrock emits `thinking.adaptive` with the legacy + /// `display: summarized` default (visible thinking). + #[test] + fn test_from_domain_context_emits_adaptive_thinking_when_max_tokens_absent() { + use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; + use forge_domain::{Context, ReasoningConfig}; + + let fixture = Context { + conversation_id: None, + initiator: None, + messages: vec![], + tools: vec![], + tool_choice: None, + temperature: None, + top_p: None, + top_k: None, + max_tokens: None, + reasoning: Some(ReasoningConfig { + effort: None, + max_tokens: None, // normalized away by ModelSpecificReasoning for 4.7/4.6 + exclude: None, + enabled: Some(true), + }), + stream: None, + response_format: None, + }; + + let actual = ConverseStreamInput::from_domain(fixture).unwrap(); + let fields = actual + .additional_model_request_fields() + .expect("adaptive thinking should emit additional_model_request_fields"); + + let thinking = match fields { + aws_smithy_types::Document::Object(m) => m.get("thinking").expect("thinking present"), + _ => panic!("expected object"), + }; + let thinking_map = match thinking { + aws_smithy_types::Document::Object(m) => m, + _ => panic!("expected thinking object"), + }; + assert_eq!( + thinking_map.get("type"), + Some(&aws_smithy_types::Document::String("adaptive".to_string())) + ); + assert_eq!( + thinking_map.get("display"), + Some(&aws_smithy_types::Document::String( + "summarized".to_string() + )) + ); + assert!( + thinking_map.get("budget_tokens").is_none(), + "adaptive must not carry budget_tokens" + ); + } + + /// `exclude: true` preference maps to `display: omitted` on the adaptive + /// shape. + #[test] + fn test_from_domain_context_adaptive_thinking_respects_exclude() { + use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; + use forge_domain::{Context, ReasoningConfig}; + + let fixture = Context { + conversation_id: None, + initiator: None, + messages: vec![], + tools: vec![], + tool_choice: None, + temperature: None, + top_p: None, + top_k: None, + max_tokens: None, + reasoning: Some(ReasoningConfig { + effort: None, + max_tokens: None, + exclude: Some(true), + enabled: Some(true), + }), + stream: None, + response_format: None, + }; + + let actual = ConverseStreamInput::from_domain(fixture).unwrap(); + let fields = actual.additional_model_request_fields().unwrap(); + let thinking = match fields { + aws_smithy_types::Document::Object(m) => m.get("thinking").unwrap(), + _ => panic!("expected object"), + }; + let thinking_map = match thinking { + aws_smithy_types::Document::Object(m) => m, + _ => panic!("expected thinking object"), + }; + assert_eq!( + thinking_map.get("display"), + Some(&aws_smithy_types::Document::String("omitted".to_string())) + ); + } + + /// Adaptive thinking must NOT trigger the legacy `top_p >= 0.95` clamp — + /// that constraint only applies to `thinking.enabled` (budget shape). + #[test] + fn test_from_domain_context_adaptive_thinking_does_not_clamp_top_p() { + use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; + use forge_domain::{Context, ReasoningConfig, TopP}; + + let fixture = Context { + conversation_id: None, + initiator: None, + messages: vec![], + tools: vec![], + tool_choice: None, + temperature: None, + top_p: Some(TopP::new(0.5).unwrap()), + top_k: None, + max_tokens: None, + reasoning: Some(ReasoningConfig { + effort: None, + max_tokens: None, + exclude: None, + enabled: Some(true), + }), + stream: None, + response_format: None, + }; + + let actual = ConverseStreamInput::from_domain(fixture).unwrap(); + let top_p = actual.inference_config().unwrap().top_p().unwrap(); + assert!( + (top_p - 0.5).abs() < f32::EPSILON, + "adaptive thinking must leave top_p untouched, got {top_p}" + ); + } + + /// When `reasoning.effort` survives normalization (i.e. 4.5+/4.6+/4.7 + /// families), it must be emitted as `output_config.effort`. + #[test] + fn test_from_domain_context_emits_output_config_effort() { + use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; + use forge_domain::{Context, Effort, ReasoningConfig}; + + let fixture = Context { + conversation_id: None, + initiator: None, + messages: vec![], + tools: vec![], + tool_choice: None, + temperature: None, + top_p: None, + top_k: None, + max_tokens: None, + reasoning: Some(ReasoningConfig { + effort: Some(Effort::High), + max_tokens: None, + exclude: None, + enabled: Some(true), + }), + stream: None, + response_format: None, + }; + + let actual = ConverseStreamInput::from_domain(fixture).unwrap(); + let fields = actual.additional_model_request_fields().unwrap(); + let output_config = match fields { + aws_smithy_types::Document::Object(m) => m.get("output_config").unwrap(), + _ => panic!("expected object"), + }; + let output_map = match output_config { + aws_smithy_types::Document::Object(m) => m, + _ => panic!("expected output_config object"), + }; + assert_eq!( + output_map.get("effort"), + Some(&aws_smithy_types::Document::String("high".to_string())) + ); + } + #[test] fn test_json_value_to_document_empty_object() { let fixture = serde_json::json!({});