diff --git a/crates/forge_app/src/fmt/fmt_input.rs b/crates/forge_app/src/fmt/fmt_input.rs index edc463f3ef..fc005d38b3 100644 --- a/crates/forge_app/src/fmt/fmt_input.rs +++ b/crates/forge_app/src/fmt/fmt_input.rs @@ -116,6 +116,11 @@ impl FormatContent for ToolCatalog { ToolCatalog::Fetch(input) => { Some(TitleFormat::debug("GET").sub_title(&input.url).into()) } + ToolCatalog::Websearch(input) => Some( + TitleFormat::debug("Web Search") + .sub_title(&input.query) + .into(), + ), ToolCatalog::Followup(input) => Some( TitleFormat::debug("Follow-up") .sub_title(&input.question) diff --git a/crates/forge_app/src/fmt/fmt_output.rs b/crates/forge_app/src/fmt/fmt_output.rs index 63866c3169..3aedc02d1c 100644 --- a/crates/forge_app/src/fmt/fmt_output.rs +++ b/crates/forge_app/src/fmt/fmt_output.rs @@ -50,6 +50,7 @@ impl FormatContent for ToolOperation { | ToolOperation::CodebaseSearch { output: _ } | ToolOperation::FsUndo { input: _, output: _ } | ToolOperation::NetFetch { input: _, output: _ } + | ToolOperation::WebSearch { input: _, output: _ } | ToolOperation::Shell { output: _ } | ToolOperation::FollowUp { output: _ } | ToolOperation::Skill { output: _ } => None, diff --git a/crates/forge_app/src/operation.rs b/crates/forge_app/src/operation.rs index 78c8cc42bb..046b398162 100644 --- a/crates/forge_app/src/operation.rs +++ b/crates/forge_app/src/operation.rs @@ -8,7 +8,7 @@ use forge_config::ForgeConfig; use forge_display::DiffFormat; use forge_domain::{ CodebaseSearchResults, Environment, FSMultiPatch, FSPatch, FSRead, FSRemove, FSSearch, FSUndo, - FSWrite, FileOperation, LineNumbers, Metrics, NetFetch, PlanCreate, ToolKind, + FSWrite, FileOperation, LineNumbers, Metrics, NetFetch, PlanCreate, ToolKind, WebSearch, }; use forge_template::Element; @@ -19,7 +19,7 @@ use crate::truncation::{ use crate::utils::{compute_hash, format_display_path}; use crate::{ FsRemoveOutput, FsUndoOutput, FsWriteOutput, HttpResponse, PatchOutput, PlanCreateOutput, - ReadOutput, 
ResponseContext, SearchResult, ShellOutput, + ReadOutput, ResponseContext, SearchResult, ShellOutput, WebSearchResponse, }; #[derive(Debug, Default, Setters)] @@ -66,6 +66,10 @@ pub enum ToolOperation { input: NetFetch, output: HttpResponse, }, + WebSearch { + input: WebSearch, + output: WebSearchResponse, + }, Shell { output: ShellOutput, }, @@ -584,6 +588,73 @@ impl ToolOperation { forge_domain::ToolOutput::text(elm) } + ToolOperation::WebSearch { input, output } => { + let mut elm = Element::new("web_search_results") + .attr("query", &input.query) + .attr("engine", &output.engine) + .attr("result_count", output.organic_results.len()); + + elm = elm.attr_if_some("search_id", output.search_id.as_ref()); + + if let Some(answer_box) = &output.answer_box { + let mut answer_elm = Element::new("answer_box"); + answer_elm = answer_elm.attr_if_some("title", answer_box.title.as_ref()); + answer_elm = answer_elm.attr_if_some("link", answer_box.link.as_ref()); + answer_elm = answer_elm.attr_if_some("answer", answer_box.answer.as_ref()); + answer_elm = answer_elm.attr_if_some("snippet", answer_box.snippet.as_ref()); + elm = elm.append(answer_elm); + } + + if let Some(knowledge_graph) = &output.knowledge_graph { + let mut graph_elm = Element::new("knowledge_graph"); + graph_elm = graph_elm.attr_if_some("title", knowledge_graph.title.as_ref()); + graph_elm = + graph_elm.attr_if_some("type", knowledge_graph.entity_type.as_ref()); + graph_elm = + graph_elm.attr_if_some("website", knowledge_graph.website.as_ref()); + graph_elm = graph_elm.attr_if_some( + "description", + knowledge_graph.description.as_ref(), + ); + elm = elm.append(graph_elm); + } + + for result in &output.organic_results { + let mut result_elm = Element::new("organic_result") + .attr("title", &result.title) + .attr("link", &result.link); + let position = result.position.map(|value| value.to_string()); + result_elm = result_elm.attr_if_some("position", position.as_ref()); + result_elm = + 
result_elm.attr_if_some("displayed_link", result.displayed_link.as_ref()); + result_elm = result_elm.attr_if_some("source", result.source.as_ref()); + result_elm = result_elm.attr_if_some("snippet", result.snippet.as_ref()); + elm = elm.append(result_elm); + } + + for question in &output.related_questions { + let mut question_elm = + Element::new("related_question").attr("question", &question.question); + question_elm = + question_elm.attr_if_some("snippet", question.snippet.as_ref()); + elm = elm.append(question_elm); + } + + for query in &output.related_searches { + elm = elm.append(Element::new("related_search").attr("query", query)); + } + + for story in &output.top_stories { + let mut story_elm = Element::new("top_story").attr("title", &story.title); + story_elm = story_elm.attr_if_some("link", story.link.as_ref()); + story_elm = story_elm.attr_if_some("source", story.source.as_ref()); + story_elm = story_elm.attr_if_some("date", story.date.as_ref()); + story_elm = story_elm.attr_if_some("snippet", story.snippet.as_ref()); + elm = elm.append(story_elm); + } + + forge_domain::ToolOutput::text(elm) + } ToolOperation::Shell { output } => { let mut parent_elem = Element::new("shell_output") .attr("command", &output.output.command) @@ -2378,6 +2449,103 @@ mod tests { insta::assert_snapshot!(to_value(actual)); } + #[test] + fn test_web_search_success() { + let fixture = ToolOperation::WebSearch { + input: forge_domain::WebSearch::default() + .query("saturn facts") + .mode(forge_domain::WebSearchMode::Standard), + output: crate::WebSearchResponse { + query: "saturn facts".to_string(), + engine: "google".to_string(), + search_id: Some("search-123".to_string()), + answer_box: Some(crate::WebSearchAnswerBox { + title: Some("Saturn".to_string()), + answer: Some("A gas giant planet".to_string()), + snippet: None, + link: Some("https://example.com/saturn".to_string()), + }), + knowledge_graph: Some(crate::WebSearchKnowledgeGraph { + title: Some("Saturn".to_string()), + 
entity_type: Some("Planet".to_string()), + description: Some("The sixth planet from the Sun.".to_string()), + website: Some("https://science.nasa.gov/saturn/".to_string()), + }), + organic_results: vec![crate::WebSearchOrganicResult { + position: Some(1), + title: "Saturn Facts".to_string(), + link: "https://science.nasa.gov/saturn/facts/".to_string(), + displayed_link: Some("science.nasa.gov › saturn › facts".to_string()), + source: Some("NASA".to_string()), + snippet: Some("Saturn facts and figures.".to_string()), + }], + related_questions: vec![crate::WebSearchRelatedQuestion { + question: "What is Saturn made of?".to_string(), + snippet: Some("Mostly hydrogen and helium.".to_string()), + }], + related_searches: vec!["saturn rings".to_string()], + top_stories: vec![crate::WebSearchTopStory { + title: "New Saturn mission announced".to_string(), + link: Some("https://example.com/story".to_string()), + source: Some("Space News".to_string()), + date: Some("1 day ago".to_string()), + snippet: Some("A new mission could launch soon.".to_string()), + }], + }, + }; + + let env = fixture_environment(); + let config = fixture_config(); + + let actual = fixture.into_tool_output( + ToolKind::Websearch, + TempContentFiles::default(), + &env, + &config, + &mut Metrics::default(), + ); + + insta::assert_snapshot!(to_value(actual)); + } + + #[test] + fn test_web_search_light_minimal_output() { + let fixture = ToolOperation::WebSearch { + input: forge_domain::WebSearch::default().query("coffee"), + output: crate::WebSearchResponse { + query: "coffee".to_string(), + engine: "google_light".to_string(), + search_id: None, + answer_box: None, + knowledge_graph: None, + organic_results: vec![crate::WebSearchOrganicResult { + position: Some(1), + title: "Coffee - Wikipedia".to_string(), + link: "https://en.wikipedia.org/wiki/Coffee".to_string(), + displayed_link: Some("en.wikipedia.org › wiki › Coffee".to_string()), + source: None, + snippet: Some("Coffee is a brewed 
drink.".to_string()), + }], + related_questions: vec![], + related_searches: vec![], + top_stories: vec![], + }, + }; + + let env = fixture_environment(); + let config = fixture_config(); + + let actual = fixture.into_tool_output( + ToolKind::Websearch, + TempContentFiles::default(), + &env, + &config, + &mut Metrics::default(), + ); + + insta::assert_snapshot!(to_value(actual)); + } + #[test] fn test_shell_success() { let fixture = ToolOperation::Shell { diff --git a/crates/forge_app/src/services.rs b/crates/forge_app/src/services.rs index 59f88f3be7..f56e49e7c3 100644 --- a/crates/forge_app/src/services.rs +++ b/crates/forge_app/src/services.rs @@ -108,6 +108,60 @@ pub struct HttpResponse { pub content_type: String, } +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct WebSearchResponse { + pub query: String, + pub engine: String, + pub search_id: Option, + pub answer_box: Option, + pub knowledge_graph: Option, + pub organic_results: Vec, + pub related_questions: Vec, + pub related_searches: Vec, + pub top_stories: Vec, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct WebSearchAnswerBox { + pub title: Option, + pub answer: Option, + pub snippet: Option, + pub link: Option, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct WebSearchKnowledgeGraph { + pub title: Option, + pub entity_type: Option, + pub description: Option, + pub website: Option, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct WebSearchOrganicResult { + pub position: Option, + pub title: String, + pub link: String, + pub displayed_link: Option, + pub source: Option, + pub snippet: Option, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct WebSearchRelatedQuestion { + pub question: String, + pub snippet: Option, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct WebSearchTopStory { + pub title: String, + pub link: Option, + pub source: Option, + pub date: Option, + pub snippet: Option, +} + #[derive(Debug)] pub 
enum ResponseContext { Parsed, @@ -204,6 +258,7 @@ pub trait AppConfigService: Send + Sync { /// all configuration changes; use [`forge_domain::ConfigOperation`] /// variants to describe each mutation. async fn update_config(&self, ops: Vec) -> anyhow::Result<()>; + } #[async_trait::async_trait] @@ -428,6 +483,15 @@ pub trait NetFetchService: Send + Sync { async fn fetch(&self, url: String, raw: Option) -> anyhow::Result; } +#[async_trait::async_trait] +pub trait WebSearchService: Send + Sync { + /// Searches the public web and returns a normalized structured result set. + async fn web_search( + &self, + params: forge_domain::WebSearch, + ) -> anyhow::Result; +} + #[async_trait::async_trait] pub trait ShellService: Send + Sync { /// Executes a shell command and returns the output. @@ -550,6 +614,7 @@ pub trait Services: Send + Sync + 'static + Clone + EnvironmentInfra { type FollowUpService: FollowUpService; type FsUndoService: FsUndoService; type NetFetchService: NetFetchService; + type WebSearchService: WebSearchService; type ShellService: ShellService; type McpService: McpService; type AuthService: AuthService; @@ -577,6 +642,7 @@ pub trait Services: Send + Sync + 'static + Clone + EnvironmentInfra { fn follow_up_service(&self) -> &Self::FollowUpService; fn fs_undo_service(&self) -> &Self::FsUndoService; fn net_fetch_service(&self) -> &Self::NetFetchService; + fn web_search_service(&self) -> &Self::WebSearchService; fn shell_service(&self) -> &Self::ShellService; fn mcp_service(&self) -> &Self::McpService; fn custom_instructions_service(&self) -> &Self::CustomInstructionsService; @@ -845,6 +911,16 @@ impl NetFetchService for I { } } +#[async_trait::async_trait] +impl WebSearchService for I { + async fn web_search( + &self, + params: forge_domain::WebSearch, + ) -> anyhow::Result { + self.web_search_service().web_search(params).await + } +} + #[async_trait::async_trait] impl ShellService for I { async fn execute( @@ -965,6 +1041,7 @@ impl AppConfigService for I { 
async fn update_config(&self, ops: Vec) -> anyhow::Result<()> { self.config_service().update_config(ops).await } + } #[async_trait::async_trait] diff --git a/crates/forge_app/src/snapshots/forge_app__operation__tests__web_search_light_minimal_output.snap b/crates/forge_app/src/snapshots/forge_app__operation__tests__web_search_light_minimal_output.snap new file mode 100644 index 0000000000..0f1696c8cb --- /dev/null +++ b/crates/forge_app/src/snapshots/forge_app__operation__tests__web_search_light_minimal_output.snap @@ -0,0 +1,18 @@ +--- +source: crates/forge_app/src/operation.rs +expression: to_value(actual) +--- + + + + diff --git a/crates/forge_app/src/snapshots/forge_app__operation__tests__web_search_success.snap b/crates/forge_app/src/snapshots/forge_app__operation__tests__web_search_success.snap new file mode 100644 index 0000000000..f970409e80 --- /dev/null +++ b/crates/forge_app/src/snapshots/forge_app__operation__tests__web_search_success.snap @@ -0,0 +1,50 @@ +--- +source: crates/forge_app/src/operation.rs +expression: to_value(actual) +--- + + + + + + + + + + + + + + diff --git a/crates/forge_app/src/tool_executor.rs b/crates/forge_app/src/tool_executor.rs index fee0c2dcec..05fe63782f 100644 --- a/crates/forge_app/src/tool_executor.rs +++ b/crates/forge_app/src/tool_executor.rs @@ -11,7 +11,7 @@ use crate::{ AgentRegistry, ConversationService, EnvironmentInfra, FollowUpService, FsPatchService, FsReadService, FsRemoveService, FsSearchService, FsUndoService, FsWriteService, ImageReadService, NetFetchService, PlanCreateService, ProviderService, SkillFetchService, - WorkspaceService, + WebSearchService, WorkspaceService, }; pub struct ToolExecutor { @@ -25,6 +25,7 @@ impl< + FsSearchService + WorkspaceService + NetFetchService + + WebSearchService + FsRemoveService + FsPatchService + FsUndoService @@ -280,6 +281,10 @@ impl< let output = self.services.fetch(input.url.clone(), input.raw).await?; (input, output).into() } + ToolCatalog::Websearch(input) => { + 
let output = self.services.web_search(input.clone()).await?; + ToolOperation::WebSearch { input, output } + } ToolCatalog::Followup(input) => { let output = self .services diff --git a/crates/forge_domain/src/compact/summary.rs b/crates/forge_domain/src/compact/summary.rs index 3416dfdba8..6a0f81e4d7 100644 --- a/crates/forge_domain/src/compact/summary.rs +++ b/crates/forge_domain/src/compact/summary.rs @@ -357,6 +357,9 @@ fn extract_tool_info(call: &ToolCallFull, current_todos: &[Todo]) -> Option Some(SummaryTool::Undo { path: input.path }), ToolCatalog::Fetch(input) => Some(SummaryTool::Fetch { url: input.url }), + ToolCatalog::Websearch(input) => Some(SummaryTool::Search { + pattern: input.query, + }), ToolCatalog::Followup(input) => { Some(SummaryTool::Followup { question: input.question }) } diff --git a/crates/forge_domain/src/tools/catalog.rs b/crates/forge_domain/src/tools/catalog.rs index 4db441107e..4623c57c7d 100644 --- a/crates/forge_domain/src/tools/catalog.rs +++ b/crates/forge_domain/src/tools/catalog.rs @@ -51,6 +51,7 @@ pub enum ToolCatalog { Undo(FSUndo), Shell(Shell), Fetch(NetFetch), + Websearch(WebSearch), Followup(Followup), Plan(PlanCreate), Skill(SkillFetch), @@ -311,6 +312,77 @@ pub enum OutputMode { Count, } +/// Search mode for websearch. +#[derive( + Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, AsRefStr, EnumIter, Default, +)] +#[serde(rename_all = "snake_case")] +pub enum WebSearchMode { + /// Use SerpApi's lightweight Google engine for faster, smaller results. + #[default] + Light, + /// Use SerpApi's standard Google engine for richer SERP features. + Standard, +} + +impl JsonSchema for WebSearchMode { + fn schema_name() -> Cow<'static, str> { + ::simple_enum_schema_name() + } + + fn json_schema(r#gen: &mut schemars::generate::SchemaGenerator) -> Schema { + ::simple_enum_schema(r#gen) + } +} + +/// Safe search level for Google web search. 
+#[derive( + Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, AsRefStr, EnumIter, Default, +)] +#[serde(rename_all = "snake_case")] +pub enum WebSearchSafe { + /// Blur or filter explicit content. + #[default] + Active, + /// Disable safe search filtering. + Off, +} + +impl JsonSchema for WebSearchSafe { + fn schema_name() -> Cow<'static, str> { + ::simple_enum_schema_name() + } + + fn json_schema(r#gen: &mut schemars::generate::SchemaGenerator) -> Schema { + ::simple_enum_schema(r#gen) + } +} + +/// Device profile used by SerpApi Google search. +#[derive( + Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, AsRefStr, EnumIter, Default, +)] +#[serde(rename_all = "snake_case")] +pub enum WebSearchDevice { + /// Use desktop search results. + #[default] + Desktop, + /// Use tablet search results. + Tablet, + /// Use mobile search results. + Mobile, +} + +impl JsonSchema for WebSearchDevice { + fn schema_name() -> Cow<'static, str> { + ::simple_enum_schema_name() + } + + fn json_schema(r#gen: &mut schemars::generate::SchemaGenerator) -> Schema { + ::simple_enum_schema(r#gen) + } +} + /// A paired query and use_case for semantic search. Each query must have a /// corresponding use_case for document reranking. #[derive(Default, Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)] @@ -629,6 +701,62 @@ pub struct NetFetch { pub raw: Option, } +#[derive( + Default, + Debug, + Clone, + Serialize, + Deserialize, + JsonSchema, + ToolDescription, + PartialEq, + derive_setters::Setters, +)] +#[tool_description_file = "crates/forge_domain/src/tools/descriptions/websearch.md"] +#[setters(strip_option, into)] +pub struct WebSearch { + /// Search query to send to SerpApi-backed Google search. + pub query: String, + + /// Search mode. Defaults to `light`. + #[serde(default)] + #[serde(skip_serializing_if = "is_default")] + pub mode: WebSearchMode, + + /// Geographic location to search from, such as "Austin, Texas, United + /// States". 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub location: Option, + + /// Google domain to use, such as "google.com". + #[serde(skip_serializing_if = "Option::is_none")] + pub google_domain: Option, + + /// Country code for Google search, such as "us". + #[serde(skip_serializing_if = "Option::is_none")] + pub gl: Option, + + /// Language code for Google search, such as "en". + #[serde(skip_serializing_if = "Option::is_none")] + pub hl: Option, + + /// Result offset for pagination. + #[serde(skip_serializing_if = "Option::is_none")] + pub start: Option, + + /// Safe search level. + #[serde(skip_serializing_if = "Option::is_none")] + pub safe: Option, + + /// Device profile for the search. + #[serde(skip_serializing_if = "Option::is_none")] + pub device: Option, + + /// Force a fresh SerpApi fetch instead of using an exact cached response. + #[serde(skip_serializing_if = "Option::is_none")] + pub no_cache: Option, +} + #[derive(Default, Debug, Clone, Serialize, Deserialize, JsonSchema, ToolDescription, PartialEq)] #[tool_description_file = "crates/forge_domain/src/tools/descriptions/followup.md"] pub struct Followup { @@ -808,6 +936,7 @@ impl ToolDescription for ToolCatalog { ToolCatalog::Shell(v) => v.description(), ToolCatalog::Followup(v) => v.description(), ToolCatalog::Fetch(v) => v.description(), + ToolCatalog::Websearch(v) => v.description(), ToolCatalog::FsSearch(v) => v.description(), ToolCatalog::SemSearch(v) => v.description(), ToolCatalog::Read(v) => v.description(), @@ -867,6 +996,7 @@ impl ToolCatalog { ToolCatalog::Shell(_) => r#gen.into_root_schema_for::(), ToolCatalog::Followup(_) => r#gen.into_root_schema_for::(), ToolCatalog::Fetch(_) => r#gen.into_root_schema_for::(), + ToolCatalog::Websearch(_) => r#gen.into_root_schema_for::(), ToolCatalog::FsSearch(_) => r#gen.into_root_schema_for::(), ToolCatalog::SemSearch(_) => r#gen.into_root_schema_for::(), ToolCatalog::Read(_) => r#gen.into_root_schema_for::(), @@ -995,6 +1125,11 @@ impl 
ToolCatalog { cwd, message: format!("Fetch content from URL: {}", input.url), }), + ToolCatalog::Websearch(input) => Some(crate::policies::PermissionOperation::Fetch { + url: "https://serpapi.com/search".to_string(), + cwd, + message: format!("Search the web for query: {}", input.query), + }), // Operations that don't require permission checks ToolCatalog::SemSearch(_) | ToolCatalog::Undo(_) @@ -1081,6 +1216,14 @@ impl ToolCatalog { })) } + /// Creates a WebSearch tool call with the specified query. + pub fn tool_call_websearch(query: &str) -> ToolCallFull { + ToolCallFull::from(ToolCatalog::Websearch(WebSearch { + query: query.to_string(), + ..Default::default() + })) + } + /// Creates a Followup tool call with the specified question pub fn tool_call_followup(question: &str) -> ToolCallFull { ToolCallFull::from(ToolCatalog::Followup(Followup { @@ -1217,7 +1360,7 @@ mod tests { use strum::IntoEnumIterator; use super::Shell; - use crate::{ToolCatalog, ToolKind, ToolName}; + use crate::{ToolCatalog, ToolKind, ToolName, WebSearchMode}; #[test] fn test_tool_definition() { @@ -1822,6 +1965,49 @@ mod tests { assert_eq!(actual, expected); } + #[test] + fn test_websearch_defaults_to_light_mode() { + use crate::{ToolCallArguments, ToolCallFull}; + + let setup = ToolCallFull { + name: ToolName::new("websearch"), + call_id: None, + arguments: ToolCallArguments::from_json(r#"{"query":"rust async tutorial"}"#), + thought_signature: None, + }; + + let actual = ToolCatalog::try_from(setup).unwrap(); + let expected = ToolCatalog::Websearch( + crate::WebSearch::default() + .query("rust async tutorial") + .mode(WebSearchMode::Light), + ); + + assert_eq!(actual, expected); + } + + #[test] + fn test_websearch_policy_operation_uses_query_message() { + use std::path::PathBuf; + + use crate::policies::PermissionOperation; + + let setup = ToolCatalog::Websearch( + crate::WebSearch::default() + .query("serpapi rust examples") + .mode(WebSearchMode::Standard), + ); + + let actual = 
setup.to_policy_operation(PathBuf::from("/test/cwd")).unwrap(); + let expected = PermissionOperation::Fetch { + url: "https://serpapi.com/search".to_string(), + cwd: PathBuf::from("/test/cwd"), + message: "Search the web for query: serpapi rust examples".to_string(), + }; + + assert_eq!(actual, expected); + } + #[test] fn test_normalize_tool_name_trims_whitespace() { let actual = super::normalize_tool_name(&ToolName::new(" read ")); diff --git a/crates/forge_domain/src/tools/descriptions/websearch.md b/crates/forge_domain/src/tools/descriptions/websearch.md new file mode 100644 index 0000000000..57353ea93f --- /dev/null +++ b/crates/forge_domain/src/tools/descriptions/websearch.md @@ -0,0 +1 @@ +Searches the public web through SerpApi-backed Google results and returns a normalized summary of search findings. Use this when the user needs current web information, recent facts, or external references that are better answered from search results than by directly fetching a known URL. Default to `mode: "light"` because it is faster and returns the core web results; use `mode: "standard"` only when richer Google result features are needed, such as answer boxes, richer knowledge graph data, top stories, or when light results are insufficient. This tool requires a configured `SERPAPI_API_KEY`, only searches public web content, and returns summarized structured results rather than full page bodies. 
diff --git a/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap b/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap index 0a2b2e838d..79bd377a06 100644 --- a/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap +++ b/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap @@ -323,6 +323,80 @@ expression: tools "url" ] } +{ + "title": "WebSearch", + "type": "object", + "properties": { + "device": { + "description": "Device profile for the search.", + "type": "string", + "enum": [ + "desktop", + "tablet", + "mobile", + null + ], + "nullable": true + }, + "gl": { + "description": "Country code for Google search, such as \"us\".", + "type": "string", + "nullable": true + }, + "google_domain": { + "description": "Google domain to use, such as \"google.com\".", + "type": "string", + "nullable": true + }, + "hl": { + "description": "Language code for Google search, such as \"en\".", + "type": "string", + "nullable": true + }, + "location": { + "description": "Geographic location to search from, such as \"Austin, Texas, United\nStates\".", + "type": "string", + "nullable": true + }, + "mode": { + "description": "Search mode. 
Defaults to `light`.", + "type": "string", + "enum": [ + "light", + "standard" + ] + }, + "no_cache": { + "description": "Force a fresh SerpApi fetch instead of using an exact cached response.", + "type": "boolean", + "nullable": true + }, + "query": { + "description": "Search query to send to SerpApi-backed Google search.", + "type": "string" + }, + "safe": { + "description": "Safe search level.", + "type": "string", + "enum": [ + "active", + "off", + null + ], + "nullable": true + }, + "start": { + "description": "Result offset for pagination.", + "type": "integer", + "format": "uint32", + "minimum": 0, + "nullable": true + } + }, + "required": [ + "query" + ] +} { "title": "Followup", "type": "object", diff --git a/crates/forge_repo/src/agents/forge.md b/crates/forge_repo/src/agents/forge.md index 14fea74699..a509196a8b 100644 --- a/crates/forge_repo/src/agents/forge.md +++ b/crates/forge_repo/src/agents/forge.md @@ -16,6 +16,7 @@ tools: - multi_patch - shell - fetch + - websearch - skill - todo_write - todo_read diff --git a/crates/forge_repo/src/agents/muse.md b/crates/forge_repo/src/agents/muse.md index 39f341647c..85678a29ca 100644 --- a/crates/forge_repo/src/agents/muse.md +++ b/crates/forge_repo/src/agents/muse.md @@ -10,6 +10,7 @@ tools: - search - read - fetch + - websearch - plan - mcp_* user_prompt: |- diff --git a/crates/forge_repo/src/agents/sage.md b/crates/forge_repo/src/agents/sage.md index 0287a22101..dcf7fd6e37 100644 --- a/crates/forge_repo/src/agents/sage.md +++ b/crates/forge_repo/src/agents/sage.md @@ -9,6 +9,7 @@ tools: - search - read - fetch + - websearch user_prompt: |- <{{event.name}}>{{event.value}} {{current_date}} diff --git a/crates/forge_services/src/forge_services.rs b/crates/forge_services/src/forge_services.rs index 7ff1d1a2fb..d3972d5363 100644 --- a/crates/forge_services/src/forge_services.rs +++ b/crates/forge_services/src/forge_services.rs @@ -27,6 +27,7 @@ use crate::template::ForgeTemplateService; use 
crate::tool_services::{ ForgeFetch, ForgeFollowup, ForgeFsPatch, ForgeFsRead, ForgeFsRemove, ForgeFsSearch, ForgeFsUndo, ForgeFsWrite, ForgeImageRead, ForgePlanCreate, ForgeShell, ForgeSkillFetch, + ForgeWebSearch, }; type McpService = ForgeMcpService, F, ::Client>; @@ -72,6 +73,7 @@ pub struct ForgeServices< file_undo_service: Arc>, shell_service: Arc>, fetch_service: Arc, + websearch_service: Arc>, followup_service: Arc>, mcp_service: Arc>, custom_instructions_service: Arc>, @@ -127,6 +129,7 @@ impl< let file_undo_service = Arc::new(ForgeFsUndo::new(infra.clone())); let shell_service = Arc::new(ForgeShell::new(infra.clone())); let fetch_service = Arc::new(ForgeFetch::new()); + let websearch_service = Arc::new(ForgeWebSearch::new(infra.clone())); let followup_service = Arc::new(ForgeFollowup::new(infra.clone())); let custom_instructions_service = Arc::new(ForgeCustomInstructionsService::new(infra.clone())); @@ -157,6 +160,7 @@ impl< file_undo_service, shell_service, fetch_service, + websearch_service, followup_service, mcp_service, custom_instructions_service, @@ -225,6 +229,7 @@ impl< type FollowUpService = ForgeFollowup; type FsUndoService = ForgeFsUndo; type NetFetchService = ForgeFetch; + type WebSearchService = ForgeWebSearch; type ShellService = ForgeShell; type McpService = McpService; type AuthService = AuthService; @@ -299,6 +304,10 @@ impl< &self.fetch_service } + fn web_search_service(&self) -> &Self::WebSearchService { + &self.websearch_service + } + fn shell_service(&self) -> &Self::ShellService { &self.shell_service } diff --git a/crates/forge_services/src/tool_services/mod.rs b/crates/forge_services/src/tool_services/mod.rs index 64a5c6f3c0..ffbfdee5ba 100644 --- a/crates/forge_services/src/tool_services/mod.rs +++ b/crates/forge_services/src/tool_services/mod.rs @@ -10,6 +10,7 @@ mod image_read; mod plan_create; mod shell; mod skill; +mod websearch; pub use fetch::*; pub use followup::*; @@ -23,3 +24,4 @@ pub use image_read::*; pub use 
plan_create::*; pub use shell::*; pub use skill::*; +pub use websearch::*; diff --git a/crates/forge_services/src/tool_services/websearch.rs b/crates/forge_services/src/tool_services/websearch.rs new file mode 100644 index 0000000000..4f005b6741 --- /dev/null +++ b/crates/forge_services/src/tool_services/websearch.rs @@ -0,0 +1,626 @@ +use std::sync::Arc; + +use anyhow::{Context, anyhow}; +use forge_app::{ + EnvironmentInfra, HttpInfra, WebSearchAnswerBox, WebSearchKnowledgeGraph, WebSearchOrganicResult, + WebSearchRelatedQuestion, WebSearchResponse, WebSearchService, WebSearchTopStory, +}; +use forge_domain::{WebSearch, WebSearchDevice, WebSearchMode, WebSearchSafe}; +use reqwest::Url; +use serde::Deserialize; +use thiserror::Error; + +const SERP_API_URL: &str = "https://serpapi.com/search"; +const SERPAPI_API_KEY_ENV: &str = "SERPAPI_API_KEY"; + +#[derive(Debug, Error)] +enum WebSearchError { + #[error("SERPAPI_API_KEY is not set")] + MissingApiKey, + #[error("Search query cannot be empty")] + EmptyQuery, + #[error("SerpApi returned an error: {0}")] + Api(String), +} + +/// Searches the public web through SerpApi-backed Google search. 
+/// Web search service that queries SerpApi's Google engines.
+///
+/// Wraps the shared infrastructure handle, which provides the environment
+/// lookup used for the API key and the HTTP client used to send the request.
+pub struct ForgeWebSearch<F>(Arc<F>);
+
+impl<F> ForgeWebSearch<F> {
+    /// Creates a web search service on top of the given infrastructure.
+    pub fn new(infra: Arc<F>) -> Self {
+        Self(infra)
+    }
+
+    /// Returns the SerpApi `engine` value for a search mode.
+    ///
+    /// Shared by request building and response parsing so the engine sent in
+    /// the URL and the fallback engine reported in the result cannot drift.
+    fn engine_name(mode: &WebSearchMode) -> &'static str {
+        match mode {
+            WebSearchMode::Light => "google_light",
+            WebSearchMode::Standard => "google",
+        }
+    }
+}
+
+impl<F: EnvironmentInfra<Config = forge_config::ForgeConfig>> ForgeWebSearch<F> {
+    /// Reads the SerpApi key from the environment.
+    ///
+    /// Surrounding whitespace is removed before the key is returned, so a
+    /// value with a trailing newline is still usable.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`WebSearchError::MissingApiKey`] when the variable is unset or
+    /// contains only whitespace.
+    fn api_key(&self) -> anyhow::Result<String> {
+        self.0
+            .get_env_var(SERPAPI_API_KEY_ENV)
+            .map(|value| value.trim().to_string())
+            .filter(|value| !value.is_empty())
+            .ok_or_else(|| anyhow!(WebSearchError::MissingApiKey))
+    }
+
+    /// Builds the SerpApi request URL for the given search parameters.
+    ///
+    /// The query is trimmed before it is sent. Optional parameters are only
+    /// appended when they are set.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`WebSearchError::EmptyQuery`] when the query is blank, or a
+    /// parse error when `SERP_API_URL` is not a valid URL.
+    fn build_url(&self, params: &WebSearch, api_key: &str) -> anyhow::Result<Url> {
+        if params.query.trim().is_empty() {
+            return Err(anyhow!(WebSearchError::EmptyQuery));
+        }
+
+        let mut url = Url::parse(SERP_API_URL)?;
+
+        {
+            // Scoped so the serializer's mutable borrow of `url` ends before
+            // the URL is returned.
+            let mut query = url.query_pairs_mut();
+            query.append_pair("engine", Self::engine_name(&params.mode));
+            query.append_pair("q", params.query.trim());
+            query.append_pair("api_key", api_key);
+
+            if let Some(location) = params.location.as_deref() {
+                query.append_pair("location", location);
+            }
+            if let Some(google_domain) = params.google_domain.as_deref() {
+                query.append_pair("google_domain", google_domain);
+            }
+            if let Some(gl) = params.gl.as_deref() {
+                query.append_pair("gl", gl);
+            }
+            if let Some(hl) = params.hl.as_deref() {
+                query.append_pair("hl", hl);
+            }
+            if let Some(start) = params.start {
+                query.append_pair("start", &start.to_string());
+            }
+            if let Some(safe) = params.safe {
+                query.append_pair(
+                    "safe",
+                    match safe {
+                        WebSearchSafe::Active => "active",
+                        WebSearchSafe::Off => "off",
+                    },
+                );
+            }
+            if let Some(device) = params.device {
+                query.append_pair(
+                    "device",
+                    match device {
+                        WebSearchDevice::Desktop => "desktop",
+                        WebSearchDevice::Tablet => "tablet",
+                        WebSearchDevice::Mobile => "mobile",
+                    },
+                );
+            }
+            if let Some(no_cache) = params.no_cache {
+                query.append_pair("no_cache", &no_cache.to_string());
+            }
+        }
+
+        Ok(url)
+    }
+
+    /// Converts a raw SerpApi JSON body into a [`WebSearchResponse`].
+    ///
+    /// Organic results without a title or link, and top stories without a
+    /// title, are dropped. When the response does not echo the engine, the
+    /// engine implied by `params.mode` is reported instead.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the body is not valid JSON for the expected shape, or with
+    /// [`WebSearchError::Api`] when the body carries an `error` field.
+    fn parse_response(
+        &self,
+        params: &WebSearch,
+        body: &[u8],
+    ) -> anyhow::Result<WebSearchResponse> {
+        let response: SerpApiResponse =
+            serde_json::from_slice(body).context("Failed to parse SerpApi response")?;
+
+        if let Some(error) = response.error {
+            return Err(anyhow!(WebSearchError::Api(error)));
+        }
+
+        let engine = response
+            .search_parameters
+            .and_then(|value| value.engine)
+            .unwrap_or_else(|| Self::engine_name(&params.mode).to_string());
+
+        let answer_box = response.answer_box.map(|value| WebSearchAnswerBox {
+            title: value.title,
+            answer: value.answer,
+            snippet: value.snippet,
+            link: value.link,
+        });
+
+        let knowledge_graph = response
+            .knowledge_graph
+            .map(|value| WebSearchKnowledgeGraph {
+                title: value.title,
+                entity_type: value.entity_type,
+                description: value.description,
+                website: value.website,
+            });
+
+        let organic_results = response
+            .organic_results
+            .unwrap_or_default()
+            .into_iter()
+            .filter_map(|value| {
+                // A result is only useful when it has both a title and a link.
+                let title = value.title?;
+                let link = value.link?;
+                Some(WebSearchOrganicResult {
+                    position: value.position,
+                    title,
+                    link,
+                    displayed_link: value.displayed_link,
+                    source: value.source,
+                    snippet: value.snippet,
+                })
+            })
+            .collect();
+
+        let related_questions = response
+            .related_questions
+            .unwrap_or_default()
+            .into_iter()
+            .map(|value| WebSearchRelatedQuestion {
+                question: value.question,
+                snippet: value.snippet,
+            })
+            .collect();
+
+        // Both wire shapes carry a query string, so nothing is filtered out.
+        let related_searches = response
+            .related_searches
+            .unwrap_or_default()
+            .into_iter()
+            .map(|value| match value {
+                SerpRelatedSearch::Query { query } | SerpRelatedSearch::Text(query) => query,
+            })
+            .collect();
+
+        let top_stories = response
+            .top_stories
+            .unwrap_or_default()
+            .into_iter()
+            .filter_map(|value| {
+                Some(WebSearchTopStory {
+                    title: value.title?,
+                    link: value.link,
+                    source: value.source,
+                    date: value.date,
+                    snippet: value.snippet,
+                })
+            })
+            .collect();
+
+        Ok(WebSearchResponse {
+            query: params.query.clone(),
+            engine,
+            search_id: response.search_metadata.and_then(|value| value.id),
+            answer_box,
+            knowledge_graph,
+            organic_results,
+            related_questions,
+            related_searches,
+            top_stories,
+        })
+    }
+}
+
+#[async_trait::async_trait]
+impl<F: HttpInfra + EnvironmentInfra<Config = forge_config::ForgeConfig> + Send + Sync>
+    WebSearchService for ForgeWebSearch<F>
+{
+    /// Runs a web search: resolves the API key, builds the request URL, sends
+    /// a GET request and normalizes the response body.
+    async fn web_search(&self, params: WebSearch) -> anyhow::Result<WebSearchResponse> {
+        let api_key = self.api_key()?;
+        let url = self.build_url(&params, &api_key)?;
+        let response = self.0.http_get(&url, None).await.with_context(|| {
+            format!(
+                "Failed to execute web search for query '{}'",
+                params.query
+            )
+        })?;
+        let body = response
+            .bytes()
+            .await
+            .context("Failed to read SerpApi response body")?;
+
+        self.parse_response(&params, &body)
+    }
+}
+
+/// Top-level SerpApi response. Every section is optional on the wire.
+#[derive(Debug, Deserialize)]
+struct SerpApiResponse {
+    #[serde(default)]
+    search_metadata: Option<SerpSearchMetadata>,
+    #[serde(default)]
+    search_parameters: Option<SerpSearchParameters>,
+    #[serde(default)]
+    answer_box: Option<SerpAnswerBox>,
+    #[serde(default)]
+    knowledge_graph: Option<SerpKnowledgeGraph>,
+    #[serde(default)]
+    organic_results: Option<Vec<SerpOrganicResult>>,
+    #[serde(default)]
+    related_questions: Option<Vec<SerpRelatedQuestion>>,
+    #[serde(default)]
+    related_searches: Option<Vec<SerpRelatedSearch>>,
+    #[serde(default)]
+    top_stories: Option<Vec<SerpTopStory>>,
+    #[serde(default)]
+    error: Option<String>,
+}
+
+/// `search_metadata` section; only the search id is read.
+#[derive(Debug, Deserialize)]
+struct SerpSearchMetadata {
+    #[serde(default)]
+    id: Option<String>,
+}
+
+/// `search_parameters` section; only the echoed engine is read.
+#[derive(Debug, Deserialize)]
+struct SerpSearchParameters {
+    #[serde(default)]
+    engine: Option<String>,
+}
+
+/// `answer_box` section of the response.
+#[derive(Debug, Deserialize)]
+struct SerpAnswerBox {
+    #[serde(default)]
+    title: Option<String>,
+    #[serde(default)]
+    answer: Option<String>,
+    #[serde(default)]
+    snippet: Option<String>,
+    #[serde(default)]
+    link: Option<String>,
+}
+
+/// `knowledge_graph` section of the response.
+#[derive(Debug, Deserialize)]
+struct SerpKnowledgeGraph {
+    #[serde(default)]
+    title: Option<String>,
+    // `type` is a Rust keyword, so the field is renamed on the Rust side.
+    #[serde(default, rename = "type")]
+    entity_type: Option<String>,
+    #[serde(default)]
+    description: Option<String>,
+    #[serde(default)]
+    website: Option<String>,
+}
+
+/// One entry of `organic_results`.
+#[derive(Debug, Deserialize)]
+struct SerpOrganicResult {
+    // NOTE(review): integer width reconstructed; it must match the type of
+    // `WebSearchOrganicResult::position` - confirm against its definition.
+    #[serde(default)]
+    position: Option<u32>,
+    #[serde(default)]
+    title: Option<String>,
+    #[serde(default)]
+    link: Option<String>,
+    #[serde(default)]
+    displayed_link: Option<String>,
+    #[serde(default)]
+    source: Option<String>,
+    #[serde(default)]
+    snippet: Option<String>,
+}
+
+/// One entry of `related_questions`; the question text is required.
+#[derive(Debug, Deserialize)]
+struct SerpRelatedQuestion {
+    question: String,
+    #[serde(default)]
+    snippet: Option<String>,
+}
+
+/// One entry of `related_searches`, accepted either as an object with a
+/// `query` field or as a bare string.
+#[derive(Debug, Deserialize)]
+#[serde(untagged)]
+enum SerpRelatedSearch {
+    Query { query: String },
+    Text(String),
+}
+
+/// One entry of `top_stories`.
+#[derive(Debug, Deserialize)]
+struct SerpTopStory {
+    #[serde(default)]
+    title: Option<String>,
+    #[serde(default)]
+    link: Option<String>,
+    #[serde(default)]
+    source: Option<String>,
+    #[serde(default)]
+    date: Option<String>,
+    #[serde(default)]
+    snippet: Option<String>,
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::BTreeMap;
+
+    use forge_app::{HttpInfra, domain::Environment};
+    use forge_domain::{ConfigOperation, WebSearch};
+    use pretty_assertions::assert_eq;
+    use reqwest::Response;
+    use reqwest::header::HeaderMap;
+    use reqwest_eventsource::EventSource;
+
+    use super::*;
+
+    /// Infrastructure double that serves environment variables from a map.
+    /// The HTTP methods are never expected to be reached by these tests.
+    struct MockInfra {
+        env: BTreeMap<String, String>,
+    }
+
+    impl EnvironmentInfra for MockInfra {
+        type Config = forge_config::ForgeConfig;
+
+        fn get_env_var(&self, key: &str) -> Option<String> {
+            self.env.get(key).cloned()
+        }
+
+        fn get_env_vars(&self) -> BTreeMap<String, String> {
+            self.env.clone()
+        }
+
+        fn get_environment(&self) -> Environment {
+            use fake::{Fake, Faker};
+            Faker.fake()
+        }
+
+        fn get_config(&self) -> anyhow::Result<forge_config::ForgeConfig> {
+            Ok(forge_config::ForgeConfig::default())
+        }
+
+        async fn update_environment(&self, _ops: Vec<ConfigOperation>) -> anyhow::Result<()> {
+            Ok(())
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl HttpInfra for MockInfra {
+        async fn http_get(
+            &self,
+            _url: &Url,
+            _headers: Option<HeaderMap>,
+        ) -> anyhow::Result<Response> {
+            unimplemented!()
+        }
+
+        async fn http_post(
+            &self,
+            _url: &Url,
+            _headers: Option<HeaderMap>,
+            _body: bytes::Bytes,
+        ) -> anyhow::Result<Response> {
+            unimplemented!()
+        }
+
+        async fn http_delete(&self, _url: &Url) -> anyhow::Result<Response> {
+            unimplemented!()
+        }
+
+        async fn http_eventsource(
+            &self,
+            _url: &Url,
+            _headers: Option<HeaderMap>,
+            _body: bytes::Bytes,
+        ) -> anyhow::Result<EventSource> {
+            unimplemented!()
+        }
+    }
+
+    fn fixture() -> ForgeWebSearch<MockInfra> {
+        ForgeWebSearch::new(Arc::new(MockInfra {
+            env: BTreeMap::from([(SERPAPI_API_KEY_ENV.to_string(), "secret".to_string())]),
+        }))
+    }
+
+    #[tokio::test]
+    async fn test_web_search_requires_api_key() {
+        let setup = ForgeWebSearch::new(Arc::new(MockInfra { env: BTreeMap::new() }));
+
+        let actual = setup
+            .web_search(WebSearch::default().query("rust web frameworks"))
+            .await;
+        let expected = "SERPAPI_API_KEY is not set";
+
+        assert_eq!(actual.unwrap_err().to_string(), expected);
+    }
+
+    #[test]
+    fn test_api_key_trims_surrounding_whitespace() {
+        let setup = ForgeWebSearch::new(Arc::new(MockInfra {
+            env: BTreeMap::from([(SERPAPI_API_KEY_ENV.to_string(), " secret\n".to_string())]),
+        }));
+
+        let actual = setup.api_key().unwrap();
+        let expected = "secret".to_string();
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_build_url_uses_light_mode_by_default() {
+        let setup = fixture();
+        let actual = setup
+            .build_url(&WebSearch::default().query("rust web frameworks"), "secret")
+            .unwrap();
+        let expected = Some("google_light");
+
+        assert_eq!(
+            actual
+                .query_pairs()
+                .find(|(key, _)| key == "engine")
+                .map(|(_, value)| value.to_string())
+                .as_deref(),
+            expected
+        );
+    }
+
+    #[test]
+    fn test_build_url_uses_standard_mode_when_requested() {
+        let setup = fixture();
+        let actual = setup
+            .build_url(
+                &WebSearch::default()
+                    .query("rust web frameworks")
+                    .mode(WebSearchMode::Standard),
+                "secret",
+            )
+            .unwrap();
+        let expected = Some("google");
+
+        assert_eq!(
+            actual
+                .query_pairs()
+                .find(|(key, _)| key == "engine")
+                .map(|(_, value)| value.to_string())
+                .as_deref(),
+            expected
+        );
+    }
+
+    #[test]
+    fn test_build_url_forwards_optional_parameters() {
+        let setup = fixture();
+        let actual = setup
+            .build_url(
+                &WebSearch::default()
+                    .query("best rust books")
+                    .location("Austin, Texas, United States")
+                    .google_domain("google.com")
+                    .gl("us")
+                    .hl("en")
+                    .start(10_u32)
+                    .safe(WebSearchSafe::Off)
+                    .device(WebSearchDevice::Mobile)
+                    .no_cache(true),
+                "secret",
+            )
+            .unwrap();
+        let expected = BTreeMap::from([
+            ("device".to_string(), "mobile".to_string()),
+            ("engine".to_string(), "google_light".to_string()),
+            ("gl".to_string(), "us".to_string()),
+            ("google_domain".to_string(), "google.com".to_string()),
+            ("hl".to_string(), "en".to_string()),
+            (
+                "location".to_string(),
+                "Austin, Texas, United States".to_string(),
+            ),
+            ("no_cache".to_string(), "true".to_string()),
+            ("q".to_string(), "best rust books".to_string()),
+            ("safe".to_string(), "off".to_string()),
+            ("start".to_string(), "10".to_string()),
+        ]);
+
+        // The API key is excluded so the secret never appears in the
+        // expectation.
+        let actual = actual
+            .query_pairs()
+            .filter(|(key, _)| key != "api_key")
+            .map(|(key, value)| (key.to_string(), value.to_string()))
+            .collect::<BTreeMap<_, _>>();
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_parse_response_normalizes_light_results() {
+        let setup = fixture();
+        let actual = setup
+            .parse_response(
+                &WebSearch::default().query("coffee"),
+                br#"{
+                    "search_metadata": { "id": "abc123" },
+                    "search_parameters": { "engine": "google_light" },
+                    "organic_results": [
+                        {
+                            "position": 1,
+                            "title": "Coffee - Wikipedia",
+                            "link": "https://en.wikipedia.org/wiki/Coffee",
+                            "displayed_link": "en.wikipedia.org \u203a wiki \u203a Coffee",
+                            "snippet": "Coffee is a brewed drink."
+                        }
+                    ],
+                    "related_searches": [
+                        { "query": "coffee beans" },
+                        "coffee near me"
+                    ]
+                }"#,
+            )
+            .unwrap();
+        let expected = WebSearchResponse {
+            query: "coffee".to_string(),
+            engine: "google_light".to_string(),
+            search_id: Some("abc123".to_string()),
+            answer_box: None,
+            knowledge_graph: None,
+            organic_results: vec![WebSearchOrganicResult {
+                position: Some(1),
+                title: "Coffee - Wikipedia".to_string(),
+                link: "https://en.wikipedia.org/wiki/Coffee".to_string(),
+                displayed_link: Some("en.wikipedia.org \u{203a} wiki \u{203a} Coffee".to_string()),
+                source: None,
+                snippet: Some("Coffee is a brewed drink.".to_string()),
+            }],
+            related_questions: vec![],
+            related_searches: vec!["coffee beans".to_string(), "coffee near me".to_string()],
+            top_stories: vec![],
+        };
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_parse_response_normalizes_standard_rich_results() {
+        let setup = fixture();
+        let actual = setup
+            .parse_response(
+                &WebSearch::default()
+                    .query("saturn")
+                    .mode(WebSearchMode::Standard),
+                br#"{
+                    "search_metadata": { "id": "xyz789" },
+                    "search_parameters": { "engine": "google" },
+                    "answer_box": {
+                        "title": "Saturn",
+                        "answer": "A gas giant planet",
+                        "link": "https://example.com/saturn"
+                    },
+                    "knowledge_graph": {
+                        "title": "Saturn",
+                        "type": "Planet",
+                        "description": "The sixth planet from the Sun.",
+                        "website": "https://science.nasa.gov/saturn/"
+                    },
+                    "organic_results": [
+                        {
+                            "position": 1,
+                            "title": "Saturn Facts",
+                            "link": "https://science.nasa.gov/saturn/facts/",
+                            "displayed_link": "science.nasa.gov \u203a saturn \u203a facts",
+                            "source": "NASA",
+                            "snippet": "Saturn facts and figures."
+                        }
+                    ],
+                    "related_questions": [
+                        {
+                            "question": "What is Saturn made of?",
+                            "snippet": "Mostly hydrogen and helium."
+                        }
+                    ],
+                    "top_stories": [
+                        {
+                            "title": "New Saturn mission announced",
+                            "link": "https://example.com/story",
+                            "source": "Space News",
+                            "date": "1 day ago",
+                            "snippet": "A new mission could launch soon."
+                        }
+                    ]
+                }"#,
+            )
+            .unwrap();
+        let expected = WebSearchResponse {
+            query: "saturn".to_string(),
+            engine: "google".to_string(),
+            search_id: Some("xyz789".to_string()),
+            answer_box: Some(WebSearchAnswerBox {
+                title: Some("Saturn".to_string()),
+                answer: Some("A gas giant planet".to_string()),
+                snippet: None,
+                link: Some("https://example.com/saturn".to_string()),
+            }),
+            knowledge_graph: Some(WebSearchKnowledgeGraph {
+                title: Some("Saturn".to_string()),
+                entity_type: Some("Planet".to_string()),
+                description: Some("The sixth planet from the Sun.".to_string()),
+                website: Some("https://science.nasa.gov/saturn/".to_string()),
+            }),
+            organic_results: vec![WebSearchOrganicResult {
+                position: Some(1),
+                title: "Saturn Facts".to_string(),
+                link: "https://science.nasa.gov/saturn/facts/".to_string(),
+                displayed_link: Some(
+                    "science.nasa.gov \u{203a} saturn \u{203a} facts".to_string(),
+                ),
+                source: Some("NASA".to_string()),
+                snippet: Some("Saturn facts and figures.".to_string()),
+            }],
+            related_questions: vec![WebSearchRelatedQuestion {
+                question: "What is Saturn made of?".to_string(),
+                snippet: Some("Mostly hydrogen and helium.".to_string()),
+            }],
+            related_searches: vec![],
+            top_stories: vec![WebSearchTopStory {
+                title: "New Saturn mission announced".to_string(),
+                link: Some("https://example.com/story".to_string()),
+                source: Some("Space News".to_string()),
+                date: Some("1 day ago".to_string()),
+                snippet: Some("A new mission could launch soon.".to_string()),
+            }],
+        };
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_parse_response_surfaces_serpapi_errors() {
+        let setup = fixture();
+        let actual = setup.parse_response(
+            &WebSearch::default().query("blocked"),
+            br#"{ "error": "Invalid API key." }"#,
+        );
+        let expected = "SerpApi returned an error: Invalid API key.";
+
+        assert_eq!(actual.unwrap_err().to_string(), expected);
+    }
+}