diff --git a/README.md b/README.md index ee34109dec..22ade5377b 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,27 @@ print("All done!") For installation instructions and detailed setup, see the [Getting Started Guide](https://docs.openhands.dev/sdk/getting-started). +### Codebase Search (Optional) + +Add natural-language code search to your agent with [Morph's WarpGrep](https://morphllm.com). Requires a `MORPH_API_KEY` ([get one here](https://morphllm.com/dashboard/api-keys)) and Node.js 18+. + +```python +from openhands.tools.codebase_search import register_codebase_search_tools + +register_codebase_search_tools() + +agent = Agent( + llm=llm, + tools=[ + # ... your other tools ... + Tool(name="codebase_search"), # search local repos + Tool(name="github_codebase_search"), # search public GitHub repos + ], +) +``` + +See [`examples/01_standalone_sdk/45_codebase_search.py`](examples/01_standalone_sdk/45_codebase_search.py) for a full working example. + ## Documentation For detailed documentation, tutorials, and API reference, visit: diff --git a/examples/01_standalone_sdk/45_codebase_search.py b/examples/01_standalone_sdk/45_codebase_search.py new file mode 100644 index 0000000000..9a461e2ac9 --- /dev/null +++ b/examples/01_standalone_sdk/45_codebase_search.py @@ -0,0 +1,53 @@ +"""Codebase search using Morph's WarpGrep. + +Natural-language code search backed by an LLM sub-agent that uses ripgrep, +file reads, and directory listing under the hood. 
Two tools are provided: + + - ``codebase_search`` — search a local repository + - ``github_codebase_search`` — search a public GitHub repository + +Requirements: + - MORPH_API_KEY : Get from https://morphllm.com/dashboard/api-keys + - LLM_API_KEY : Your LLM provider API key + - Node.js 18+ : Runs the ``bridge.js`` script (``node``); needs ``@morphllm/morphsdk`` +""" + +import os + +from openhands.sdk import LLM, Agent, Conversation, Tool +from openhands.tools.codebase_search import register_codebase_search_tools +from openhands.tools.file_editor import FileEditorTool +from openhands.tools.terminal import TerminalTool + +# 1. Register the Morph search tools (explicit, not automatic) +register_codebase_search_tools() + +# 2. Configure the LLM +llm = LLM( + model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), + api_key=os.getenv("LLM_API_KEY"), + base_url=os.getenv("LLM_BASE_URL", None), +) + +# 3. Build the agent with search + editing tools +agent = Agent( + llm=llm, + tools=[ + Tool(name=TerminalTool.name), + Tool(name=FileEditorTool.name), + # Morph search tools — pass api_key here or set MORPH_API_KEY env var + Tool(name="codebase_search"), + Tool(name="github_codebase_search"), + ], +) + +# 4. Run a conversation +cwd = os.getcwd() +conversation = Conversation(agent=agent, workspace=cwd) +conversation.send_message( + "Use codebase_search to find how errors are handled in this project, " + "then summarize what you found." 
+) +conversation.run() + +print(f"EXAMPLE_COST: {llm.metrics.accumulated_cost}") diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile index 8b8cc5ae7e..eaad050f8f 100644 --- a/openhands-agent-server/openhands/agent_server/docker/Dockerfile +++ b/openhands-agent-server/openhands/agent_server/docker/Dockerfile @@ -86,6 +86,7 @@ RUN set -eux; \ rm -rf /var/lib/apt/lists/* # Pre-install ACP servers for ACPAgent support (Claude Code + Codex) +# and Morph WarpGrep SDK for codebase_search / github_codebase_search tools. # Install Node.js/npm if not present (SWE-bench base images may lack them) RUN set -eux; \ if ! command -v npm >/dev/null 2>&1; then \ @@ -93,7 +94,7 @@ RUN set -eux; \ apt-get install -y --no-install-recommends nodejs && \ rm -rf /var/lib/apt/lists/*; \ fi; \ - npm install -g @zed-industries/claude-agent-acp @zed-industries/codex-acp + npm install -g @zed-industries/claude-agent-acp @zed-industries/codex-acp @morphllm/morphsdk # Configure Claude Code managed settings for headless operation: # Allow all tool permissions (no human in the loop to approve). 
diff --git a/openhands-agent-server/openhands/agent_server/tool_router.py b/openhands-agent-server/openhands/agent_server/tool_router.py index 6403a82be4..57e68f55af 100644 --- a/openhands-agent-server/openhands/agent_server/tool_router.py +++ b/openhands-agent-server/openhands/agent_server/tool_router.py @@ -3,6 +3,7 @@ from fastapi import APIRouter from openhands.sdk.tool.registry import list_registered_tools +from openhands.tools.codebase_search import register_codebase_search_tools from openhands.tools.preset.default import register_default_tools from openhands.tools.preset.gemini import register_gemini_tools from openhands.tools.preset.planning import register_planning_tools @@ -12,6 +13,7 @@ register_default_tools(enable_browser=True) register_gemini_tools(enable_browser=True) register_planning_tools() +register_codebase_search_tools() # Tool listing diff --git a/openhands-tools/openhands/tools/__init__.py b/openhands-tools/openhands/tools/__init__.py index 622b2e954c..39b41a85d9 100644 --- a/openhands-tools/openhands/tools/__init__.py +++ b/openhands-tools/openhands/tools/__init__.py @@ -18,6 +18,7 @@ from importlib.metadata import PackageNotFoundError, version +from openhands.tools.codebase_search import register_codebase_search_tools from openhands.tools.delegate import DelegationVisualizer from openhands.tools.file_editor import FileEditorTool from openhands.tools.preset.default import ( @@ -39,6 +40,7 @@ __all__ = [ "__version__", "DelegationVisualizer", + "register_codebase_search_tools", "FileEditorTool", "TaskTrackerTool", "TerminalTool", diff --git a/openhands-tools/openhands/tools/codebase_search/__init__.py b/openhands-tools/openhands/tools/codebase_search/__init__.py new file mode 100644 index 0000000000..55416b981b --- /dev/null +++ b/openhands-tools/openhands/tools/codebase_search/__init__.py @@ -0,0 +1,28 @@ +"""Codebase search tools powered by Morph's WarpGrep SDK. 
+ +Two tools are registered: + +- ``codebase_search`` — search a local repository +- ``github_codebase_search`` — search a public GitHub repository + +Requires ``MORPH_API_KEY`` (get one at https://morphllm.com/dashboard/api-keys), +Node.js 18+, and ``@morphllm/morphsdk`` (``npm install -g @morphllm/morphsdk``). +""" + +from openhands.tools.codebase_search.definition import ( + CodebaseSearchAction, + CodebaseSearchObservation, + CodebaseSearchTool, + GitHubCodebaseSearchAction, + GitHubCodebaseSearchTool, + register_codebase_search_tools, +) + +__all__ = [ + "CodebaseSearchAction", + "CodebaseSearchObservation", + "CodebaseSearchTool", + "GitHubCodebaseSearchAction", + "GitHubCodebaseSearchTool", + "register_codebase_search_tools", +] diff --git a/openhands-tools/openhands/tools/codebase_search/bridge.js b/openhands-tools/openhands/tools/codebase_search/bridge.js new file mode 100644 index 0000000000..86c03203d3 --- /dev/null +++ b/openhands-tools/openhands/tools/codebase_search/bridge.js @@ -0,0 +1,52 @@ +#!/usr/bin/env node +/** + * Bridge script that wraps @morphllm/morphsdk for use from Python. + * + * Reads a single JSON object from stdin, calls the appropriate SDK method, + * and writes the result as JSON to stdout. + * + * Input schema: + * { "type": "local"|"github", "query": "...", "repo_path": "...", "owner_repo": "...", "github_url": "...", "branch": "..." } + * + * Output schema: + * { "success": bool, "contexts": [...], "summary": "...", "error": "..." 
} + */ + +const { WarpGrepClient } = require("@morphllm/morphsdk/tools/warp-grep"); + +async function main() { + // Read JSON from stdin + const chunks = []; + for await (const chunk of process.stdin) chunks.push(chunk); + const input = JSON.parse(Buffer.concat(chunks).toString()); + + const client = new WarpGrepClient({ + morphApiKey: process.env.MORPH_API_KEY, + morphApiUrl: process.env.MORPH_API_URL || undefined, + timeout: Number(process.env.MORPH_WARP_GREP_TIMEOUT) || undefined, + }); + + let result; + if (input.type === "github") { + const github = input.github_url || input.owner_repo || ""; + result = await client.searchGitHub({ + searchTerm: input.query, + github, + branch: input.branch || undefined, + }); + } else { + result = await client.execute({ + searchTerm: input.query, + repoRoot: input.repo_path || ".", + }); + } + + process.stdout.write(JSON.stringify(result)); +} + +main().catch((err) => { + const payload = JSON.stringify({ success: false, error: err?.message || String(err) }); + // Exit 0 so Python still parses the JSON, but only after stdout has flushed: + // a bare process.exit() can discard writes still pending on a pipe. + process.stdout.write(payload, () => process.exit(0)); +}); diff --git a/openhands-tools/openhands/tools/codebase_search/definition.py b/openhands-tools/openhands/tools/codebase_search/definition.py new file mode 100644 index 0000000000..965381764c --- /dev/null +++ b/openhands-tools/openhands/tools/codebase_search/definition.py @@ -0,0 +1,317 @@ +"""Codebase search tools powered by Morph's WarpGrep SDK. + +Registers ``codebase_search`` and ``github_codebase_search`` as native +OpenHands tools. Each tool calls ``@morphllm/morphsdk`` via a small +Node.js bridge script (``bridge.js``), which handles the multi-turn +WarpGrep agent loop internally and returns aggregated results. 
+""" + +from __future__ import annotations + +import json +import os +import subprocess +from collections.abc import Sequence +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from pydantic import Field + +from openhands.sdk.logger import get_logger +from openhands.sdk.tool import ( + Action, + Observation, + ToolAnnotations, + ToolDefinition, + ToolExecutor, + register_tool, +) + +if TYPE_CHECKING: + from openhands.sdk.conversation import LocalConversation + from openhands.sdk.conversation.state import ConversationState + +logger = get_logger(__name__) + +_BRIDGE_SCRIPT = Path(__file__).parent / "bridge.js" + +_CODEBASE_SEARCH_DESCRIPTION = """\ +Search the local codebase using natural language. This tool uses a \ +specialised code-search sub-agent (WarpGrep) that runs ripgrep and file \ +reads internally, then returns the most relevant code snippets. + +Pass a natural-language question — do NOT pass regex or symbol-only queries. + +Good: "Where does authentication get handled?" +Bad: "auth()" or "grep -r auth" +""" + +_GITHUB_CODEBASE_SEARCH_DESCRIPTION = """\ +Search a public GitHub repository using natural language. Provide either \ +a full GitHub URL or an owner/repo shorthand (e.g. "expressjs/express"). \ +The tool clones and searches the repo remotely — no local checkout needed. + +Pass a natural-language question — do NOT pass regex or symbol-only queries. +""" + + +# ── Actions ───────────────────────────────────────────────────────────── + + +class CodebaseSearchAction(Action): + """Search a local repository with a natural-language query.""" + + search_string: str = Field( + description=( + "Natural-language question about the code you want to understand. " + "Good: 'Where does auth get handled?' 
" + "Bad: 'auth()'" + ), + ) + repo_path: str = Field( + description="Absolute path to the repository root to search.", + ) + + +class GitHubCodebaseSearchAction(Action): + """Search a public GitHub repository with a natural-language query.""" + + search_string: str = Field( + description=( + "Natural-language question about the code you want to understand. " + "Good: 'Where does auth get handled?' " + "Bad: 'auth()'" + ), + ) + github_url: str | None = Field( + default=None, + description=( + "Full GitHub URL (e.g. 'https://github.com/expressjs/express'). " + "Provide either github_url or owner_repo." + ), + ) + owner_repo: str | None = Field( + default=None, + description=( + "Repository shorthand (e.g. 'expressjs/express'). " + "Provide either github_url or owner_repo." + ), + ) + branch: str | None = Field( + default=None, + description="Branch to search. Defaults to the repo's default branch.", + ) + + +# ── Observations ──────────────────────────────────────────────────────── + + +class CodebaseSearchObservation(Observation): + """Results from a codebase search.""" + + pass # Uses base Observation's text field + + +# ── Executors ─────────────────────────────────────────────────────────── + + +def _validate_api_key(api_key: str | None) -> str: + """Return a validated MORPH_API_KEY or raise with a helpful message.""" + key = api_key or os.environ.get("MORPH_API_KEY") + if not key: + raise ValueError( + "MORPH_API_KEY is required for codebase_search.\n" + "Set it as an environment variable:\n" + " export MORPH_API_KEY=sk-morph-...\n" + "Or pass it in Tool params:\n" + " Tool(name='codebase_search', params={'api_key': 'sk-morph-...'})\n\n" + "Get your key at https://morphllm.com/dashboard/api-keys" + ) + return key + + +def _run_bridge(payload: dict, api_key: str) -> dict: + """Call the Node.js bridge script and return parsed JSON.""" + env = {**os.environ, "MORPH_API_KEY": api_key} + + try: + proc = subprocess.run( + ["node", str(_BRIDGE_SCRIPT)], + 
+ input=json.dumps(payload), + capture_output=True, + text=True, + timeout=120, + env=env, + ) + except FileNotFoundError as err: + raise ValueError( + "Node.js is required for codebase_search but 'node' was not found.\n" + "Install Node.js 18+ from https://nodejs.org/" + ) from err + except subprocess.TimeoutExpired: + return {"success": False, "error": "Search timed out after 120 seconds."} + + if proc.returncode != 0 and not proc.stdout.strip(): + stderr = proc.stderr.strip() + # The bridge died before printing JSON; map an unresolved SDK require() to an install hint + if "Cannot find module" in stderr or "MODULE_NOT_FOUND" in stderr: + return { + "success": False, + "error": ( + "@morphllm/morphsdk is not installed. Run:\n" + " npm install -g @morphllm/morphsdk" + ), + } + return {"success": False, "error": stderr[:500] if stderr else "Bridge process failed."} + + try: + return json.loads(proc.stdout) + except json.JSONDecodeError: + return {"success": False, "error": f"Invalid JSON from bridge: {proc.stdout[:200]}"} + + +def _format_result(result: dict) -> str: + """Format a WarpGrep result dict into readable text for the LLM.""" + if not result.get("success"): + return f"Search failed: {result.get('error', 'Unknown error')}" + + contexts = result.get("contexts", []) + if not contexts: + return "No relevant code found." 
+ + parts: list[str] = [] + if result.get("summary"): + parts.append(result["summary"]) + parts.append("") + + for ctx in contexts: + file_path = ctx.get("file", "unknown") + content = ctx.get("content", "") + if content: + parts.append(f"--- {file_path} ---") + parts.append(content) + parts.append("") + + return "\n".join(parts).strip() or "No relevant code found." + + +class CodebaseSearchExecutor(ToolExecutor[CodebaseSearchAction, CodebaseSearchObservation]): + """Execute local codebase search via the Morph SDK.""" + + def __init__(self, api_key: str) -> None: + self._api_key = api_key + + def __call__( + self, + action: CodebaseSearchAction, + conversation: LocalConversation | None = None, + ) -> CodebaseSearchObservation: + result = _run_bridge( + {"type": "local", "query": action.search_string, "repo_path": action.repo_path}, + self._api_key, + ) + return CodebaseSearchObservation.from_text(text=_format_result(result)) + + +class GitHubCodebaseSearchExecutor(ToolExecutor[GitHubCodebaseSearchAction, CodebaseSearchObservation]): + """Execute GitHub codebase search via the Morph SDK.""" + + def __init__(self, api_key: str) -> None: + self._api_key = api_key + + def __call__( + self, + action: GitHubCodebaseSearchAction, + conversation: LocalConversation | None = None, + ) -> CodebaseSearchObservation: + payload = { + "type": "github", + "query": action.search_string, + "github_url": action.github_url, + "owner_repo": action.owner_repo, + "branch": action.branch, + } + missing = {"success": False, "error": "Provide either github_url or owner_repo."} + has_repo = action.github_url or action.owner_repo # else the bridge searches "" + result = _run_bridge(payload, self._api_key) if has_repo else missing + return CodebaseSearchObservation.from_text(text=_format_result(result)) + + +# ── Tool Definitions ──────────────────────────────────────────────────── + + +class CodebaseSearchTool(ToolDefinition[CodebaseSearchAction, CodebaseSearchObservation]): + """Local codebase search powered by Morph WarpGrep.""" + + @classmethod + def create( + cls, + conv_state: ConversationState, + api_key: str | None = None, + **kwargs: Any, + ) -> Sequence[CodebaseSearchTool]: + key = _validate_api_key(api_key) 
working_dir = conv_state.workspace.working_dir + description = ( + f"{_CODEBASE_SEARCH_DESCRIPTION}\n" + f"Your current working directory is: {working_dir}" + ) + return [ + cls( + description=description, + action_type=CodebaseSearchAction, + observation_type=CodebaseSearchObservation, + executor=CodebaseSearchExecutor(api_key=key), + annotations=ToolAnnotations( + title="codebase_search", + readOnlyHint=True, + destructiveHint=False, + idempotentHint=True, + openWorldHint=True, + ), + ) + ] + + +class GitHubCodebaseSearchTool(ToolDefinition[GitHubCodebaseSearchAction, CodebaseSearchObservation]): + """GitHub codebase search powered by Morph WarpGrep.""" + + name = "github_codebase_search" # override auto-naming of "git_hub_codebase_search" + + @classmethod + def create( + cls, + conv_state: ConversationState, + api_key: str | None = None, + **kwargs: Any, + ) -> Sequence[GitHubCodebaseSearchTool]: + key = _validate_api_key(api_key) + return [ + cls( + description=_GITHUB_CODEBASE_SEARCH_DESCRIPTION, + action_type=GitHubCodebaseSearchAction, + observation_type=CodebaseSearchObservation, + executor=GitHubCodebaseSearchExecutor(api_key=key), + annotations=ToolAnnotations( + title="github_codebase_search", + readOnlyHint=True, + destructiveHint=False, + idempotentHint=True, + openWorldHint=True, + ), + ) + ] + + +# ── Registration ──────────────────────────────────────────────────────── + + +def register_codebase_search_tools() -> None: + """Register ``codebase_search`` and ``github_codebase_search`` tools. + + Call this once before creating an Agent that uses these tools. + Registration is explicit (not at import time) to avoid import-time + side-effects. 
+ """ + register_tool(CodebaseSearchTool.name, CodebaseSearchTool) + register_tool(GitHubCodebaseSearchTool.name, GitHubCodebaseSearchTool) diff --git a/tests/examples/test_examples.py b/tests/examples/test_examples.py index c28f15a2a9..c7a98ee1f1 100644 --- a/tests/examples/test_examples.py +++ b/tests/examples/test_examples.py @@ -51,6 +51,8 @@ "examples/01_standalone_sdk/16_llm_security_analyzer.py", "examples/01_standalone_sdk/27_observability_laminar.py", "examples/01_standalone_sdk/35_subscription_login.py", + # Requires MORPH_API_KEY plus Node.js with @morphllm/morphsdk installed + "examples/01_standalone_sdk/45_codebase_search.py", # Requires interactive input() which fails in CI with EOFError "examples/02_remote_agent_server/05_vscode_with_docker_sandboxed_server.py", }