From 971f613ee4cb1ed4053e6d37df3f543857b7de01 Mon Sep 17 00:00:00 2001
From: ryantzr1 <dreisprung1821@gmail.com>
Date: Mon, 12 Jan 2026 16:33:29 +0800
Subject: [PATCH] feat: add multi-agent orchestration pattern

- Add multi-agent example with conductor and sub-agents
- Add multi-agent cookbook (docs/cookbooks/multi-agent.mdx) and register it in docs.json
- Fix codex model routing to check only matched ID
- Keep the docs calculator example free of unsafe eval() (plain addition only)
---
 docs/cookbooks/multi-agent.mdx    | 398 ++++++++++++++++++++++++++++++
 docs/docs.json                    |   1 +
 examples/07_multi_agent.py        | 267 ++++++++++++++++++++
 hud/agents/resolver.py            |  13 +-
 hud/agents/tests/test_resolver.py |  38 +++
 5 files changed, 716 insertions(+), 1 deletion(-)
 create mode 100644 docs/cookbooks/multi-agent.mdx
 create mode 100644 examples/07_multi_agent.py
diff --git a/docs/cookbooks/multi-agent.mdx b/docs/cookbooks/multi-agent.mdx
new file mode 100644
index 00000000..1887cc4f
--- /dev/null
+++ b/docs/cookbooks/multi-agent.mdx
@@ -0,0 +1,398 @@
+---
+title: "Multi-Agent Orchestration"
+description: "Compose specialized agents into coordinated multi-agent systems"
+icon: "diagram-project"
+---
+
+Multi-agent systems let you combine specialized agents—each with its own environment, tools, and model—into a coordinated workflow. A "conductor" agent orchestrates the specialists, dispatching tasks and synthesizing results.
+
+<Card
+  title="Example Code"
+  icon="github"
+  href="https://github.com/hud-evals/hud-python/blob/main/examples/07_multi_agent.py"
+>
+  Follow along with the full working example on GitHub.
+</Card>
+
+## Overview
+
+The multi-agent pattern solves a common problem: as agent capabilities grow, a single agent with 50+ tools becomes unwieldy. By splitting responsibilities across specialized agents, each one stays focused and effective.
+
+```mermaid
+flowchart TD
+    subgraph orch["Coordinator (Conductor)"]
+        O["2 sub-agent tools"]
+    end
+    
+    subgraph browser["Browser Agent"]
+        B1["navigate"]
+        B2["click"]
+        B3["extract_text"]
+    end
+    
+    subgraph coding["Coding Agent"]
+        C1["shell"]
+        C2["apply_patch"]
+        C3["read_file"]
+    end
+    
+    O --> browser
+    O --> coding
+```
+
+The conductor sees only 2 tools—one per specialist. Each specialist has a focused toolset for its domain.
+
+## Key Concepts
+
+| Concept | Description |
+|---------|-------------|
+| **Coordinator** | An Environment with sub-agents registered as tools |
+| **AgentTool** | Wraps an environment + scenario as a callable tool |
+| **Conductor** | The agent that runs the coordinator (makes decisions) |
+| **Sub-agent** | A specialized agent wrapped as a tool |
+| **Eval-only params** | Parameters hidden from conductor but available for evaluation |
+
+## Quick Start
+
+### Prerequisites
+
+```bash
+export HUD_API_KEY="sk-hud-..."
+```
+
+Get your API key at [hud.ai/project/api-keys](https://hud.ai/project/api-keys).
+
+<Note>
+  **Prerequisites**: You must deploy two hub environments before running this example:
+  
+  1. **Remote Browser**: Go to [hud-evals/hud-remote-browser](https://github.com/hud-evals/hud-remote-browser) → Fork to your GitHub → [hud.ai](https://hud.ai) → **New** → **Environment** → Import from your repo. Set required browser provider API keys (e.g., `ANCHOR_API_KEY`).
+  
+  2. **Codex Sandbox**: Go to [hud.ai](https://hud.ai) → **New** → **Environment** → Import from [hud-evals/codex_environment_sandbox](https://github.com/hud-evals/codex_environment_sandbox).
+  
+  Once deployed, update the `connect_hub()` calls to use your environment slugs (e.g., `my-org/remote-browser`).
+</Note>
+
+### Running the Example
+
+```bash
+# Default task: research the current GOOGL price and save it to googl_price.md
+uv run python examples/07_multi_agent.py
+
+# Custom research task
+uv run python examples/07_multi_agent.py \
+  --task "Find current prices of Bitcoin and Ethereum and save to crypto.md"
+
+# Verbose mode
+uv run python examples/07_multi_agent.py --verbose
+```
+
+## Building a Multi-Agent System
+
+The pattern is simple:
+1. Create `AgentTool`s that wrap environments + models
+2. Register them on a coordinator `Environment`
+3. Run a "conductor" agent that dispatches work to sub-agents
+
+### Step 1: Create Sub-Agent Environments
+
+Each sub-agent is an `Environment` with its own tools and scenario. Connect to HUD Hub environments or define local tools:
+
+```python
+from hud import Environment
+from hud.tools.agent import AgentTool
+
+
+def create_browser_agent() -> AgentTool:
+    """Create a browser sub-agent for web research."""
+    env = Environment("browser")
+    env.connect_hub("hud-remote-browser-2")
+
+    @env.scenario()
+    async def web_research(
+        task: str,
+        start_url: str = "",  # Optional, but visible to the conductor (non-None default)
+        expected_outcome: str | None = None,  # Eval-only (hidden from conductor)
+    ):
+        """Research information on the web."""
+        prompt = f"""You are a web research agent with browser access.
+
+Research Task: {task}
+"""
+        if start_url:
+            prompt += f"\nStart URL: {start_url}"
+
+        prompt += """
+
+Find relevant information, extract key data, and return structured findings."""
+
+        yield prompt
+        yield 1.0
+
+    return AgentTool(
+        env("web_research"),
+        model="claude-sonnet-4-5",  # Good at browser navigation
+        name="web_research",
+        description="Research information on the web. Use for finding articles, "
+        "scraping data, comparing prices, and extracting structured information.",
+    )
+```
+
+### Step 2: Define the Coding Agent
+
+```python
+def create_coding_agent() -> AgentTool:
+    """Create a coding sub-agent for file operations."""
+    env = Environment("coding")
+    env.connect_hub("codex_environment_sandbox")
+
+    @env.scenario()
+    async def create_markdown(
+        filename: str,
+        content: str,
+        expected_result: str | None = None,  # Eval-only
+    ):
+        """Create a markdown file with the given content."""
+        prompt = f"""You are a file creation assistant.
+
+Task: Create a markdown file named '{filename}' with the following content:
+
+{content}
+
+IMPORTANT: Use the `apply_patch` tool to create the file.
+
+Steps:
+1. Use apply_patch to create '{filename}' with the content above
+2. Confirm it was created successfully
+
+Return a confirmation message."""
+
+        yield prompt
+        yield 1.0
+
+    return AgentTool(
+        env("create_markdown"),
+        model="gpt-5.1",  # Codex-capable for native shell/apply_patch
+        name="create_markdown",
+        description="Create a markdown file with specified content. Use for "
+        "saving research findings, creating reports, and documenting results.",
+    )
+```
+
+### Step 3: Create the Coordinator
+
+Create an `Environment` with sub-agents as tools, then run a conductor agent:
+
+```python
+import hud
+from hud import Environment
+from hud.agents import create_agent
+
+
+async def run_research(task: str):
+    # Create sub-agents as tools
+    browser_agent = create_browser_agent()
+    coding_agent = create_coding_agent()
+
+    # Create coordinator environment with sub-agents as tools
+    coordinator = Environment("coordinator")
+    coordinator.add_tool(browser_agent)
+    coordinator.add_tool(coding_agent)
+
+    # Define the coordination scenario
+    @coordinator.scenario()
+    async def coordinate(prompt: str):
+        yield prompt
+        yield 1.0
+
+    # System prompt for the conductor
+    system_prompt = """You are a research assistant coordinating specialized agents.
+
+Available sub-agents (call as tools):
+- web_research: Find information on the web
+- create_markdown: Create markdown files
+
+CRITICAL: Sub-agents don't share context. When calling create_markdown, 
+you MUST pass the content you want to save.
+
+Workflow:
+1. web_research: Gather data
+2. Format the data into markdown content
+3. create_markdown: Save the formatted content
+"""
+
+    # Run with eval context
+    async with hud.eval(
+        coordinator("coordinate", prompt=task),
+        name="multi-agent-research",
+    ) as ctx:
+        conductor = create_agent("gpt-4o", system_prompt=system_prompt)
+        result = await conductor.run(ctx, max_steps=10)
+
+    print(f"Reward: {ctx.reward}")
+    print(f"Result: {result.content}")
+```
+
+## AgentTool API
+
+`AgentTool` wraps an environment's scenario as a callable tool:
+
+```python
+from hud.tools.agent import AgentTool
+
+tool = AgentTool(
+    env("scenario_name"),      # Task from environment
+    model="claude-sonnet-4-5", # Model for this sub-agent
+    name="tool_name",          # Name shown to conductor
+    description="...",         # Description for conductor
+    agent=None,                # Or provide custom agent class
+    agent_params={},           # Params passed to agent
+    trace=False,               # Enable separate tracing
+)
+```
+
+### Eval-Only Parameters
+
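+Parameters annotated `T | None` with a `None` default are automatically hidden from the conductor's tool schema. The rule applies to every such parameter, so an optional input that the conductor should be able to set needs a non-`None` default (for example `str = ""`). A typical eval-only parameter looks like this: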
+
+```python
+@env.scenario()
+async def investigate(
+    query: str,                           # Visible to conductor
+    expected_finding: str | None = None,  # Hidden (eval-only)
+):
+    response = yield f"Investigate: {query}"
+    
+    # Use expected_finding for scoring
+    if expected_finding and response:
+        yield 1.0 if expected_finding.lower() in response.lower() else 0.0
+    else:
+        yield 1.0
+```
+
+This lets you include ground truth for evaluations without exposing it to the conductor.
+
+## Context Isolation
+
+<Warning>
+**Sub-agents don't share context.** Each sub-agent runs in its own isolated environment. The conductor must explicitly pass all necessary data when calling a sub-agent.
+</Warning>
+
+```python
+# ❌ Wrong: Assuming sub-agent knows about previous results
+result = await ctx.call_tool(name="web_research", arguments={"task": "Find stock prices"})
+# The create_markdown agent won't know what web_research found!
+await ctx.call_tool(name="create_markdown", arguments={"filename": "report.md"})
+
+# ✅ Correct: Pass data explicitly
+result = await ctx.call_tool(name="web_research", arguments={"task": "Find stock prices"})
+await ctx.call_tool(name="create_markdown", arguments={
+    "filename": "report.md",
+    "content": result.content  # Pass the data!
+})
+```
+
+Your system prompt should remind the conductor about this:
+
+```python
+system_prompt="""...
+CRITICAL: Sub-agents don't share context. When calling create_markdown, 
+you MUST pass the content you want to save.
+..."""
+```
+
+## Trace Continuity
+
+All sub-agent activity appears in a single trace on the HUD platform. When the conductor calls a sub-agent tool, the inference and tool calls are recorded under the parent trace—no separate URLs to track.
+
+```
+🎭 Coordinator Trace
+├── 🤖 Conductor: "I'll research GOOGL prices first..."
+│   └── 🔧 web_research(task="Find GOOGL price")
+│       ├── 🤖 Browser Agent: "Navigating to finance site..."
+│       │   ├── 🔧 navigate(url="https://finance.google.com")
+│       │   └── 🔧 extract_text(selector=".price")
+│       └── ✅ "GOOGL: $185.42"
+├── 🤖 Conductor: "Now I'll save to markdown..."
+│   └── 🔧 create_markdown(filename="googl.md", content="# GOOGL Price\n...")
+│       ├── 🤖 Coding Agent: "Creating file..."
+│       │   └── 🔧 apply_patch(type="create_file", path="googl.md", ...)
+│       └── ✅ "Created googl.md"
+└── ✅ "Research complete!"
+```
+
+## Advanced Patterns
+
+### Custom Conductor Agent
+
+Use a custom agent class for the conductor:
+
+```python
+from hud.agents.claude import ClaudeAgent
+
+# Create and run with a custom agent
+async with hud.eval(coordinator("coordinate", prompt=task)) as ctx:
+    conductor = ClaudeAgent.create(
+        checkpoint_name="claude-sonnet-4-5",
+        system_prompt=system_prompt,
+        max_tokens=8192,
+    )
+    result = await conductor.run(ctx, max_steps=10)
+```
+
+### Multiple Scenarios
+
+Define multiple scenarios on the coordinator:
+
+```python
+@coordinator.scenario()
+async def research(prompt: str):
+    yield prompt
+    yield 1.0
+
+@coordinator.scenario()
+async def summarize(topic: str, length: str = "short"):
+    yield f"Summarize {topic} in a {length} format"
+    yield 1.0
+
+# Use different scenarios
+async with hud.eval(coordinator("research", prompt="Find Python frameworks")) as ctx:
+    ...
+
+async with hud.eval(coordinator("summarize", topic="ML", length="detailed")) as ctx:
+    ...
+```
+
+### Mixing AgentTools with Regular Tools
+
+You can add both AgentTools (sub-agents) and regular tools:
+
+```python
+from hud.tools.base import BaseTool
+
+class CalculatorTool(BaseTool):
+    def __init__(self):
+        super().__init__(name="calculator", description="Add two numbers")
+    
+    async def __call__(self, a: float, b: float) -> str:
+        return str(a + b)
+
+coordinator = Environment("hybrid")
+coordinator.add_tool(browser_agent)      # AgentTool (spawns sub-agent)
+coordinator.add_tool(CalculatorTool())   # Regular tool (runs directly)
+```
+
+## CLI Options
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--task` | Stock research | The task for the coordinator |
+| `--conductor` | `gpt-4o` | Model for the conductor agent |
+| `--max-steps` | `10` | Maximum conductor steps |
+| `--verbose` | Off | Enable verbose output |
+
+## See Also
+
+- [Ops Diagnostics](/cookbooks/ops-diagnostics) - A more complex multi-agent example
+- [AgentTool Reference](/reference/tools#agenttool) - Detailed AgentTool API
+- [Building Environments](/build-environments) - Creating custom environments
+- [Scenarios](/reference/environments#scenarios) - Scenario patterns and best practices
diff --git a/docs/docs.json b/docs/docs.json
index d3f7332c..1ca08a0e 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -64,6 +64,7 @@
                 "group": "Cookbooks",
                 "pages": [
                   "cookbooks/codex-coding",
+                  "cookbooks/multi-agent",
                   "cookbooks/ops-diagnostics"
                 ]
               },
diff --git a/examples/07_multi_agent.py b/examples/07_multi_agent.py
new file mode 100644
index 00000000..4a4a356c
--- /dev/null
+++ b/examples/07_multi_agent.py
@@ -0,0 +1,267 @@
+"""
+Multi-Agent Example - Smart Research Assistant
+
+This example demonstrates how to compose multiple specialized agents
+into a multi-agent system using AgentTools.
+
+The pattern is simple:
+1. Create AgentTools that wrap environments + models
+2. Register them on a coordinator environment
+3. Run a "conductor" agent that dispatches work to sub-agents
+
+The Smart Research Assistant combines:
+- Browser agent: Finds information, scrapes data, navigates websites
+- Coding agent: Creates markdown files with research findings
+
+Uses real HUD Hub environments:
+- codex_environment_sandbox: Coding environment with shell and file editing tools
+- hud-remote-browser-2: Browser automation for web tasks
+
+Usage:
+    export HUD_API_KEY="sk-hud-..."
+    uv run python examples/07_multi_agent.py
+
+    # Custom task
+    uv run python examples/07_multi_agent.py \\
+        --task "Find current prices of Bitcoin and Ethereum and save to crypto.md"
+"""
+
+import argparse
+import asyncio
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import hud
+from hud import Environment
+from hud.agents import create_agent
+from hud.settings import settings
+from hud.tools.agent import AgentTool
+
+
+# =============================================================================
+# Create Sub-Agents from Hub Environments
+# =============================================================================
+
+
+def create_coding_agent() -> AgentTool:
+    """Create a coding sub-agent for markdown file creation."""
+    env = Environment("coding")
+    env.connect_hub("codex_environment_sandbox")  # Hub env with shell and file-editing tools
+
+    @env.scenario()
+    async def create_markdown(
+        filename: str,
+        content: str,
+        expected_result: str | None = None,  # Eval-only (hidden from tool schema); unused here
+    ):
+        """Create a markdown file with the given content."""
+        prompt = f"""You are a file creation assistant with access to a coding environment.
+
+Task: Create a markdown file named '{filename}' with the following content:
+
+{content}
+
+IMPORTANT: Use the `apply_patch` tool to create the file. Do NOT use shell commands like cat or echo.
+
+Steps:
+1. Use apply_patch to create '{filename}' with the content above
+2. Use list_files or read_file to confirm it was created
+
+Return a confirmation message with the filename and location."""
+
+        yield prompt  # First yield: the scenario prompt
+        yield 1.0  # Second yield: a fixed 1.0 score, so nothing is actually graded
+
+    return AgentTool(
+        env("create_markdown"),
+        model="gpt-5.1",  # Codex-capable ID; see _is_codex_capable_model in hud/agents/resolver.py
+        name="create_markdown",  # Tool name the conductor calls
+        description="Create a markdown file with specified content. Use for: "
+        "saving research findings, creating reports, documenting results.",
+    )
+
+
+def create_browser_agent() -> AgentTool:
+    """Create a browser automation sub-agent for web research."""
+    env = Environment("browser")
+    env.connect_hub("hud-remote-browser-2")  # Hub env providing browser automation
+
+    @env.scenario()
+    async def web_research(
+        task: str,
+        start_url: str = "",  # Non-None default keeps it in the conductor's tool schema
+        expected_outcome: str | None = None,  # Eval-only: "| None = None" hides it from the schema
+    ):
+        """Research information on the web using browser automation."""
+        prompt = f"""You are a web research agent with access to a browser.
+
+Research Task: {task}
+"""
+        if start_url:  # Empty string (the default) means "no preferred starting page"
+            prompt += f"\nStart URL: {start_url}"
+
+        prompt += """
+
+Your job is to:
+1. Navigate to relevant websites
+2. Search for information related to the task
+3. Extract key data, facts, and information
+4. Return a clear, structured summary
+
+Include: key findings, data points (prices, numbers, dates), and sources visited."""
+
+        yield prompt  # First yield: the scenario prompt
+        yield 1.0  # Second yield: a fixed 1.0 score, so nothing is actually graded
+
+    return AgentTool(
+        env("web_research"),
+        model="claude-sonnet-4-5",
+        name="web_research",  # Tool name the conductor calls
+        description="Research information on the web. Use for finding articles, "
+        "scraping data, comparing prices, and extracting structured information.",
+    )
+
+
+# =============================================================================
+# Multi-Agent Orchestration Pattern
+# =============================================================================
+
+
+async def run_multi_agent(
+    task: str,
+    conductor_model: str = "gpt-4o",  # Model name handed to create_agent()
+    max_steps: int = 10,  # Step budget passed to conductor.run()
+    verbose: bool = False,  # Forwarded to create_agent()
+) -> None:
+    """
+    Run a multi-agent system with a conductor dispatching to sub-agents.
+
+    This shows the core pattern for multi-agent orchestration:
+    1. Create an Environment for the coordinator
+    2. Add AgentTools as callable tools
+    3. Run a conductor agent that dispatches work
+    """
+
+    if not settings.api_key:  # Fail before any sub-agent is built
+        raise ValueError(
+            "HUD_API_KEY is required for hub environments.\n"
+            "Get yours at: https://hud.ai/project/api-keys\n"
+            "Then: export HUD_API_KEY='sk-hud-...'"
+        )
+
+    # Create sub-agents as tools
+    coding_agent = create_coding_agent()
+    browser_agent = create_browser_agent()
+
+    # Create coordinator environment with sub-agents as tools
+    coordinator = Environment("coordinator")
+    coordinator.add_tool(browser_agent)  # Registered under the name "web_research"
+    coordinator.add_tool(coding_agent)  # Registered under the name "create_markdown"
+
+    # Define the coordination scenario: yields the task prompt, then a fixed 1.0 score
+    @coordinator.scenario()
+    async def coordinate(prompt: str):
+        yield prompt
+        yield 1.0
+
+    # System prompt for the conductor
+    system_prompt = """You are a Smart Research Assistant coordinating specialized agents.
+
+Available sub-agents (call as tools):
+- web_research: Find information, scrape data, compare prices
+- create_markdown: Create markdown files with specified content
+
+CRITICAL: Sub-agents don't share context. When calling create_markdown,
+you MUST pass the full content you want to save.
+
+Workflow:
+1. Use web_research to gather data (prices, facts, numbers)
+2. Format the data into markdown content
+3. Use create_markdown to save the formatted content
+4. Iterate if needed"""
+
+    print("🎭 Smart Research Assistant")
+    print(f"🤖 Conductor: {conductor_model}")
+    print(f"🔧 Sub-agents: {browser_agent.name}, {coding_agent.name}")
+    print(f"📋 Task: {task}")
+    print("=" * 70)
+
+    # Run with eval context
+    async with hud.eval(
+        coordinator("coordinate", prompt=task),
+        name="multi-agent-research",
+    ) as ctx:
+        # Create conductor agent and run
+        conductor = create_agent(
+            conductor_model,
+            system_prompt=system_prompt,
+            verbose=verbose,
+        )
+        result = await conductor.run(ctx, max_steps=max_steps)
+
+    print("=" * 70)
+    print("✅ Research Complete!")
+    print(f"📊 Reward: {ctx.reward}")  # Read after the eval block has exited
+    if result.content:  # Skip the summary section when the conductor returned no text
+        print(f"\n📝 Summary:\n{result.content}")
+
+
+# =============================================================================
+# CLI
+# =============================================================================
+
+
+def _parse_args() -> argparse.Namespace:  # CLI flags: --task, --conductor, --max-steps, --verbose
+    parser = argparse.ArgumentParser(
+        description="Multi-agent research assistant",
+        formatter_class=argparse.RawDescriptionHelpFormatter,  # Keep the epilog's line breaks
+        epilog="""
+Examples:
+  uv run python examples/07_multi_agent.py \\
+    --task "Research AAPL stock price and save to stock_prices.md"
+
+  uv run python examples/07_multi_agent.py \\
+    --task "Find 3 laptops under $2000 and save specs to laptops.md"
+""",
+    )
+    parser.add_argument(
+        "--task",
+        type=str,
+        default="Research current price of GOOGL and save to googl_price.md",
+        help="Research task to complete",
+    )
+    parser.add_argument(
+        "--conductor",  # Read back as args.conductor
+        type=str,
+        default="gpt-4o",
+        help="Model for the conductor agent (default: gpt-4o)",
+    )
+    parser.add_argument(
+        "--max-steps",  # argparse exposes this as args.max_steps
+        type=int,
+        default=10,
+        help="Maximum steps for conductor (default: 10)",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",  # Flag only: False unless passed
+        help="Enable verbose output",
+    )
+    return parser.parse_args()
+
+
+async def main() -> None:  # Script entry point: parse CLI flags, then run the orchestration
+    args = _parse_args()
+
+    await run_multi_agent(
+        task=args.task,
+        conductor_model=args.conductor,  # --conductor maps onto the conductor_model parameter
+        max_steps=args.max_steps,
+        verbose=args.verbose,
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/hud/agents/resolver.py b/hud/agents/resolver.py
index 80351800..fb1b1bf3 100644
--- a/hud/agents/resolver.py
+++ b/hud/agents/resolver.py
@@ -15,6 +15,12 @@
 _PROVIDER_TO_AGENT = {"anthropic": "claude"}
 
 
+def _is_codex_capable_model(model_id: str) -> bool:
+    """Return True if the model needs OpenAIAgent: "gpt-5.1" or any ID containing "codex"."""
+    m = (model_id or "").lower()  # None/"" collapse to "" (-> False); match is case-insensitive
+    return m == "gpt-5.1" or "codex" in m  # "gpt-5.1-codex" is covered by the substring test
+
+
 def _fetch_gateway_models() -> list[dict[str, Any]]:
     """Fetch available models from HUD gateway (cached)."""
     global _models_cache
@@ -59,7 +65,12 @@ def resolve_cls(model: str) -> tuple[type[MCPAgent], dict[str, Any] | None]:
 
     # Gateway lookup
     for m in _fetch_gateway_models():
-        if model in (m.get("id"), m.get("name"), m.get("model")):
+        candidate_ids = (m.get("id"), m.get("name"), m.get("model"))
+        if model in candidate_ids:
+            # Only check if the matched model string is codex-capable
+            if _is_codex_capable_model(model):
+                return AgentType.OPENAI.cls, m
+
             provider = (m.get("provider") or "openai_compatible").lower()
             agent_str = _PROVIDER_TO_AGENT.get(provider, provider)
             try:
diff --git a/hud/agents/tests/test_resolver.py b/hud/agents/tests/test_resolver.py
index 04e6f51e..c826805b 100644
--- a/hud/agents/tests/test_resolver.py
+++ b/hud/agents/tests/test_resolver.py
@@ -57,6 +57,44 @@ def test_resolves_gateway_model(self) -> None:
             assert info is not None
             assert info["id"] == "gpt-4o"
 
+    def test_resolves_codex_model_to_openai_agent_even_if_provider_is_openai_compatible(
+        self,
+    ) -> None:
+        """Codex-capable models use OpenAIAgent for native tools."""
+        from hud.agents import OpenAIAgent
+
+        mock_models = [
+            {"id": "gpt-5.1-codex", "model": "gpt-5.1-codex", "provider": "openai_compatible"},
+        ]
+
+        with patch("hud.agents.resolver._fetch_gateway_models", return_value=mock_models):
+            cls, info = resolve_cls("gpt-5.1-codex")  # "codex" ID: provider is never consulted
+            assert cls == OpenAIAgent
+            assert info is not None
+            assert info["id"] == "gpt-5.1-codex"  # The matched gateway entry is handed back
+
+    def test_does_not_misroute_claude_when_alias_is_codex_capable(self) -> None:
+        """Only the matched ID should be checked for codex capability, not aliases."""
+        from hud.agents.claude import ClaudeAgent
+
+        # Contrived example: a model entry where one alias is codex-capable
+        # but the requested ID is not
+        mock_models = [
+            {
+                "id": "claude-via-gateway",
+                "name": "gpt-5.1-codex",  # Alias happens to be codex-capable
+                "model": "claude-3-sonnet",
+                "provider": "anthropic",  # Mapped to the "claude" agent by _PROVIDER_TO_AGENT
+            },
+        ]
+
+        with patch("hud.agents.resolver._fetch_gateway_models", return_value=mock_models):
+            # Request by the non-codex ID - should route to Claude, not OpenAI
+            cls, info = resolve_cls("claude-via-gateway")
+            assert cls == ClaudeAgent  # Would be OpenAIAgent if the "name" alias were inspected
+            assert info is not None
+            assert info["id"] == "claude-via-gateway"
+
     def test_resolves_anthropic_provider_to_claude(self) -> None:
         """Provider 'anthropic' maps to ClaudeAgent."""
         from hud.agents.claude import ClaudeAgent