diff --git a/go.mod b/go.mod index 640db9a..25fb356 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.26.2 require ( github.com/coder/websocket v1.8.14 github.com/creack/pty v1.1.24 - github.com/gsd-build/protocol-go v0.32.0 + github.com/gsd-build/protocol-go v0.33.0 github.com/spf13/cobra v1.10.2 gopkg.in/natefinch/lumberjack.v2 v2.2.1 ) diff --git a/go.sum b/go.sum index 817f9a6..ac197e2 100644 --- a/go.sum +++ b/go.sum @@ -3,8 +3,8 @@ github.com/coder/websocket v1.8.14/go.mod h1:NX3SzP+inril6yawo5CQXx8+fk145lPDC6p github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= -github.com/gsd-build/protocol-go v0.32.0 h1:4Vk/8GFH8s539xx01EFENO7snhJkndvnp9OxiANoCSI= -github.com/gsd-build/protocol-go v0.32.0/go.mod h1:vECSwMFp59Ihu5ZH4aLF5fuW9zJ4a3ZXCYngmzfBn8s= +github.com/gsd-build/protocol-go v0.33.0 h1:/UBKhB5bcW7QVvGNDH0h7KZIaVVqvE9/OtYi0uH4RrI= +github.com/gsd-build/protocol-go v0.33.0/go.mod h1:vECSwMFp59Ihu5ZH4aLF5fuW9zJ4a3ZXCYngmzfBn8s= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= diff --git a/internal/browser/manager.go b/internal/browser/manager.go index 1f8d144..6748527 100644 --- a/internal/browser/manager.go +++ b/internal/browser/manager.go @@ -187,6 +187,7 @@ func (m *Manager) sendFrame(ctx context.Context, browserID string) { DevicePixelRatio: frame.DevicePixelRatio, CapturedAt: frame.CapturedAt, }) + m.sendRefs(ctx, browserID) m.mu.Lock() if current := m.byID[browserID]; current == state && frame.Sequence > current.lastFrameSeq { current.lastFrameSeq = frame.Sequence @@ -205,6 +206,52 @@ func (m *Manager) 
sendFrame(ctx context.Context, browserID string) { } } +func (m *Manager) sendRefs(ctx context.Context, browserID string) { + m.mu.Lock() + state, ok := m.byID[browserID] + if !ok { + m.mu.Unlock() + return + } + req := state.openRequest + m.mu.Unlock() + + refsCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + refs, err := m.service.Refs(refsCtx, browserID) + if err != nil { + return + } + + out := make([]protocol.BrowserRef, 0, len(refs.Refs)) + for _, ref := range refs.Refs { + out = append(out, protocol.BrowserRef{ + Ref: ref.Ref, + Key: ref.Key, + Role: ref.Role, + Name: ref.Name, + X: ref.X, + Y: ref.Y, + W: ref.W, + H: ref.H, + }) + } + + capturedAt := refs.CapturedAt + if capturedAt == "" { + capturedAt = time.Now().UTC().Format(time.RFC3339Nano) + } + _ = m.sender.Send(ctx, &protocol.BrowserRefs{ + Type: protocol.MsgTypeBrowserRefs, + BrowserID: browserID, + SessionID: req.SessionID, + ChannelID: req.ChannelID, + Version: refs.Version, + Refs: out, + CapturedAt: capturedAt, + }) +} + func (m *Manager) GrantForTask(taskID string) (Grant, bool) { if taskID == "" { return Grant{}, false @@ -430,6 +477,55 @@ func (m *Manager) Tool(ctx context.Context, msg *protocol.BrowserToolCall) error m.mu.Unlock() return fmt.Errorf("browser control belongs to %s", state.owner) } + req := state.openRequest + risk := classifyBrowserTool(msg.Method, msg.ParamsJSON) + if msg.Method == "vault_save" { + m.mu.Unlock() + if err := m.sender.Send(ctx, &protocol.BrowserToolResult{ + Type: protocol.MsgTypeBrowserToolResult, + BrowserID: msg.BrowserID, + GrantID: msg.GrantID, + TaskID: msg.TaskID, + ToolUseID: msg.ToolUseID, + OK: false, + Error: "agent-initiated vault_save is not allowed", + }); err != nil { + return fmt.Errorf("send browser vault_save rejection: %w", err) + } + return fmt.Errorf("agent-initiated vault_save is not allowed") + } + if browserRiskRequiresApproval(risk) { + previousOwner := state.owner + previousVersion := state.controlVersion + 
state.owner = OwnerApproval + state.controlVersion++ + nextVersion := state.controlVersion + m.mu.Unlock() + requestID := fmt.Sprintf("browser_sensitive_%d", time.Now().UnixNano()) + if err := m.sender.Send(ctx, &protocol.BrowserSensitiveActionRequest{ + Type: protocol.MsgTypeBrowserSensitiveActionRequest, + BrowserID: msg.BrowserID, + RequestID: requestID, + SessionID: req.SessionID, + ChannelID: req.ChannelID, + TaskID: msg.TaskID, + ToolUseID: msg.ToolUseID, + Category: string(risk), + Summary: browserApprovalSummary(msg.Method, risk), + ExpiresAt: time.Now().Add(2 * time.Minute).UTC().Format(time.RFC3339Nano), + }); err != nil { + m.mu.Lock() + if current := m.byID[msg.BrowserID]; current == state && + current.owner == OwnerApproval && + current.controlVersion == nextVersion { + current.owner = previousOwner + current.controlVersion = previousVersion + } + m.mu.Unlock() + return fmt.Errorf("send browser sensitive action request: %w", err) + } + return fmt.Errorf("browser action requires approval: %s", risk) + } m.mu.Unlock() result, err := m.service.Tool(ctx, msg.BrowserID, msg.Method, msg.ParamsJSON) if err != nil { @@ -446,3 +542,7 @@ func (m *Manager) Tool(ctx context.Context, msg *protocol.BrowserToolCall) error Error: result.Error, }) } + +func browserApprovalSummary(method string, risk BrowserRisk) string { + return fmt.Sprintf("Run browser method %s (%s)", method, risk) +} diff --git a/internal/browser/manager_test.go b/internal/browser/manager_test.go index e1c2ce8..51a5505 100644 --- a/internal/browser/manager_test.go +++ b/internal/browser/manager_test.go @@ -2,6 +2,7 @@ package browser import ( "context" + "encoding/json" "errors" "strings" "sync" @@ -12,8 +13,10 @@ import ( ) type fakeService struct { - mu sync.Mutex - calls []string + mu sync.Mutex + calls []string + refs []Ref + toolCalls int } func (f *fakeService) Open(ctx context.Context, req OpenRequest) (OpenResult, error) { @@ -47,13 +50,51 @@ func (f *fakeService) Frame(ctx context.Context, 
browserID string) (Frame, error }, nil } +func (f *fakeService) Refs(ctx context.Context, browserID string) (Refs, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.calls = append(f.calls, "refs:"+browserID) + return Refs{ + Version: 1, + Refs: append([]Ref(nil), f.refs...), + CapturedAt: time.Now().UTC().Format(time.RFC3339Nano), + }, nil +} + func (f *fakeService) Tool(ctx context.Context, browserID string, method string, params []byte) (ToolResult, error) { f.mu.Lock() defer f.mu.Unlock() f.calls = append(f.calls, "tool:"+method) + f.toolCalls++ return ToolResult{OK: true, ResultJSON: []byte(`{"ok":true}`)}, nil } +func (r *recordingSender) hasType(messageType protocol.MessageType) bool { + r.mu.Lock() + defer r.mu.Unlock() + for _, msg := range r.msgs { + switch messageType { + case protocol.MsgTypeBrowserRefs: + if _, ok := msg.(*protocol.BrowserRefs); ok { + return true + } + case protocol.MsgTypeBrowserFrame: + if _, ok := msg.(*protocol.BrowserFrame); ok { + return true + } + case protocol.MsgTypeBrowserToolResult: + if _, ok := msg.(*protocol.BrowserToolResult); ok { + return true + } + case protocol.MsgTypeBrowserSensitiveActionRequest: + if _, ok := msg.(*protocol.BrowserSensitiveActionRequest); ok { + return true + } + } + } + return false +} + func (f *fakeService) UserInput(ctx context.Context, browserID string, input *protocol.BrowserUserInput) error { f.mu.Lock() defer f.mu.Unlock() @@ -110,6 +151,187 @@ func TestBrowserUserInputParamsPreservesZeroRenderedOrigin(t *testing.T) { } } +func TestBrowserUserInputToolRoutesSidebarCommands(t *testing.T) { + method, params, ok := browserUserInputTool(&protocol.BrowserUserInput{ + Type: protocol.MsgTypeBrowserUserInput, + Kind: protocol.BrowserInputKindNavigate, + Text: "https://example.com", + }) + if !ok { + t.Fatal("expected navigate input to route through browser tool") + } + if method != "navigate" { + t.Fatalf("method = %q, want navigate", method) + } + var payload map[string]string + if err := 
json.Unmarshal(params, &payload); err != nil { + t.Fatalf("unmarshal params: %v", err) + } + if payload["url"] != "https://example.com" { + t.Fatalf("url = %q", payload["url"]) + } + + method, params, ok = browserUserInputTool(&protocol.BrowserUserInput{ + Type: protocol.MsgTypeBrowserUserInput, + Kind: protocol.BrowserInputKindRefAction, + Text: "@v1:button-primary", + }) + if !ok { + t.Fatal("expected ref action input to route through browser tool") + } + if method != "click_ref" { + t.Fatalf("method = %q, want click_ref", method) + } + if err := json.Unmarshal(params, &payload); err != nil { + t.Fatalf("unmarshal ref params: %v", err) + } + if payload["ref"] != "@v1:button-primary" { + t.Fatalf("ref = %q", payload["ref"]) + } +} + +func openBrowserForTest(t *testing.T, m *Manager, browserID string) { + t.Helper() + if err := m.Open(context.Background(), &protocol.BrowserSessionOpen{ + Type: protocol.MsgTypeBrowserSessionOpen, + RequestID: "req_1", + GrantID: "grant_1", + SessionID: "session_1", + ProjectID: "project_1", + TaskID: "task_1", + ChannelID: "channel_1", + MachineID: "machine_1", + Mode: "clean", + ExpiresAt: time.Now().Add(time.Hour).Format(time.RFC3339Nano), + }); err != nil { + t.Fatalf("open browser: %v", err) + } + if browserID != "" { + m.mu.Lock() + defer m.mu.Unlock() + if _, ok := m.byID[browserID]; !ok { + t.Fatalf("browser %s not opened", browserID) + } + } +} + +func TestManagerForwardsBrowserRefs(t *testing.T) { + service := &fakeService{ + refs: []Ref{{ + Ref: "@v1:e1", + Key: "e1", + Role: "button", + Name: "Submit", + X: 10, + Y: 20, + W: 80, + H: 32, + }}, + } + sender := &recordingSender{} + m := NewManager(ManagerOptions{ + Service: service, + Sender: sender, + FrameInterval: time.Hour, + }) + + err := m.Open(context.Background(), &protocol.BrowserSessionOpen{ + Type: protocol.MsgTypeBrowserSessionOpen, + RequestID: "req_1", + GrantID: "grant_1", + SessionID: "session_1", + ChannelID: "channel_1", + ExpiresAt: 
time.Now().Add(time.Hour).Format(time.RFC3339Nano), + }) + if err != nil { + t.Fatalf("open browser: %v", err) + } + + m.sendRefs(context.Background(), "browser_1") + + if !sender.hasType(protocol.MsgTypeBrowserRefs) { + t.Fatalf("expected browserRefs message, got %#v", sender.snapshot()) + } +} + +func TestManagerBlocksSensitiveToolUntilApproval(t *testing.T) { + service := &fakeService{} + sender := &recordingSender{} + m := NewManager(ManagerOptions{Service: service, Sender: sender, FrameInterval: time.Hour}) + openBrowserForTest(t, m, "browser_1") + + err := m.Tool(context.Background(), &protocol.BrowserToolCall{ + Type: protocol.MsgTypeBrowserToolCall, + BrowserID: "browser_1", + GrantID: "grant_1", + TaskID: "task_1", + ToolUseID: "tool_1", + Method: "vault_login", + }) + + if err == nil { + t.Fatal("expected sensitive tool to wait for approval") + } + service.mu.Lock() + toolCalls := service.toolCalls + service.mu.Unlock() + if toolCalls != 0 { + t.Fatalf("sensitive tool executed before approval") + } + if !sender.hasType(protocol.MsgTypeBrowserSensitiveActionRequest) { + t.Fatalf("expected sensitive action request, got %#v", sender.snapshot()) + } +} + +func TestManagerRollsBackApprovalOwnerWhenRequestSendFails(t *testing.T) { + service := &fakeService{} + m := NewManager(ManagerOptions{Service: service, Sender: &recordingSender{}, FrameInterval: time.Hour}) + openBrowserForTest(t, m, "browser_1") + m.sender = failingSender{} + + err := m.Tool(context.Background(), &protocol.BrowserToolCall{ + Type: protocol.MsgTypeBrowserToolCall, + BrowserID: "browser_1", + GrantID: "grant_1", + TaskID: "task_1", + ToolUseID: "tool_1", + Method: "vault_login", + }) + + if err == nil { + t.Fatal("expected send failure") + } + m.mu.Lock() + owner := m.byID["browser_1"].owner + version := m.byID["browser_1"].controlVersion + m.mu.Unlock() + if owner != OwnerAgent { + t.Fatalf("owner = %s, want %s", owner, OwnerAgent) + } + if version != 0 { + t.Fatalf("controlVersion = %d, 
want 0", version) + } +} + +func TestClassifyBrowserBatchUsesHighestRiskNestedMethod(t *testing.T) { + category := classifyBrowserTool("batch", json.RawMessage(`{"steps":[{"action":"mock_route"}]}`)) + if category != BrowserRiskNetworkMutation { + t.Fatalf("category = %s, want %s", category, BrowserRiskNetworkMutation) + } +} + +func TestClassifyBrowserBatchFailsClosed(t *testing.T) { + for _, params := range []json.RawMessage{ + json.RawMessage(`not-json`), + json.RawMessage(`{"steps":[{}]}`), + } { + category := classifyBrowserTool("batch", params) + if category != BrowserRiskExternalEffect { + t.Fatalf("category = %s, want %s for %s", category, BrowserRiskExternalEffect, params) + } + } +} + func TestManagerPausesToolCallsWhileLexControlsBrowser(t *testing.T) { svc := &fakeService{} sent := &recordingSender{} diff --git a/internal/browser/safety.go b/internal/browser/safety.go new file mode 100644 index 0000000..94043a5 --- /dev/null +++ b/internal/browser/safety.go @@ -0,0 +1,100 @@ +package browser + +import "encoding/json" + +type BrowserRisk string + +const ( + BrowserRiskInspection BrowserRisk = "inspection" + BrowserRiskInteraction BrowserRisk = "interaction" + BrowserRiskExternalEffect BrowserRisk = "external_effect" + BrowserRiskNetworkMutation BrowserRisk = "network_mutation" + BrowserRiskCredentialAuth BrowserRisk = "credential_auth" + BrowserRiskArtifactGeneration BrowserRisk = "artifact_generation" +) + +func classifyBrowserTool(method string, params json.RawMessage) BrowserRisk { + switch method { + case "click", "type", "press", "hover", "scroll", "select_option", "set_checked", "drag", "set_viewport", "click_ref", "hover_ref", "fill_ref", "emulate_device": + return BrowserRiskInteraction + case "eval", "fill_form", "act": + return BrowserRiskExternalEffect + case "mock_route", "block_urls", "clear_routes": + return BrowserRiskNetworkMutation + case "save_state", "restore_state", "vault_save", "vault_login", "vault_list": + return 
BrowserRiskCredentialAuth + case "upload_file", "debug_bundle", "screenshot", "zoom_region", "save_pdf", "visual_diff", "generate_test", "har_export", "trace_start", "trace_stop": + return BrowserRiskArtifactGeneration + case "batch": + return classifyBrowserBatch(params) + default: + return BrowserRiskInspection + } +} + +func classifyBrowserBatch(params json.RawMessage) BrowserRisk { + if len(params) == 0 { + return BrowserRiskInspection + } + var payload struct { + Steps []map[string]json.RawMessage `json:"steps"` + } + if err := json.Unmarshal(params, &payload); err != nil { + return BrowserRiskExternalEffect + } + risk := BrowserRiskInspection + for _, step := range payload.Steps { + method := stringValue(step["method"]) + if method == "" { + method = stringValue(step["action"]) + } + nested := BrowserRiskExternalEffect + if method != "" { + nested = classifyBrowserTool(method, stepPayload(step)) + } + if riskRank(nested) > riskRank(risk) { + risk = nested + } + } + return risk +} + +func stringValue(raw json.RawMessage) string { + var value string + _ = json.Unmarshal(raw, &value) + return value +} + +func stepPayload(step map[string]json.RawMessage) json.RawMessage { + if raw := step["params"]; len(raw) > 0 { + return raw + } + data, _ := json.Marshal(step) + return data +} + +func browserRiskRequiresApproval(risk BrowserRisk) bool { + switch risk { + case BrowserRiskExternalEffect, BrowserRiskNetworkMutation, BrowserRiskCredentialAuth: + return true + default: + return false + } +} + +func riskRank(risk BrowserRisk) int { + switch risk { + case BrowserRiskCredentialAuth: + return 5 + case BrowserRiskNetworkMutation: + return 4 + case BrowserRiskExternalEffect: + return 3 + case BrowserRiskArtifactGeneration: + return 2 + case BrowserRiskInteraction: + return 1 + default: + return 0 + } +} diff --git a/internal/browser/service.go b/internal/browser/service.go index b174a83..fcba3d4 100644 --- a/internal/browser/service.go +++ b/internal/browser/service.go @@ 
-18,6 +18,7 @@ type Service interface { Open(context.Context, OpenRequest) (OpenResult, error) Close(context.Context, string) error Frame(context.Context, string) (Frame, error) + Refs(context.Context, string) (Refs, error) Tool(context.Context, string, string, []byte) (ToolResult, error) UserInput(context.Context, string, *protocol.BrowserUserInput) error } @@ -117,10 +118,59 @@ func (s LocalService) Frame(ctx context.Context, browserID string) (Frame, error }, nil } +func (s LocalService) Refs(ctx context.Context, browserID string) (Refs, error) { + var out struct { + Version int `json:"version"` + Refs []Ref `json:"refs"` + CapturedAtMs int64 `json:"capturedAtMs"` + CapturedAt string `json:"capturedAt"` + } + if err := s.rpc(ctx, browserID, "cloud_refs", map[string]any{}, &out); err != nil { + return Refs{}, err + } + capturedAt := out.CapturedAt + if capturedAt == "" && out.CapturedAtMs != 0 { + capturedAt = time.UnixMilli(out.CapturedAtMs).UTC().Format(time.RFC3339Nano) + } + return Refs{Version: out.Version, Refs: out.Refs, CapturedAt: capturedAt}, nil +} + func (s LocalService) UserInput(ctx context.Context, browserID string, input *protocol.BrowserUserInput) error { + if method, params, ok := browserUserInputTool(input); ok { + _, err := s.Tool(ctx, browserID, method, params) + return err + } return s.rpc(ctx, browserID, "cloud_user_input", browserUserInputParams(input), nil) } +func browserUserInputTool(input *protocol.BrowserUserInput) (string, []byte, bool) { + switch input.Kind { + case protocol.BrowserInputKindNavigate: + params, err := json.Marshal(map[string]any{"url": input.Text}) + return "navigate", params, err == nil + case protocol.BrowserInputKindBack: + return "back", []byte(`{}`), true + case protocol.BrowserInputKindForward: + return "forward", []byte(`{}`), true + case protocol.BrowserInputKindReload: + return "reload", []byte(`{}`), true + case protocol.BrowserInputKindRefAction: + params, err := json.Marshal(map[string]any{"ref": 
input.Text}) + return "click_ref", params, err == nil + case protocol.BrowserInputKindSetViewport: + params, err := json.Marshal(map[string]any{ + "width": input.ViewportWidth, + "height": input.ViewportHeight, + }) + return "set_viewport", params, err == nil + case protocol.BrowserInputKindEmulateDevice: + params, err := json.Marshal(map[string]any{"device": input.Text}) + return "emulate_device", params, err == nil + default: + return "", nil, false + } +} + func browserUserInputParams(input *protocol.BrowserUserInput) map[string]any { params := map[string]any{ "kind": input.Kind, diff --git a/internal/browser/types.go b/internal/browser/types.go index d2eeb3b..87062a8 100644 --- a/internal/browser/types.go +++ b/internal/browser/types.go @@ -46,6 +46,23 @@ type Frame struct { Title string } +type Refs struct { + Version int + Refs []Ref + CapturedAt string +} + +type Ref struct { + Ref string `json:"ref"` + Key string `json:"key"` + Role string `json:"role"` + Name string `json:"name,omitempty"` + X float64 `json:"x"` + Y float64 `json:"y"` + W float64 `json:"w"` + H float64 `json:"h"` +} + type ToolResult struct { OK bool ResultJSON json.RawMessage diff --git a/internal/loop/daemon_test.go b/internal/loop/daemon_test.go index d12cf77..c928fd2 100644 --- a/internal/loop/daemon_test.go +++ b/internal/loop/daemon_test.go @@ -942,6 +942,14 @@ func (loopBrowserService) Frame(ctx context.Context, browserID string) (browser. 
}, nil } +func (loopBrowserService) Refs(ctx context.Context, browserID string) (browser.Refs, error) { + return browser.Refs{ + Version: 1, + Refs: nil, + CapturedAt: time.Now().UTC().Format(time.RFC3339Nano), + }, nil +} + func (loopBrowserService) Tool(ctx context.Context, browserID string, method string, params []byte) (browser.ToolResult, error) { return browser.ToolResult{OK: true, ResultJSON: []byte(`{"ok":true}`)}, nil } diff --git a/internal/pi/extension/browser-methods.ts b/internal/pi/extension/browser-methods.ts new file mode 100644 index 0000000..bf40f3d --- /dev/null +++ b/internal/pi/extension/browser-methods.ts @@ -0,0 +1,91 @@ +import { Type } from "@sinclair/typebox"; + +export const BROWSER_TOOL_CATEGORIES = [ + "navigation", + "interaction", + "artifact_generation", + "inspection", + "external_effect", + "network_mutation", + "credential_auth", + "composite", +] as const; + +export type BrowserToolCategory = (typeof BROWSER_TOOL_CATEGORIES)[number]; + +export const BROWSER_METHOD_CATEGORY = { + navigate: "navigation", + back: "navigation", + forward: "navigation", + reload: "navigation", + list_pages: "navigation", + switch_page: "navigation", + close_page: "navigation", + list_frames: "navigation", + select_frame: "navigation", + click: "interaction", + type: "interaction", + press: "interaction", + hover: "interaction", + scroll: "interaction", + select_option: "interaction", + set_checked: "interaction", + drag: "interaction", + set_viewport: "interaction", + click_ref: "interaction", + hover_ref: "interaction", + fill_ref: "interaction", + emulate_device: "interaction", + upload_file: "artifact_generation", + debug_bundle: "artifact_generation", + screenshot: "artifact_generation", + zoom_region: "artifact_generation", + save_pdf: "artifact_generation", + visual_diff: "artifact_generation", + generate_test: "artifact_generation", + har_export: "artifact_generation", + trace_start: "artifact_generation", + trace_stop: "artifact_generation", + 
snapshot: "inspection", + get_ref: "inspection", + accessibility_tree: "inspection", + find: "inspection", + page_source: "inspection", + assert: "inspection", + diff: "inspection", + wait_for: "inspection", + analyze_form: "inspection", + find_best: "inspection", + console: "inspection", + network: "inspection", + dialog: "inspection", + timeline: "inspection", + session_summary: "inspection", + extract: "inspection", + action_cache: "inspection", + check_injection: "inspection", + eval: "external_effect", + fill_form: "external_effect", + act: "external_effect", + mock_route: "network_mutation", + block_urls: "network_mutation", + clear_routes: "network_mutation", + save_state: "credential_auth", + restore_state: "credential_auth", + vault_save: "credential_auth", + vault_login: "credential_auth", + vault_list: "credential_auth", + batch: "composite", +} as const satisfies Record<string, BrowserToolCategory>; + +export const BROWSER_TOOL_METHODS = Object.keys(BROWSER_METHOD_CATEGORY) as BrowserToolMethod[]; + +export type BrowserToolMethod = keyof typeof BROWSER_METHOD_CATEGORY; + +export const BrowserToolMethodSchema = Type.Union( + BROWSER_TOOL_METHODS.map((method) => Type.Literal(method)) as any, +); + +export const BrowserToolCategorySchema = Type.Union( + BROWSER_TOOL_CATEGORIES.map((category) => Type.Literal(category)) as any, +); diff --git a/internal/pi/extension/browser-tool.test.mjs b/internal/pi/extension/browser-tool.test.mjs index 158e90e..32df071 100644 --- a/internal/pi/extension/browser-tool.test.mjs +++ b/internal/pi/extension/browser-tool.test.mjs @@ -1,6 +1,11 @@ import assert from "node:assert/strict"; import { describe, it } from "node:test"; import { buildClaudeCliBrowserTools, mergeClaudeCliTools } from "./index.ts"; +import { + BROWSER_METHOD_CATEGORY, + BROWSER_TOOL_CATEGORIES, + BROWSER_TOOL_METHODS, +} from "./browser-methods.ts"; const browserGrant = { grantId: "grant_1", @@ -23,10 +28,83 @@ describe("browser tool registration", () => { const [tool] = 
buildClaudeCliBrowserTools({ browserGrant }); assert.ok(tool); assert.deepEqual(tool.input_schema.properties.method.enum.includes("navigate"), true); + assert.deepEqual(tool.input_schema.properties.method.enum.includes("visual_diff"), true); + assert.deepEqual(tool.input_schema.properties.method.enum.includes("vault_login"), true); assert.deepEqual(tool.input_schema.properties.method.enum.includes("browser.navigate"), false); + assert.deepEqual(tool.input_schema.properties.category.enum, BROWSER_TOOL_CATEGORIES); assert.match(tool.description, /do not prefix/i); }); + it("keeps browser method registry and categories explicit", () => { + assert.deepEqual(BROWSER_TOOL_METHODS, [ + "navigate", + "back", + "forward", + "reload", + "list_pages", + "switch_page", + "close_page", + "list_frames", + "select_frame", + "click", + "type", + "press", + "hover", + "scroll", + "select_option", + "set_checked", + "drag", + "set_viewport", + "click_ref", + "hover_ref", + "fill_ref", + "emulate_device", + "upload_file", + "debug_bundle", + "screenshot", + "zoom_region", + "save_pdf", + "visual_diff", + "generate_test", + "har_export", + "trace_start", + "trace_stop", + "snapshot", + "get_ref", + "accessibility_tree", + "find", + "page_source", + "assert", + "diff", + "wait_for", + "analyze_form", + "find_best", + "console", + "network", + "dialog", + "timeline", + "session_summary", + "extract", + "action_cache", + "check_injection", + "eval", + "fill_form", + "act", + "mock_route", + "block_urls", + "clear_routes", + "save_state", + "restore_state", + "vault_save", + "vault_login", + "vault_list", + "batch", + ]); + assert.equal(BROWSER_METHOD_CATEGORY.eval, "external_effect"); + assert.equal(BROWSER_METHOD_CATEGORY.batch, "composite"); + assert.equal(BROWSER_METHOD_CATEGORY.vault_login, "credential_auth"); + }); + it("adds gsd_browser when pi context does not include it", () => { const tools = mergeClaudeCliTools([{ name: "ask_human", description: "Ask", parameters: {} }], 
browserGrant); assert.equal(tools.filter((tool) => tool.name === "gsd_browser").length, 1); diff --git a/internal/pi/extension/index.ts b/internal/pi/extension/index.ts index 730568b..32a4a76 100644 --- a/internal/pi/extension/index.ts +++ b/internal/pi/extension/index.ts @@ -44,6 +44,12 @@ import { registerCodexAppServerProvider } from "./codex-appserver-provider.js"; import { registerOpenRouterProvider } from "./openrouter-provider.js"; import { WarmClaudeSdkWorker } from "./claude-sdk-worker.js"; import { registerSubagentTool } from "./subagent.js"; +import { + BROWSER_TOOL_CATEGORIES, + BROWSER_TOOL_METHODS, + BrowserToolCategorySchema, + BrowserToolMethodSchema, +} from "./browser-methods.js"; import { filterToolsByPolicy, hasSubagentToolPolicy, @@ -105,34 +111,9 @@ function installClaudeSdkPipeGuard() { installClaudeSdkPipeGuard(); -const BROWSER_TOOL_METHODS = [ - "navigate", - "back", - "forward", - "reload", - "click", - "type", - "press", - "hover", - "scroll", - "snapshot", - "get_ref", - "click_ref", - "hover_ref", - "fill_ref", - "wait_for", - "extract", - "assert", - "screenshot", - "console", - "network", - "dialog", -] as const; - -const BrowserToolMethod = Type.Union(BROWSER_TOOL_METHODS.map((method) => Type.Literal(method)) as any); - const BrowserToolParams = Type.Object({ - method: BrowserToolMethod, + method: BrowserToolMethodSchema, + category: Type.Optional(BrowserToolCategorySchema), params: Type.Optional(Type.Record(Type.String(), Type.Any())), }); @@ -141,7 +122,7 @@ function browserToolDefinition() { name: "gsd_browser", label: "GSD Browser", description: - "Use the active task-scoped GSD shared browser session. Pass bare method names such as navigate, snapshot, console, or network; do not prefix methods with browser.", + "Use the active task-scoped GSD shared browser session for page navigation, inspection, ref-based interaction, screenshots, network controls, auth state, traces, and artifacts. 
Prefer snapshot with refs before interacting. Pass bare method names such as navigate, snapshot, click_ref, visual_diff, or vault_login; do not prefix methods with browser.", parameters: BrowserToolParams, input_schema: { type: "object", @@ -153,6 +134,12 @@ function browserToolDefinition() { description: "Bare browser operation name. Use navigate, not browser.navigate.", }, + category: { + type: "string", + enum: BROWSER_TOOL_CATEGORIES, + description: + "Optional method classification for UI and policy. The daemon executes method and params.", + }, params: { type: "object", additionalProperties: true }, }, required: ["method"],