diff --git a/.claude/plugins/freshell-orchestration/.claude-plugin/plugin.json b/.claude/plugins/freshell-orchestration/.claude-plugin/plugin.json deleted file mode 100644 index 71e3aac0a..000000000 --- a/.claude/plugins/freshell-orchestration/.claude-plugin/plugin.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "name": "freshell-orchestration", - "version": "1.0.0", - "description": "Session-scoped Freshell orchestration skill plugin for pane/tab/panel automation." -} diff --git a/.claude/plugins/freshell-orchestration/skills/freshell-orchestration b/.claude/plugins/freshell-orchestration/skills/freshell-orchestration deleted file mode 120000 index f46b670ab..000000000 --- a/.claude/plugins/freshell-orchestration/skills/freshell-orchestration +++ /dev/null @@ -1 +0,0 @@ -../../../skills/freshell-orchestration \ No newline at end of file diff --git a/.claude/skills/freshell-orchestration/SKILL.md b/.claude/skills/freshell-orchestration/SKILL.md deleted file mode 100644 index 946ab897c..000000000 --- a/.claude/skills/freshell-orchestration/SKILL.md +++ /dev/null @@ -1,215 +0,0 @@ ---- -name: freshell-orchestration -description: "Use when interacting with Freshell panes, panels, or tabs from the CLI for tmux-style automation and multi-pane workflows, outside external-browser automation tasks." ---- - -# Freshell tmux-style automation - -## Start state - -Freshell injects `FRESHELL_URL` and `FRESHELL_TOKEN` into every spawned terminal. They are already set in your environment. - -## MCP tool (preferred) - -If you have the `freshell` MCP tool available, use it directly -- it's faster and doesn't require Bash approval for each command. - -Quick start: -- `freshell({action: "help"})` -- see all available commands -- `freshell({action: "list-tabs"})` -- see open tabs -- `freshell({action: "new-tab", params: {name: "Work", mode: "claude"}})` -- create a tab -- `freshell({action: "send-keys", params: {target: "p1", keys: "hello\n"}})` -- send input - -The MCP tool accepts the same commands as the CLI below but with structured JSON input instead of positional arguments. - -## CLI fallback - -If the MCP tool is not available (e.g., running outside Freshell or in a context without MCP support), use the CLI: - -```bash -FSH="npx tsx server/cli/index.ts" -$FSH health -``` - -Use absolute paths for `--cwd` and `--editor`. - -## Mental model - -- Freshell CLI is an HTTP client over `/api/*`, not a local tmux socket client. -- Tabs and pane trees live in `layoutStore`. -- Terminal lifecycle + scrollback live in `terminalRegistry`. -- Pane kinds: `terminal`, `editor`, `browser`, `agent-chat` (Claude/Codex), `picker` (transient). -- **Picker panes are ephemeral.** A freshly-created tab without `--mode`/`--browser`/`--editor` starts as a `picker` pane while the user chooses what to launch. Once they select, the picker is replaced by the real pane with a **new pane ID**. Never target a `picker` pane for splits or other mutations — wait until it resolves to its final kind, or use `--mode`/`--browser`/`--editor` flags on `new-tab`/`split-pane` to skip the picker entirely. -- Typical loop: `new-tab/split-pane` -> `send-keys` -> `wait-for` -> `capture-pane`/`screenshot-*`. - -## Command reference - -Output behavior: -- Most commands print JSON. -- `list-tabs` and `list-panes` print TSV unless `--json`. -- `list-panes --titles` appends a fifth TSV column with the pane title. -- `capture-pane` and `display` print plain text. - -Targets: -- Tab target: tab ID or exact tab title. -- Pane target: pane ID, pane index in active tab, or `tabRef.paneIndex`. -- Omitted target on `rename-tab` means the active tab. -- Omitted target on `rename-pane` means the active pane in the active tab. -- Omitted target falls back to active pane in active tab when command supports it. -- If a target or name contains spaces, quote it. -- Use the flagged `-t/-n` form when you want to make the target and name explicit. - -Tab commands: -- `new-tab [-n NAME] [--claude|--codex|--mode MODE] [--shell SHELL] [--cwd DIR] [--browser URL] [--editor FILE] [--resume SESSION_ID] [--prompt TEXT]` -- `list-tabs [--json]` -- `select-tab [TARGET]` or `select-tab -t TARGET` -- `kill-tab [TARGET]` or `kill-tab -t TARGET` -- `rename-tab NEW_NAME` - rename the active tab -- `rename-tab TARGET NEW_NAME` -- `rename-tab -t TARGET -n NEW_NAME` -- `has-tab TARGET` or `has-tab -t TARGET` -- `next-tab` -- `prev-tab` - -Pane/layout commands: -- `split-pane [-t PANE_TARGET] [-v] [--mode MODE] [--shell SHELL] [--cwd DIR] [--browser URL] [--editor FILE]` -- `list-panes [-t TAB_TARGET] [--json] [--titles]` -- `select-pane PANE_TARGET` or `select-pane -t PANE_TARGET` -- `rename-pane NEW_NAME` - rename the active pane -- `rename-pane TARGET NEW_NAME` -- `rename-pane -t TARGET -n NEW_NAME` -- `kill-pane PANE_TARGET` or `kill-pane -t PANE_TARGET` -- `resize-pane PANE_TARGET [--x X_PCT] [--y Y_PCT]` -- `swap-pane PANE_TARGET --other OTHER_PANE_TARGET` -- `respawn-pane PANE_TARGET [--mode MODE] [--shell SHELL] [--cwd DIR]` -- `attach TERMINAL_ID [PANE_TARGET]` or `attach -t TERMINAL_ID -p PANE_TARGET` - -Terminal interaction: -- `send-keys [-t PANE_TARGET] [-l] KEYS...` -- `capture-pane [-t PANE_TARGET] [-S START] [-J] [-e]` -- `wait-for [-t PANE_TARGET] [-p PATTERN] [--stable SECONDS] [--exit] [--prompt] [-T TIMEOUT_SECONDS]` -- `display -p FORMAT [-t PANE_TARGET]` or `display FORMAT [PANE_TARGET]` -- `run [--capture|-c] [--detach|-d] [-T TIMEOUT_SECONDS] [-n NAME] [--cwd DIR] COMMAND...` -- `summarize PANE_TARGET` or `summarize -t PANE_TARGET` -- `list-terminals` - -Browser/navigation: -- `open-browser URL [-n NAME]` -- `navigate URL [PANE_TARGET]` or `navigate --url URL -t PANE_TARGET` - -Screenshot commands: -- `screenshot --scope pane|tab|view --name NAME [--path DIR_OR_FILE] [--overwrite] [-t TARGET]` -- Aliases: - - `screenshot-pane -t PANE_TARGET --name NAME [--path ...] [--overwrite]` - - `screenshot-tab -t TAB_TARGET --name NAME [--path ...] [--overwrite]` - - `screenshot-view --name NAME [--path ...] [--overwrite]` -- `--name` is required. -- `--path` is optional; default output root is OS temp dir. -- Pane/tab scopes resolve target before capture; view scope captures current app viewport. - -Session/service: -- `list-sessions` -- `search-sessions QUERY` or `search-sessions -q QUERY` -- `health` -- `lan-info` - -tmux-style aliases: -- `new-window`, `new-session` -> `new-tab` -- `list-windows` -> `list-tabs` -- `select-window` -> `select-tab` -- `kill-window` -> `kill-tab` -- `rename-window` -> `rename-tab` -- `next-window` -> `next-tab` -- `previous-window`, `prev-window` -> `prev-tab` -- `split-window` -> `split-pane` -- `display-message` -> `display` -- `screenshot-pane`, `screenshot-tab`, `screenshot-view` -> `screenshot` - -## tmux differences - -- Transport/auth: tmux uses local socket; Freshell uses HTTP API + token auth. -- Pane types: tmux terminal-only; Freshell supports terminal/editor/browser. -- Target model: tmux session/window/pane grammar vs Freshell ID/title/index resolution. -- Runtime model: tmux TTY-local; Freshell browser-first and remote-friendly. -- Feature model: Freshell adds session indexing/search and AI summary workflows. - -## Playbook: open file in editor pane - -New tab: - -```bash -FILE="/absolute/path/to/file.ts" -$FSH new-tab -n "Edit $(basename "$FILE")" --editor "$FILE" -``` - -Split current tab: - -```bash -FILE="/absolute/path/to/file.ts" -$FSH split-pane --editor "$FILE" -``` - -## Playbook: create, split, and rename without UI interaction - -```bash -FSH="npx tsx server/cli/index.ts" -CWD="/absolute/path/to/repo" -FILE="/absolute/path/to/repo/README.md" - -WS="$($FSH new-tab -n 'Triager' --codex --cwd "$CWD")" -TAB_ID="$(printf '%s' "$WS" | jq -r '.data.tabId')" -P0="$(printf '%s' "$WS" | jq -r '.data.paneId')" -P1="$($FSH split-pane -t "$P0" --editor "$FILE" | jq -r '.data.paneId')" - -$FSH rename-tab -t "$TAB_ID" -n "Issue 166 work" -$FSH rename-pane -t "$P0" -n "Codex" -$FSH select-pane -t "$P1" -$FSH rename-pane "Editor" -``` - -## Playbook: parallel Claude panes - -```bash -FSH="npx tsx server/cli/index.ts" -CWD="/absolute/path/to/repo" -PROMPT="Implement . Run tests. Summarize tradeoffs." - -SEED_JSON="$($FSH new-tab -n 'Claude x4 Eval' --claude --cwd "$CWD")" -P0="$(printf '%s' "$SEED_JSON" | jq -r '.data.paneId')" -J1="$($FSH split-pane -t "$P0" --mode claude --cwd "$CWD")" -P1="$(printf '%s' "$J1" | jq -r '.data.paneId')" -J2="$($FSH split-pane -t "$P0" -v --mode claude --cwd "$CWD")" -P2="$(printf '%s' "$J2" | jq -r '.data.paneId')" -J3="$($FSH split-pane -t "$P1" -v --mode claude --cwd "$CWD")" -P3="$(printf '%s' "$J3" | jq -r '.data.paneId')" - -for p in "$P0" "$P1" "$P2" "$P3"; do - $FSH send-keys -t "$p" -l "$PROMPT" - $FSH send-keys -t "$p" ENTER - $FSH wait-for -t "$p" --stable 8 -T 1800 - $FSH capture-pane -t "$p" -S -120 > "/tmp/${p}.txt" -done -``` - -## Screenshot-specific guidance - -- Use a dedicated canary tab when validating screenshot behavior so live project panes are not contaminated. -- Close temporary tabs/panes after verification unless user asked to keep them open. -- Browser panes: - - Same-origin iframe content is captured best-effort. - - If iframe content is not capturable (for example cross-origin/security), screenshots intentionally render a placeholder message with source URL context instead of a silent blank region. - - For assertions, allow either explicit page content or the explicit non-capturable placeholder text depending on origin/security context. - -## REST API patterns - -- Auth header: `x-auth-token: ` (not Bearer). -- `POST /api/tabs` with `{ name, mode: "shell", shell: "wsl", cwd }` creates a tab with a terminal, bypassing the picker. -- `POST /api/panes/:id/split` with `{ direction: "horizontal"|"vertical", browser?, editor?, mode?, cwd? }` — always defaults to 50/50. -- `POST /api/panes/:id/resize` with `{ sizes: [left, right] }` (percentages summing to 100) — call immediately after split to fix proportions. -- Editor panes show "Loading..." until visited. When screenshotting multiple tabs, visit each tab once first to trigger editor loading, then loop back for screenshots. -- `DELETE /api/terminals/:id` removes orphaned terminals. Freshell has a 50 PTY limit; orphans from scripted runs accumulate silently. - -## Gotchas - -- Use `send-keys -l` for natural-language prompts. -- `wait-for --stable` is usually more reliable than prompt heuristics across providers. -- If target resolution fails, run `list-tabs` and `list-panes --json`, then retry with explicit IDs. diff --git a/.codex/skills/freshell-orchestration b/.codex/skills/freshell-orchestration deleted file mode 120000 index bd2141b50..000000000 --- a/.codex/skills/freshell-orchestration +++ /dev/null @@ -1 +0,0 @@ -../../.claude/skills/freshell-orchestration \ No newline at end of file diff --git a/docs/lab-notes/2026-03-18-freshell-feature-research.md b/docs/lab-notes/2026-03-18-freshell-feature-research.md index 9118dd585..34e0b5d8e 100644 --- a/docs/lab-notes/2026-03-18-freshell-feature-research.md +++ b/docs/lab-notes/2026-03-18-freshell-feature-research.md @@ -78,7 +78,7 @@ Commands: `new-tab`, `list-tabs`, `select-tab`, `kill-tab`, `rename-tab`, `split **Design spec:** `TMUX-SEMANTICS-PROPOSAL.md` (775 lines). Transport is HTTP+token, not Unix sockets. -**Orchestration skill:** `.claude/skills/freshell-orchestration/SKILL.md` provides agent guidance for CLI automation. +**Orchestration surface:** the `freshell` MCP tool in `server/mcp/freshell-tool.ts` provides the canonical automation guidance and action reference. --- diff --git a/docs/plans/2026-02-22-screenshot-commands.md b/docs/plans/2026-02-22-screenshot-commands.md index 73dde75b1..30ae21170 100644 --- a/docs/plans/2026-02-22-screenshot-commands.md +++ b/docs/plans/2026-02-22-screenshot-commands.md @@ -1286,11 +1286,11 @@ git commit -m "test+fix(screenshot): add integration coverage for focus-preservi --- -### Task 8: Update User-Facing Docs (Skill + Mock UI) +### Task 8: Update User-Facing Docs (MCP Tool + Mock UI) **Files:** - Modify: `docs/index.html` -- Rewrite: `.claude/skills/freshell-orchestration/SKILL.md` (or the canonical skill path used in this repo/session) +- Rewrite: `server/mcp/freshell-tool.ts` (the canonical MCP instruction source used in this repo/session) **Step 1: Write the failing doc check (manual checklist)** @@ -1322,8 +1322,8 @@ Required sections: **Step 3: Commit** ```bash -git add docs/index.html .claude/skills/freshell-orchestration/SKILL.md -git commit -m "docs: update screenshot command guidance in mock UI and tmux-style skill" +git add docs/index.html server/mcp/freshell-tool.ts +git commit -m "docs: update screenshot command guidance in mock UI and freshell MCP help" ``` --- diff --git a/docs/plans/2026-02-27-freshclaude-plugin-injection-design.md b/docs/plans/2026-02-27-freshclaude-plugin-injection-design.md index 062a77b8d..e1f8165b4 100644 --- a/docs/plans/2026-02-27-freshclaude-plugin-injection-design.md +++ b/docs/plans/2026-02-27-freshclaude-plugin-injection-design.md @@ -2,7 +2,7 @@ ## Problem -Freshclaude sessions need to load specific skills (via plugins) at creation time. The immediate use case is ensuring every freshclaude session has `freshell-orchestration` available. The future use case is custom pane types (e.g., "kilroy") that wrap freshclaude and inject skills from external repos. +Freshclaude sessions may need to load explicit Claude SDK plugin bundles at creation time. Freshell orchestration is no longer one of them: the canonical orchestration surface is the `freshell` MCP tool, not a default plugin or skill path. ## Design @@ -21,8 +21,7 @@ ClaudeChatPaneContent.plugins: string[] (absolute paths) - Add `plugins?: string[]` — array of absolute paths to plugin directories **2. `AppSettings.freshclaude`** (`src/store/types.ts`) -- Add `defaultPlugins?: string[]` — default plugin paths for new sessions -- Server resolves `freshell-orchestration` to its absolute path as the fallback default +- Add `defaultPlugins?: string[]` — default plugin paths for new sessions when the user explicitly wants extra Claude SDK plugins **3. WS Protocol** (`shared/ws-protocol.ts`) - Add `plugins: z.array(z.string()).optional()` to `SdkCreateSchema` @@ -30,7 +29,7 @@ ClaudeChatPaneContent.plugins: string[] (absolute paths) **4. `SdkBridge.createSession()`** (`server/sdk-bridge.ts`) - Accept `plugins?: string[]` in options - Map to `plugins: paths.map(p => ({ type: 'local' as const, path: p }))` in the `query()` call -- Default: `[path.resolve(projectRoot, '.claude/plugins/freshell-orchestration')]` +- No implicit orchestration plugin default; Freshell orchestration comes from MCP **5. WS Handler** (`server/ws-handler.ts`) - Pass `m.plugins` through to `sdkBridge.createSession()` @@ -49,5 +48,5 @@ ClaudeChatPaneContent.plugins: string[] (absolute paths) ### Default Behavior -- Every freshclaude session gets `freshell-orchestration` loaded by default +- Every freshclaude session gets Freshell orchestration through the `freshell` MCP tool, while explicit plugin bundles remain opt-in - This is additive — the session still discovers project-level skills from its CWD normally diff --git a/docs/plans/2026-02-27-freshclaude-plugin-injection.md b/docs/plans/2026-02-27-freshclaude-plugin-injection.md index e41726e6e..9e089da80 100644 --- a/docs/plans/2026-02-27-freshclaude-plugin-injection.md +++ b/docs/plans/2026-02-27-freshclaude-plugin-injection.md @@ -2,9 +2,9 @@ > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. -**Goal:** Allow freshclaude sessions to load plugins (which expose skills) at creation time, defaulting to `freshell-orchestration`. +**Goal:** Allow freshclaude sessions to load explicit plugin bundles at creation time. Freshell orchestration should come from the `freshell` MCP tool, not a default skill/plugin path. -**Architecture:** Add a `plugins` parameter (array of absolute paths to plugin directories) that flows from `ClaudeChatPaneContent` through the WS protocol to `SdkBridge.createSession()`, which maps them to the SDK's `plugins` option on `query()`. The server resolves a default plugin path for `freshell-orchestration` at runtime. +**Architecture:** Add a `plugins` parameter (array of absolute paths to plugin directories) that flows from `ClaudeChatPaneContent` through the WS protocol to `SdkBridge.createSession()`, which maps them to the SDK's `plugins` option on `query()`. Orchestration itself is provided separately by the `freshell` MCP tool. **Tech Stack:** TypeScript, Zod (WS protocol), React/Redux (client), Node.js (server), Vitest (tests) @@ -26,11 +26,11 @@ it('parses sdk.create with plugins array', () => { type: 'sdk.create', requestId: 'req-1', cwd: '/home/user/project', - plugins: ['/path/to/.claude/plugins/my-skill'], + plugins: ['/path/to/.claude/plugins/my-plugin'], }) expect(result.success).toBe(true) if (result.success) { - expect(result.data.plugins).toEqual(['/path/to/.claude/plugins/my-skill']) + expect(result.data.plugins).toEqual(['/path/to/.claude/plugins/my-plugin']) } }) @@ -350,7 +350,7 @@ git commit -m "feat(client): wire plugins through pane creation and sdk.create m --- -### Task 6: Add default freshell-orchestration plugin resolution +### Task 6: Keep orchestration on MCP instead of a default plugin **Files:** - Modify: `server/sdk-bridge.ts` (top of file + createSession) @@ -359,16 +359,13 @@ git commit -m "feat(client): wire plugins through pane creation and sdk.create m **Step 1: Write the failing test** ```typescript -describe('default plugins', () => { - it('resolves freshell-orchestration as default when no plugins specified', async () => { +describe('plugins option', () => { + it('omits plugins when not set', async () => { await bridge.createSession({ cwd: '/tmp' }) - expect(mockQueryOptions?.plugins).toBeDefined() - expect(mockQueryOptions?.plugins).toHaveLength(1) - expect(mockQueryOptions?.plugins[0].type).toBe('local') - expect(mockQueryOptions?.plugins[0].path).toContain('.claude/plugins/freshell-orchestration') + expect(mockQueryOptions?.plugins).toBeUndefined() }) - it('does not add defaults when plugins are explicitly provided', async () => { + it('passes explicit plugins through unchanged', async () => { await bridge.createSession({ cwd: '/tmp', plugins: ['/custom/plugin'] }) expect(mockQueryOptions?.plugins).toEqual([ { type: 'local', path: '/custom/plugin' }, @@ -384,58 +381,28 @@ describe('default plugins', () => { **Step 2: Run test to verify it fails** -Run: `npx vitest run test/unit/server/sdk-bridge.test.ts -t "default plugins"` -Expected: FAIL — currently plugins is undefined when not set +Run: `npx vitest run test/unit/server/sdk-bridge.test.ts -t "plugins option"` +Expected: FAIL — a legacy default orchestration plugin is still being injected when `plugins` is omitted -**Step 3: Implement default plugin resolution** +**Step 3: Remove the default orchestration plugin resolution** -At top of `server/sdk-bridge.ts`, add path resolution: - -```typescript -import path from 'path' -import { fileURLToPath } from 'url' - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const PROJECT_ROOT = path.resolve(__dirname, '..') -const DEFAULT_PLUGINS = [ - path.resolve(PROJECT_ROOT, '.claude/plugins/freshell-orchestration'), -] -``` - -Then update the plugins mapping in `createSession` to use defaults: - -```typescript -const resolvedPlugins = options.plugins !== undefined - ? options.plugins.map(p => ({ type: 'local' as const, path: p })) - : DEFAULT_PLUGINS.map(p => ({ type: 'local' as const, path: p })) -``` - -And pass `plugins: resolvedPlugins` directly (not spread-conditionally) in the query options. +In `server/sdk-bridge.ts`, remove the default orchestration plugin constant and keep `plugins` omitted unless the caller explicitly provided them. Freshell orchestration continues to come from the `mcpServers.freshell` entry in the same query options. **Step 4: Run test to verify it passes** -Run: `npx vitest run test/unit/server/sdk-bridge.test.ts -t "default plugins"` +Run: `npx vitest run test/unit/server/sdk-bridge.test.ts -t "plugins option"` Expected: PASS **Step 5: Run all tests** Run: `npx vitest run` -Expected: PASS — the `omits plugins when not set` test from Task 2 will now fail and needs updating to expect the default plugin instead. Update it: - -```typescript -it('uses default plugins when not set', async () => { - await bridge.createSession({ cwd: '/tmp' }) - expect(mockQueryOptions?.plugins).toBeDefined() - expect(mockQueryOptions?.plugins).toHaveLength(1) - expect(mockQueryOptions?.plugins[0].path).toContain('freshell-orchestration') -}) -``` +Expected: PASS **Step 6: Commit** ```bash git add server/sdk-bridge.ts test/unit/server/sdk-bridge.test.ts -git commit -m "feat(sdk-bridge): resolve freshell-orchestration as default plugin" +git commit -m "refactor(sdk-bridge): remove legacy orchestration plugin fallback" ``` --- @@ -454,7 +421,7 @@ Expected: PASS **Step 3: Manual smoke test (if dev server available)** -1. Create a freshclaude pane (no custom plugins) — should load freshell-orchestration by default -2. Verify the session can use the freshell-orchestration skill +1. Create a freshclaude pane (no custom plugins) — should rely on the `freshell` MCP tool for orchestration and not inject a default orchestration plugin +2. Verify the session exposes the `freshell` MCP tool for orchestration **Step 4: Final commit (if any fixups needed)** diff --git a/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration-test-plan.md b/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration-test-plan.md index ab0fd950e..75fb48f38 100644 --- a/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration-test-plan.md +++ b/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration-test-plan.md @@ -11,9 +11,9 @@ Reconciliation note: the implementation plan does not invalidate the expected te - `Plan-1`: Rename stays server-authoritative; names are normalized once at the HTTP boundary; blank names are rejected; rename broadcasts are success-only. Source: [2026-03-09-issue-166-rename-tab-pane-orchestration.md](/home/user/code/freshell/.worktrees/trycycle-issues-166-163-162-160-158-156-151/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration.md). - `Plan-2`: Pane rename persists through `LayoutStore.renamePane(paneId, title)` and `PATCH /api/panes/:id`, with pane ownership resolved internally from the pane target. Source: [2026-03-09-issue-166-rename-tab-pane-orchestration.md](/home/user/code/freshell/.worktrees/trycycle-issues-166-163-162-160-158-156-151/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration.md). - `Plan-3`: Connected UIs converge by handling `tab.rename` and `pane.rename` `ui.command` broadcasts after successful authoritative mutations. Source: [2026-03-09-issue-166-rename-tab-pane-orchestration.md](/home/user/code/freshell/.worktrees/trycycle-issues-166-163-162-160-158-156-151/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration.md). -- `Skill-1`: `rename-tab` supports active-target default, explicit positional target, and flagged `-t/-n` forms. Source: post-change contract in [.claude/skills/freshell-orchestration/SKILL.md](/home/user/code/freshell/.worktrees/trycycle-issues-166-163-162-160-158-156-151/.claude/skills/freshell-orchestration/SKILL.md) and the implementation plan. -- `Skill-2`: `rename-pane` supports active-target default, explicit positional target, and flagged `-t/-n` forms. Source: post-change contract in [.claude/skills/freshell-orchestration/SKILL.md](/home/user/code/freshell/.worktrees/trycycle-issues-166-163-162-160-158-156-151/.claude/skills/freshell-orchestration/SKILL.md) and the implementation plan. -- `Skill-3`: The orchestration skill documents a concrete create/split/select/rename flow that assigns tab and pane names with no manual UI interaction. Source: post-change contract in [.claude/skills/freshell-orchestration/SKILL.md](/home/user/code/freshell/.worktrees/trycycle-issues-166-163-162-160-158-156-151/.claude/skills/freshell-orchestration/SKILL.md) and the implementation plan. +- `MCP-1`: `rename-tab` supports caller-target default via `{ name }` and explicit target via `{ target, name }`. Source: post-change contract in [server/mcp/freshell-tool.ts](/home/user/code/freshell/.worktrees/trycycle-issues-166-163-162-160-158-156-151/server/mcp/freshell-tool.ts) and the implementation plan. +- `MCP-2`: `rename-pane` supports caller-target default via `{ name }` and explicit target via `{ target, name }`. Source: post-change contract in [server/mcp/freshell-tool.ts](/home/user/code/freshell/.worktrees/trycycle-issues-166-163-162-160-158-156-151/server/mcp/freshell-tool.ts) and the implementation plan. +- `MCP-3`: The `freshell` MCP help text documents a concrete create/split/select/rename flow that assigns tab and pane names with no manual UI interaction. Source: post-change contract in [server/mcp/freshell-tool.ts](/home/user/code/freshell/.worktrees/trycycle-issues-166-163-162-160-158-156-151/server/mcp/freshell-tool.ts) and the implementation plan. ## Harness requirements @@ -41,13 +41,13 @@ Reconciliation note: the implementation plan does not invalidate the expected te - Estimated complexity: low; existing coverage already seeds snapshots directly in [test/unit/server/agent-layout-store-write.test.ts](/home/user/code/freshell/.worktrees/trycycle-issues-166-163-162-160-158-156-151/test/unit/server/agent-layout-store-write.test.ts). - Tests that depend on it: 9. -5. `skill-doc-inspection` harness - - What it does: reads the canonical orchestration skill markdown and asserts on the published command reference and example workflow. +5. `mcp-help-inspection` harness + - What it does: reads the canonical `freshell` MCP help source and asserts on the published command reference and example workflow. - What it exposes: file-content assertions against documented commands and playbooks. - Estimated complexity: low. - Tests that depend on it: 8. -Build order: `agent-api-route`, `layout-store-snapshot`, and `ui-command-dispatch` can be extended first because they unblock the lower-level red tests. `cli-real-layout` comes next because it proves the end-to-end acceptance flow. `skill-doc-inspection` is last because it depends on the documented contract after the CLI surface is settled. +Build order: `agent-api-route`, `layout-store-snapshot`, and `ui-command-dispatch` can be extended first because they unblock the lower-level red tests. `cli-real-layout` comes next because it proves the end-to-end acceptance flow. `mcp-help-inspection` is last because it depends on the documented contract after the CLI surface is settled. ## Test plan @@ -140,16 +140,16 @@ Build order: `agent-api-route`, `layout-store-snapshot`, and `ui-command-dispatc - The failed-mutation request does not emit any `pane.rename` broadcast because the authoritative store did not confirm an owning `{ tabId, paneId }` rename result. Source: `Plan-1`, `Plan-2`, `Plan-3`. - **Interactions:** HTTP validation boundary, layout-store error path, websocket broadcast suppression. -8. **Name:** The orchestration skill advertises both rename commands, their active-target defaults, and a no-UI create/split/rename playbook +8. **Name:** The `freshell` MCP help text advertises both rename commands, their caller-target defaults, and a no-UI create/split/rename playbook - **Type:** regression - - **Harness:** `skill-doc-inspection` - - **Preconditions:** Read the canonical skill file at `.claude/skills/freshell-orchestration/SKILL.md`. - - **Actions:** Assert that the skill markdown contains the `rename-tab` and `rename-pane` command forms, the omitted-target semantics, and a concrete create/split/select/rename example. + - **Harness:** `mcp-help-inspection` + - **Preconditions:** Read the canonical MCP help source at `server/mcp/freshell-tool.ts`. + - **Actions:** Assert that the MCP help text contains the `rename-tab` and `rename-pane` command forms, the omitted-target semantics, and a concrete create/split/select/rename example. - **Expected outcome:** - - The command reference includes `rename-tab NEW_NAME`, `rename-tab TARGET NEW_NAME`, and `rename-tab -t TARGET -n NEW_NAME`. Source: `AC-1`, `AC-3`, `Skill-1`. - - The command reference includes `rename-pane NEW_NAME`, `rename-pane TARGET NEW_NAME`, and `rename-pane -t TARGET -n NEW_NAME`. Source: `AC-2`, `AC-3`, `Skill-2`. - - The skill explicitly states that omitted targets use the active tab or active pane, and includes a documented create/split/select/rename workflow that needs no manual UI interaction. Source: `AC-4`, `Skill-2`, `Skill-3`. - - **Interactions:** Agent skill consumers, plugin indirection through the canonical skill file. + - The command reference includes `rename-tab` with `{ name }` and `rename-tab` with `{ target, name }`. Source: `AC-1`, `AC-3`, `MCP-1`. + - The command reference includes `rename-pane` with `{ name }` and `rename-pane` with `{ target, name }`. Source: `AC-2`, `AC-3`, `MCP-2`. + - The MCP help text explicitly states that omitted targets use the caller tab or caller pane, and includes a documented create/split/select/rename workflow that needs no manual UI interaction. Source: `AC-4`, `MCP-2`, `MCP-3`. + - **Interactions:** Agent MCP consumers using the canonical `freshell` tool help surface. 9. **Name:** Renaming a pane in `LayoutStore` writes the title under the pane’s owning tab and never renames the tab itself - **Type:** invariant @@ -171,9 +171,9 @@ Covered action space: - Multi-word rename names in both flagged and positional CLI forms. - HTTP rename validation, trimming, and success-only broadcast behavior for both tabs and panes. - Client convergence for the new `pane.rename` broadcast. -- Published orchestration-skill documentation for the new rename surface and the no-UI workflow. +- Published `freshell` MCP help documentation for the new rename surface and the no-UI workflow. Explicit exclusions: - No browser-level manual UI automation is planned for this issue. The acceptance criteria require a no-UI orchestration flow, and the highest-value proof is the real CLI plus real `LayoutStore` scenario coverage. Residual risk: a live browser WebSocket client could still mishandle a rename even if `handleUiCommand()` is correct, though that risk is limited because `tab.rename` already exists and the new client-side work is a single `pane.rename` dispatch path. - No dedicated performance benchmark is planned. These renames are single PATCH requests plus small in-memory mutations, so performance risk is low. Residual risk: a pathological regression in target resolution or broadcast fan-out would not be caught by this plan, but any such regression would more likely surface as functional test timeouts first. -- No separate test targets the plugin pointer file at `.claude/plugins/freshell-orchestration/skills/freshell-orchestration`, because the implementation plan explicitly leaves that pointer unchanged and the canonical skill file is the source of truth. Residual risk: if the pointer is independently broken, agents loading the plugin indirection could still miss the updated docs despite the canonical file being correct. +- No separate legacy plugin-pointer coverage is planned. The canonical orchestration reference is the `freshell` MCP tool help text, so there is no remaining plugin indirection to keep in sync. diff --git a/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration.md b/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration.md index 2212f9e84..f697e1138 100644 --- a/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration.md +++ b/docs/plans/2026-03-09-issue-166-rename-tab-pane-orchestration.md @@ -2,9 +2,9 @@ > **For Claude:** REQUIRED SUB-SKILL: Use trycycle-executing to implement this plan task-by-task. -**Goal:** Expose orchestration operations for renaming tabs and panes, including active-target defaults and explicit identifiers, and document those operations in the Freshell orchestration skill. +**Goal:** Expose orchestration operations for renaming tabs and panes, including active-target defaults and explicit identifiers, and document those operations in the canonical `freshell` MCP help/instruction surface. -**Architecture:** Keep rename orchestration server-authoritative. Normalize names once at the HTTP boundary, trim blanks out of both rename routes, and only broadcast rename `ui.command` events after the layout store confirms the mutation. Add the missing pane rename write path as a pane-targeted mutation in `LayoutStore`, expose it through `PATCH /api/panes/:id`, mirror the resulting `pane.rename` broadcast into Redux, and extend the CLI plus orchestration skill so agents can create or split workspaces and assign stable tab and pane names entirely through the orchestration surface. +**Architecture:** Keep rename orchestration server-authoritative. Normalize names once at the HTTP boundary, trim blanks out of both rename routes, and only broadcast rename `ui.command` events after the layout store confirms the mutation. Add the missing pane rename write path as a pane-targeted mutation in `LayoutStore`, expose it through `PATCH /api/panes/:id`, mirror the resulting `pane.rename` broadcast into Redux, and extend the CLI plus `freshell` MCP help text so agents can create or split workspaces and assign stable tab and pane names entirely through the orchestration surface. **Tech Stack:** TypeScript, Express agent API, Freshell CLI (`server/cli`), React/Redux UI command handling, Vitest, supertest, child-process CLI e2e tests. @@ -28,7 +28,7 @@ - `rename-pane` is added as a first-class orchestration operation and supports both active-pane and explicit-pane targets. - The agent API exposes `PATCH /api/panes/:id` and `LayoutStore.renamePane(paneId, title)` so pane rename shares the same authoritative write path as the rest of layout orchestration. - Connected UIs converge immediately because successful mutations broadcast `tab.rename` and `pane.rename` `ui.command` events only after the authoritative store returns a resolved target. -- The orchestration skill documents both commands, their target grammar, and a concrete create/split/select/rename flow that uses both explicit targets and active-target defaults without any manual UI interaction. +- The canonical `freshell` MCP help text documents both commands, their target grammar, and a concrete create/split/select/rename flow that uses both explicit targets and active-target defaults without any manual UI interaction. ### Task 1: Harden Tab Rename Validation and Broadcast Semantics @@ -728,76 +728,66 @@ git add test/e2e/agent-cli-flow.test.ts server/cli/index.ts git commit -m "feat(cli): add rename-pane orchestration" ``` -### Task 6: Update the Orchestration Skill to Expose the New Surface +### Task 6: Update the MCP Help Surface to Expose the New Rename Actions **Files:** -- Modify: `.claude/skills/freshell-orchestration/SKILL.md` +- Modify: `server/mcp/freshell-tool.ts` **Step 1: Rewrite the rename command reference** -In `.claude/skills/freshell-orchestration/SKILL.md`, change the command reference to: +In `server/mcp/freshell-tool.ts`, update the help text and tests so the reference advertises the structured MCP equivalents: ```md Tab commands: -- `rename-tab NEW_NAME` - rename the active tab -- `rename-tab TARGET NEW_NAME` -- `rename-tab -t TARGET -n NEW_NAME` +- `rename-tab` with `{ name }` renames the caller tab +- `rename-tab` with `{ target, name }` renames an explicit tab Pane/layout commands: -- `rename-pane NEW_NAME` - rename the active pane -- `rename-pane TARGET NEW_NAME` -- `rename-pane -t TARGET -n NEW_NAME` +- `rename-pane` with `{ name }` renames the caller pane +- `rename-pane` with `{ target, name }` renames an explicit pane ``` Under “Targets”, add: ```md -- Omitted target on `rename-tab` means the active tab. -- Omitted target on `rename-pane` means the active pane in the active tab. -- If a target contains spaces, or if you want an active-target rename with an unquoted multi-word name, prefer the flagged `-t/-n` form. +- Omitted target on `rename-tab` means the caller tab (falling back to the active tab if no caller identity is available). +- Omitted target on `rename-pane` means the caller pane (falling back to the active pane if no caller identity is available). ``` -Important detail: -- Update the canonical skill at `.claude/skills/freshell-orchestration/SKILL.md`; the plugin directory contains a pointer file at `.claude/plugins/freshell-orchestration/skills/freshell-orchestration` that already resolves back to this skill directory, so do not edit the pointer file for this issue. - **Step 2: Add a concrete create/split/rename playbook** Append this example: ```bash -FSH="npx tsx server/cli/index.ts" -CWD="/absolute/path/to/repo" -FILE="/absolute/path/to/repo/README.md" +seed = freshell({ action: "new-tab", params: { name: "Triager", mode: "codex", cwd: "/absolute/path/to/repo" } }) +tabId = seed.data.tabId +p0 = seed.data.paneId +p1 = freshell({ action: "split-pane", params: { target: p0, editor: "/absolute/path/to/repo/README.md" } }).data.paneId -WS="$($FSH new-tab -n 'Triager' --codex --cwd "$CWD")" -TAB_ID="$(printf '%s' "$WS" | jq -r '.data.tabId')" -P0="$(printf '%s' "$WS" | jq -r '.data.paneId')" -P1="$($FSH split-pane -t "$P0" --editor "$FILE" | jq -r '.data.paneId')" - -$FSH rename-tab -t "$TAB_ID" -n "Issue 166 work" -$FSH rename-pane -t "$P0" -n "Codex" -$FSH select-pane -t "$P1" -$FSH rename-pane "Editor" +freshell({ action: "rename-tab", params: { target: tabId, name: "Issue 166 work" } }) +freshell({ action: "rename-pane", params: { target: p0, name: "Codex" } }) +freshell({ action: "select-pane", params: { target: p1 } }) +freshell({ action: "rename-pane", params: { name: "Editor" } }) ``` -**Step 3: Sanity-check the skill markdown** +**Step 3: Sanity-check the MCP help text** Run: ```bash -sed -n '1,240p' .claude/skills/freshell-orchestration/SKILL.md +npm run test:vitest -- --config vitest.server.config.ts --run test/unit/server/mcp/freshell-tool.test.ts ``` Expected: - `rename-pane` is documented -- Active-target defaults are explicit for both rename commands +- Caller-target defaults are explicit for both rename commands - The playbook shows a create/split/rename flow with no UI interaction **Step 4: Commit** ```bash -git add .claude/skills/freshell-orchestration/SKILL.md -git commit -m "docs(skill): document tab and pane rename orchestration" +git add server/mcp/freshell-tool.ts test/unit/server/mcp/freshell-tool.test.ts +git commit -m "docs(mcp): document tab and pane rename orchestration" ``` ### Task 7: Final Verification diff --git a/docs/plans/2026-03-31-tool-coalesce-test-plan.md b/docs/plans/2026-03-31-tool-coalesce-test-plan.md new file mode 100644 index 000000000..e20fdb1fb --- /dev/null +++ b/docs/plans/2026-03-31-tool-coalesce-test-plan.md @@ -0,0 +1,375 @@ +# Test Plan: Tool Coalescing + +appendum +> **Feature:** Coalesce consecutive tool-only assistant messages into a single message so that multiple tool uses appear in ONE ToolStrip showing "N tools used" instead of multiple separate "1 tool used" strips. +> **Implementation Plan:** `docs/plans/2026-03-31-tool-coalesce.md` +> +> --- +> +> ## Strategy Reconciliation +> +> The testing strategy from the conversation aligns with the implementation plan: +> +> | Strategy Element | Implementation Plan Coverage | Status | +> |-----------------|------------------------------|--------| +> | Unit tests for Redux coalescing | Task 1: 5 tests in `agentChatSlice.test.ts` | Aligned | +> | Unit tests for JSONL loader coalescing | Task 2: 4 tests in `session-history-loader.test.ts` | Aligned | +> | Browser-use test with LLM judge | Task 4: `tool_coalesce.py` + `tool_coalesce_test.py` | Aligned | +> +> **No strategy changes requiring user approval.** The implementation plan already includes the browser-use tests with LLM as judge that the user explicitly requested. +> +> --- +> +> ## Action Space +> +> ### User-Visible Actions Affected +> +> 1. **Freshclaude session viewing** - When a user opens a Freshclaude session in the sidebar, the assistant messages should show coalesced tool strips +> 2. **Session history loading** - When resuming a session, JSONL history should show coalesced tool strips +> 3. **Live message streaming** - When an assistant uses multiple tools in sequence during a live session, they should coalesce into one strip +> +> ### Internal Actions +> +> 1. `addAssistantMessage` Redux action - Coalesces when previous message is tool-only assistant +> 2. `extractChatMessagesFromJsonl` function - Coalesces when parsing JSONL history +> +> --- +> +> ## Test Plan +> +> ### Priority 1: Problem-Statement Red Checks (Acceptance Gates) +> +> These tests directly address the reported bug: seeing "1 tool used" twice instead of "2 tools used". +> +> #### Test 1.1: Coalesces consecutive tool-only assistant messages +> - **Name:** Coalesces consecutive tool-only assistant messages +> - **Type:** regression +> - **Disposition:** new +> - **Harness:** Redux slice unit test (direct reducer calls) +> - **Source of Truth:** User's reported bug - seeing "1 tool used" twice instead of "2 tools used" +> - **Preconditions:** Session 's1' exists with no messages +> - **Actions:** +> 1. Dispatch `addAssistantMessage({ sessionId: 's1', content: [{ type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'ls' } }] })` +> 2. Dispatch `addAssistantMessage({ sessionId: 's1', content: [{ type: 'tool_result', tool_use_id: 't1', content: 'output' }, { type: 'tool_use', id: 't2', name: 'Read', input: { file_path: 'f.ts' } }] })` +> - **Expected outcome:** Session has exactly 1 message with exactly 3 content blocks (tool_use, tool_result, tool_use) +> - **Interactions:** None +> +> #### Test 1.2: Does not coalesce when previous message has text content +> - **Name:** Does not coalesce when previous message has text content +> - **Type:** boundary +> - **Disposition:** new +> - **Harness:** Redux slice unit test +> - **Source of Truth:** Implementation plan - text content breaks coalescing +> - **Preconditions:** Session 's1' exists with no messages +> - **Actions:** +> 1. Dispatch `addAssistantMessage({ sessionId: 's1', content: [{ type: 'text', text: 'Hello' }] })` +> 2. Dispatch `addAssistantMessage({ sessionId: 's1', content: [{ type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'ls' } }] })` +> - **Expected outcome:** Session has exactly 2 messages +> - **Interactions:** None +> +> #### Test 1.3: Does not coalesce when new message has text content +> - **Name:** Does not coalesce when new message has text content +> - **Type:** boundary +> - **Disposition:** new +> - **Harness:** Redux slice unit test +> - **Source of Truth:** Implementation plan - text content breaks coalescing +> - **Preconditions:** Session 's1' exists with no messages +> - **Actions:** +> 1. Dispatch `addAssistantMessage({ sessionId: 's1', content: [{ type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'ls' } }] })` +> 2. Dispatch `addAssistantMessage({ sessionId: 's1', content: [{ type: 'text', text: 'Done' }] })` +> - **Expected outcome:** Session has exactly 2 messages +> - **Interactions:** None +> +> #### Test 1.4: Coalesces multiple consecutive tool-only messages +> - **Name:** Coalesces multiple consecutive tool-only messages +> - **Type:** regression +> - **Disposition:** new +> - **Harness:** Redux slice unit test +> - **Source of Truth:** User's reported bug - multiple tools should all coalesce +> - **Preconditions:** Session 's1' exists with no messages +> - **Actions:** +> 1. Dispatch `addAssistantMessage` with tool_use t1 +> 2. Dispatch `addAssistantMessage` with tool_result for t1 +> 3. Dispatch `addAssistantMessage` with tool_use t2 +> 4. Dispatch `addAssistantMessage` with tool_result for t2 +> - **Expected outcome:** Session has exactly 1 message with exactly 4 content blocks +> - **Interactions:** None +> +> #### Test 1.5: Does not coalesce across user messages +> - **Name:** Does not coalesce across user messages +> - **Type:** boundary +> - **Disposition:** new +> - **Harness:** Redux slice unit test +> - **Source of Truth:** Implementation plan - user messages break coalescing +> - **Preconditions:** Session 's1' exists with no messages +> - **Actions:** +> 1. Dispatch `addAssistantMessage` with tool_use t1 +> 2. Dispatch `addUserMessage` with text +> 3. Dispatch `addAssistantMessage` with tool_use t2 +> - **Expected outcome:** Session has exactly 3 messages (assistant, user, assistant) +> - **Interactions:** None +> +> #### Test 1.6: JSONL loader coalesces consecutive tool-only assistant messages +> - **Name:** JSONL loader coalesces consecutive tool-only assistant messages +> - **Type:** regression +> - **Disposition:** new +> - **Harness:** JSONL parser unit test (direct function calls) +> - **Source of Truth:** User's reported bug - restored sessions also show separate "1 tool used" +> - **Preconditions:** None +> - **Actions:** +> 1. Call `extractChatMessagesFromJsonl` with JSONL containing 3 consecutive tool-only assistant messages +> - **Expected outcome:** Returns 1 message with 3 content blocks +> - **Interactions:** None +> +> #### Test 1.7: JSONL loader does not coalesce when assistant message has text content +> - **Name:** JSONL loader does not coalesce when assistant message has text content +> - **Type:** boundary +> - **Disposition:** new +> - **Harness:** JSONL parser unit test +> - **Source of Truth:** Implementation plan - text content breaks coalescing +> - **Preconditions:** None +> - **Actions:** +> 1. Call `extractChatMessagesFromJsonl` with JSONL containing text message followed by tool_use message +> - **Expected outcome:** Returns 2 messages +> - **Interactions:** None +> +> #### Test 1.8: JSONL loader does not coalesce across user messages +> - **Name:** JSONL loader does not coalesce across user messages +> - **Type:** boundary +> - **Disposition:** new +> - **Harness:** JSONL parser unit test +> - **Source of Truth:** Implementation plan - user messages break coalescing +> - **Preconditions:** None +> - **Actions:** +> 1. Call `extractChatMessagesFromJsonl` with JSONL containing tool_use, user, tool_use +> - **Expected outcome:** Returns 3 messages +> - **Interactions:** None +> +> #### Test 1.9: JSONL loader preserves timestamp from first message in coalesced group +> - **Name:** JSONL loader preserves timestamp from first message in coalesced group +> - **Type:** invariant +> - **Disposition:** new +> - **Harness:** JSONL parser unit test +> - **Source of Truth:** Implementation plan - timestamp from first message preserved +> - **Preconditions:** None +> - **Actions:** +> 1. Call `extractChatMessagesFromJsonl` with JSONL containing 2 tool-only messages with different timestamps +> - **Expected outcome:** Coalesced message has timestamp of first message +> - **Interactions:** None +> +> --- +> +> ### Priority 2: High-Value Existing Integration Tests +> +> The existing `MessageBubble.test.tsx` already verifies tool grouping within a single message. These tests remain unchanged and provide confidence that the rendering layer still works correctly after the Redux/JSONL changes. +> +> **Existing tests to keep passing (no modifications needed):** +> - `groups contiguous tool blocks into a single ToolStrip` - verifies rendering of coalesced content +> - `creates separate strips for non-contiguous tool groups` - verifies text breaks tool groups +> - `renders collapsed strip with summary text when showTools is false` - verifies "N tools used" display +> +> --- +> +> ### Priority 3: New Integration Tests +> +> #### Test 3.1: End-to-end tool strip rendering with coalesced messages +> - **Name:** End-to-end tool strip rendering with coalesced messages +> - **Type:** integration +> - **Disposition:** new +> - **Harness:** React Testing Library with Redux store +> - **Source of Truth:** User-visible behavior - one strip showing "N tools used" +> - **Preconditions:** Redux store with session 's1' +> - **Actions:** +> 1. Dispatch `sessionCreated` for 's1' +> 2. Dispatch `addAssistantMessage` with tool_use t1 +> 3. Dispatch `addAssistantMessage` with tool_result t1 + tool_use t2 +> 4. Render `MessageBubble` with the resulting message content +> - **Expected outcome:** One ToolStrip showing "2 tools used" (collapsed) or 2 tool blocks (expanded) +> - **Interactions:** Redux store -> MessageBubble component +> +> #### Test 3.2: Session history loading produces coalesced messages +> - **Name:** Session history loading produces coalesced messages +> - **Type:** integration +> - **Disposition:** new +> - **Harness:** JSONL file I/O + Redux store +> - **Source of Truth:** User-visible behavior - restored sessions show coalesced tools +> - **Preconditions:** Temporary JSONL file with 3 tool-only assistant messages +> - **Actions:** +> 1. Call `loadSessionHistory` with the temp file +> 2. Dispatch messages to Redux store +> 3. Render `MessageBubble` with the resulting messages +> - **Expected outcome:** One ToolStrip showing "3 tools used" +> - **Interactions:** File system -> JSONL loader -> Redux store -> MessageBubble +> +> --- +> +> ### Priority 4: Boundary and Edge Cases +> +> #### Test 4.1: Empty content array does not trigger coalescing +> - **Name:** Empty content array does not trigger coalescing +> - **Type:** boundary +> - **Disposition:** new +> - **Harness:** Redux slice unit test +> - **Source of Truth:** Implementation plan - `isToolOnlyContent([])` returns false +> - **Preconditions:** Session 's1' exists with one tool-only message +> - **Actions:** +> 1. Dispatch `addAssistantMessage` with empty content array +> - **Expected outcome:** Empty message is added (not coalesced) - 2 messages total +> - **Interactions:** None +> +> #### Test 4.2: Thinking block breaks coalescing +> - **Name:** Thinking block breaks coalescing +> - **Type:** boundary +> - **Disposition:** new +> - **Harness:** Redux slice unit test +> - **Source of Truth:** Implementation plan - only tool_use/tool_result are tool-only +> - **Preconditions:** Session 's1' exists with one tool-only message +> - **Actions:** +> 1. Dispatch `addAssistantMessage` with thinking block +> - **Expected outcome:** New message is added (not coalesced) - 2 messages total +> - **Interactions:** None +> +> #### Test 4.3: Session not found returns early without modification +> - **Name:** Session not found returns early without modification +> - **Type:** boundary +> - **Disposition:** new +> - **Harness:** Redux slice unit test +> - **Source of Truth:** Existing reducer behavior - early return for unknown session +> - **Preconditions:** No session 'nonexistent' +> - **Actions:** +> 1. Dispatch `addAssistantMessage` with sessionId 'nonexistent' +> - **Expected outcome:** State unchanged +> - **Interactions:** None +> +> #### Test 4.4: Malformed JSONL skips bad lines gracefully +> - **Name:** Malformed JSONL skips bad lines gracefully +> - **Type:** boundary +> - **Disposition:** extend +> - **Harness:** JSONL parser unit test +> - **Source of Truth:** Existing behavior - malformed JSON skipped +> - **Preconditions:** None +> - **Actions:** +> 1. Call `extractChatMessagesFromJsonl` with JSONL containing malformed JSON between valid messages +> - **Expected outcome:** Valid messages parsed, malformed lines skipped, coalescing works on valid messages +> - **Interactions:** None +> +> --- +> +> ### Priority 5: Invariant Tests +> +> #### Test 5.1: Message order preserved after coalescing +> - **Name:** Message order preserved after coalescing +> - **Type:** invariant +> - **Disposition:** new +> - **Harness:** Redux slice unit test +> - **Source of Truth:** Implementation plan - coalescing appends content, never reorders +> - **Preconditions:** Session exists with tool-only message +> - **Actions:** +> 1. Add tool_use t1 +> 2. Add tool_result t1 +> 3. Add tool_use t2 +> - **Expected outcome:** Content blocks in order: t1 tool_use, t1 tool_result, t2 tool_use +> - **Interactions:** None +> +> #### Test 5.2: Model preserved from first message in coalesced group +> - **Name:** Model preserved from first message in coalesced group +> - **Type:** invariant +> - **Disposition:** new +> - **Harness:** Redux slice unit test +> - **Source of Truth:** Implementation plan - first message's metadata preserved +> - **Preconditions:** Session exists +> - **Actions:** +> 1. Add tool_use with model 'claude-opus' +> 2. Add tool_result (no model specified) +> - **Expected outcome:** Coalesced message has model 'claude-opus' +> - **Interactions:** None +> +> #### Test 5.3: Status updated to 'running' after coalescing +> - **Name:** Status updated to 'running' after coalescing +> - **Type:** invariant +> - **Disposition:** new +> - **Harness:** Redux slice unit test +> - **Source of Truth:** Existing reducer behavior - status set to 'running' on addAssistantMessage +> - **Preconditions:** Session with status 'idle' +> - **Actions:** +> 1. Add tool_use message +> 2. Add tool_result message +> - **Expected outcome:** Session status is 'running' +> - **Interactions:** None +> +> --- +> +> ### Priority 6: Browser-Use Scenario Test (LLM Judge) +> +> #### Test 6.1: Browser-use verifies tool strip coalescing in live UI +> - **Name:** Browser-use verifies tool strip coalescing in live UI +> - **Type:** scenario +> - **Disposition:** new +> - **Harness:** Browser-use with ChatBrowserUse LLM +> - **Source of Truth:** User's explicit request for browser-use tests with LLM as judge +> - **Preconditions:** Freshell server running, Freshclaude session exists with multiple tool uses +> - **Actions:** +> 1. Open Freshell in browser +> 2. Navigate to Freshclaude session with tool uses +> 3. Examine tool strip display for assistant messages +> 4. Judge whether strips are coalesced +> - **Expected outcome:** Output line `TOOL_COALESCE_RESULT: PASS` if one strip per turn, `TOOL_COALESCE_RESULT: FAIL - ` otherwise +> - **Interactions:** Browser automation, LLM judgment,> +> #### Test 6.2: Browser-use parsing utilities unit tests +> - **Name:** Browser-use parsing utilities unit tests +> - **Type:** unit +> - **Disposition:** new +> - **Harness:** Python pytest +> - **Source of Truth:** Browser-use test contract enforcement +> - **Preconditions:** None +> - **Actions:** +> 1. Test `_parse_result("TOOL_COALESCE_RESULT: PASS")` returns (True, None) +> 2. Test `_parse_result("TOOL_COALESCE_RESULT: FAIL - reason")` returns (False, None) +> 3. Test `_parse_result("")` returns (False, "missing_final_result") +> 4. Test `_parse_result("PASS\nextra")` returns (False, "final_result_not_single_line") +> - **Expected outcome:** All parsing tests pass +> - **Interactions:** None +> +> --- +> +> ## Coverage Summary +> +> | Area | Coverage | Priority | +> |------|---------|----------| +> | Redux coalescing logic | Tests 1.1-1.5, 4.1-4.3, 5.1-5.3 | 1, 4, 5 | +> | JSONL loader coalescing | Tests 1.6-1.9, 4.4 | 1, 4 | +> | Component rendering with coalesced data | Tests 3.1-3.2 | 3 | +> | End-to-end browser verification | Tests 6.1-6.2 | 6 | +> | Existing MessageBubble tests | Unchanged | 2 | +> +> ### Explicitly Excluded +> +> | Area | Reason | Risk | +> |------|--------|------| +> | Server-side WebSocket message handling | Not in scope - fix is client-side Redux | Low - WebSocket sends individual messages, coalescing happens after | +> | `sdk-bridge.ts` changes | Not modified - coalescing happens at Redux layer | None | +> | `turnBodyReceived` timeline hydration | Uses separate `timelineBodies` storage, not live messages | Low - timeline is read-only view | +> | Performance testing | Low risk - coalescing is O(1) per message, no loops | None | +> +> ### Test Count by Type +> +> | Type | Count | +> |------|-------| +> | regression | 3 | +> | boundary | 7 | +> | invariant | 3 | +> | integration | 2 | +> | scenario | 1 | +> | unit | 1 | +> | **Total** | **17** | +> +> --- +> +> ## Execution Order +> +> 1. **Phase 1: Red checks** (Tests 1.1-1.9) - Verify the fix works for the reported bug +> 2. **Phase 2: Run existing tests** - Verify no regressions in MessageBubble rendering +> 3. **Phase 3: Integration tests** (Tests 3.1-3.2) - Verify end-to-end flow +> 4. **Phase 4: Boundary cases** (Tests 4.1-4.4) - Verify edge cases +> 5. **Phase 5: Invariant tests** (Tests 5.1-5.3) - Verify properties preserved +> 6. **Phase 6: Browser-use** (Tests 6.1-6.2) - Verify in real UI with LLM judge diff --git a/docs/plans/2026-03-31-tool-coalesce.md b/docs/plans/2026-03-31-tool-coalesce.md new file mode 100644 index 000000000..e20dd556c --- /dev/null +++ b/docs/plans/2026-03-31-tool-coalesce.md @@ -0,0 +1,854 @@ +# Tool Coalescing Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use trycycle-executing to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Coalesce consecutive tool-only assistant messages into a single message so that multiple tool uses appear in ONE ToolStrip showing "N tools used" instead of multiple separate "1 tool used" strips. + +**Architecture:** The SDK sends separate `assistant` messages (one per tool_use block). The fix intercepts at two points: (1) the Redux `addAssistantMessage` reducer for live messages, and (2) the `extractChatMessagesFromJsonl` function for restored sessions. Both locations check if the previous message is also a tool-only assistant message, and if so, append content blocks instead of creating a new message. + +**Tech Stack:** Redux Toolkit, TypeScript, Vitest, Testing Library, browser-use (Python) + +--- + +## Root Cause Analysis + +The Claude SDK sends multiple `assistant` messages (one per `tool_use` block). The server's `sdk-bridge.ts` creates a **new Redux message** for each SDK message. Each message becomes a separate `MessageBubble`, each with its own `ToolStrip` showing "1 tool used". + +The `MessageBubble` grouping logic correctly groups consecutive `tool_use` blocks **within a single message's content array**, but cannot merge across separate messages. + +## Strategy + +Fix at the message creation boundary, not the rendering boundary. This is cleaner because: +1. **Single source of truth**: Messages in Redux state are already coalesced +2. **Consistent behavior**: Both live sessions and restored sessions behave identically +3. **Simpler testing**: Unit tests verify the Redux slice and loader, not UI rendering + +--- + +## Files to Modify/Create + +| File | Action | Purpose | +|------|--------|---------| +| `src/store/agentChatSlice.ts` | Modify | Add coalescing logic to `addAssistantMessage` | +| `server/session-history-loader.ts` | Modify | Add coalescing logic to `extractChatMessagesFromJsonl` | +| `test/unit/client/agentChatSlice.test.ts` | Modify | Add tests for coalescing | +| `test/unit/server/session-history-loader.test.ts` | Modify | Add tests for coalescing | +| `test/browser_use/tool_coalesce.py` | Create | Automated browser-use test with LLM judge | +| `test/browser_use/tool_coalesce_test.py` | Create | Unit tests for parsing logic | + +--- + +## Task 1: Implement Coalescing in addAssistantMessage Reducer + +**Files:** +- Modify: `src/store/agentChatSlice.ts` (add helper after imports, modify reducer) + +- [ ] **Step 1: Write the failing test** + +Add test to `test/unit/client/agentChatSlice.test.ts`: + +```typescript +describe('tool message coalescing', () => { + it('coalesces consecutive tool-only assistant messages', () => { + let state = agentChatReducer(initial, sessionCreated({ requestId: 'r', sessionId: 's1' })) + + // First tool-only message + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'ls' } }], + })) + expect(state.sessions['s1'].messages).toHaveLength(1) + + // Second tool-only message - should coalesce + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [ + { type: 'tool_result', tool_use_id: 't1', content: 'output' }, + { type: 'tool_use', id: 't2', name: 'Read', input: { file_path: 'f.ts' } }, + ], + })) + expect(state.sessions['s1'].messages).toHaveLength(1) + expect(state.sessions['s1'].messages[0].content).toHaveLength(3) + expect(state.sessions['s1'].messages[0].content[0]).toEqual({ type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'ls' } }) + expect(state.sessions['s1'].messages[0].content[1]).toEqual({ type: 'tool_result', tool_use_id: 't1', content: 'output' }) + expect(state.sessions['s1'].messages[0].content[2]).toEqual({ type: 'tool_use', id: 't2', name: 'Read', input: { file_path: 'f.ts' } }) + }) + + it('does not coalesce when previous message has text content', () => { + let state = agentChatReducer(initial, sessionCreated({ requestId: 'r', sessionId: 's1' })) + + // Message with text + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'text', text: 'Hello' }], + })) + expect(state.sessions['s1'].messages).toHaveLength(1) + + // Tool-only message - should NOT coalesce (previous has text) + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'ls' } }], + })) + expect(state.sessions['s1'].messages).toHaveLength(2) + }) + + it('does not coalesce when new message has text content', () => { + let state = agentChatReducer(initial, sessionCreated({ requestId: 'r', sessionId: 's1' })) + + // Tool-only message + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'ls' } }], + })) + expect(state.sessions['s1'].messages).toHaveLength(1) + + // Message with text - should NOT coalesce (new has text) + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'text', text: 'Done' }], + })) + expect(state.sessions['s1'].messages).toHaveLength(2) + }) + + it('coalesces multiple consecutive tool-only messages', () => { + let state = agentChatReducer(initial, sessionCreated({ requestId: 'r', sessionId: 's1' })) + + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'ls' } }], + })) + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'tool_result', tool_use_id: 't1', content: 'file1' }], + })) + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'tool_use', id: 't2', name: 'Read', input: { file_path: 'f.ts' } }], + })) + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'tool_result', tool_use_id: 't2', content: 'content' }], + })) + + expect(state.sessions['s1'].messages).toHaveLength(1) + expect(state.sessions['s1'].messages[0].content).toHaveLength(4) + }) + + it('does not coalesce across user messages', () => { + let state = agentChatReducer(initial, sessionCreated({ requestId: 'r', sessionId: 's1' })) + + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'ls' } }], + })) + state = agentChatReducer(state, addUserMessage({ sessionId: 's1', text: 'Thanks' })) + state = agentChatReducer(state, addAssistantMessage({ + sessionId: 's1', + content: [{ type: 'tool_use', id: 't2', name: 'Read', input: { file_path: 'f.ts' } }], + })) + + expect(state.sessions['s1'].messages).toHaveLength(3) // assistant, user, assistant + }) +}) +``` + +Run to verify it fails: + +```bash +npm run test:vitest -- test/unit/client/agentChatSlice.test.ts --run -t "tool message coalescing" +``` + +Expected: FAIL - "expected 1 to equal 2" (messages not coalescing) + +- [ ] **Step 2: Run test to verify it fails** + +(Already confirmed above) + +- [ ] **Step 3: Write minimal implementation** + +First, add helper function after the imports in `src/store/agentChatSlice.ts` (after line 9, before `initialState`): + +```typescript +/** Check if content blocks contain only tool_use and tool_result blocks (no text or thinking). */ +function isToolOnlyContent(blocks: ChatContentBlock[]): boolean { + return blocks.length > 0 && blocks.every( + (b) => b.type === 'tool_use' || b.type === 'tool_result' + ) +} +``` + +Then modify the `addAssistantMessage` reducer: + +```typescript +addAssistantMessage(state, action: PayloadAction<{ + sessionId: string + content: ChatContentBlock[] + model?: string +}>) { + const session = state.sessions[action.payload.sessionId] + if (!session) return + + const newContent = action.payload.content + const prevMessage = session.messages[session.messages.length - 1] + + // Coalesce consecutive tool-only assistant messages + if ( + prevMessage?.role === 'assistant' && + isToolOnlyContent(prevMessage.content) && + isToolOnlyContent(newContent) + ) { + // Append content blocks to previous message instead of creating new one + prevMessage.content = [...prevMessage.content, ...newContent] + } else { + session.messages.push({ + role: 'assistant', + content: newContent, + timestamp: new Date().toISOString(), + model: action.payload.model, + }) + } + session.status = 'running' +}, +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +npm run test:vitest -- test/unit/client/agentChatSlice.test.ts --run -t "tool message coalescing" +``` + +Expected: All 5 coalescing tests pass + +- [ ] **Step 5: Refactor and verify** + +Run full agentChatSlice test suite and typecheck: + +```bash +npm run test:vitest -- test/unit/client/agentChatSlice.test.ts --run +npx tsc --noEmit -p src/tsconfig.json +``` + +Expected: All tests pass, no type errors + +- [ ] **Step 6: Commit** + +```bash +git add src/store/agentChatSlice.ts test/unit/client/agentChatSlice.test.ts +git commit -m "feat(agent-chat): coalesce consecutive tool-only assistant messages" +``` + +--- + +## Task 2: Implement Coalescing in Session History Loader + +**Files:** +- Modify: `server/session-history-loader.ts` +- Modify: `test/unit/server/session-history-loader.test.ts` + +- [ ] **Step 1: Write the failing test** + +Add test to `test/unit/server/session-history-loader.test.ts`: + +```typescript +describe('tool message coalescing', () => { + it('coalesces consecutive tool-only assistant messages from JSONL', () => { + const content = [ + '{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","id":"t1","name":"Bash","input":{"command":"ls"}}]},"timestamp":"2026-01-01T00:00:01Z"}', + '{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_result","tool_use_id":"t1","content":"file1\\nfile2"}]},"timestamp":"2026-01-01T00:00:02Z"}', + '{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","id":"t2","name":"Read","input":{"file_path":"f.ts"}}]},"timestamp":"2026-01-01T00:00:03Z"}', + ].join('\n') + + const messages = extractChatMessagesFromJsonl(content) + + expect(messages).toHaveLength(1) + expect(messages[0].content).toHaveLength(3) + expect(messages[0].content[0]).toEqual({ type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'ls' } }) + expect(messages[0].content[1]).toEqual({ type: 'tool_result', tool_use_id: 't1', content: 'file1\nfile2' }) + expect(messages[0].content[2]).toEqual({ type: 'tool_use', id: 't2', name: 'Read', input: { file_path: 'f.ts' } }) + }) + + it('does not coalesce when assistant message has text content', () => { + const content = [ + '{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"Hello"}]},"timestamp":"2026-01-01T00:00:01Z"}', + '{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","id":"t1","name":"Bash","input":{"command":"ls"}}]},"timestamp":"2026-01-01T00:00:02Z"}', + ].join('\n') + + const messages = extractChatMessagesFromJsonl(content) + + expect(messages).toHaveLength(2) + }) + + it('does not coalesce across user messages', () => { + const content = [ + '{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","id":"t1","name":"Bash","input":{"command":"ls"}}]},"timestamp":"2026-01-01T00:00:01Z"}', + '{"type":"user","message":{"role":"user","content":[{"type":"text","text":"Thanks"}]},"timestamp":"2026-01-01T00:00:02Z"}', + '{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","id":"t2","name":"Read","input":{"file_path":"f.ts"}}]},"timestamp":"2026-01-01T00:00:03Z"}', + ].join('\n') + + const messages = extractChatMessagesFromJsonl(content) + + expect(messages).toHaveLength(3) + }) + + it('preserves timestamp from first message in coalesced group', () => { + const content = [ + '{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","id":"t1","name":"Bash","input":{}}]},"timestamp":"2026-01-01T00:00:01Z"}', + '{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_result","tool_use_id":"t1","content":"output"}]},"timestamp":"2026-01-01T00:00:02Z"}', + ].join('\n') + + const messages = extractChatMessagesFromJsonl(content) + + expect(messages).toHaveLength(1) + expect(messages[0].timestamp).toBe('2026-01-01T00:00:01Z') + }) +}) +``` + +Run to verify it fails: + +```bash +npm run test:vitest -- test/unit/server/session-history-loader.test.ts --run -t "tool message coalescing" +``` + +Expected: FAIL - messages not coalescing + +- [ ] **Step 2: Run test to verify it fails** + +(Already confirmed above) + +- [ ] **Step 3: Write minimal implementation** + +Modify `extractChatMessagesFromJsonl` in `server/session-history-loader.ts`: + +```typescript +export function extractChatMessagesFromJsonl(content: string): ChatMessage[] { + const lines = content.split(/\r?\n/).filter(Boolean) + const messages: ChatMessage[] = [] + + /** Check if content blocks contain only tool_use and tool_result blocks. */ + const isToolOnly = (blocks: ContentBlock[]): boolean => + blocks.length > 0 && blocks.every( + (b) => b.type === 'tool_use' || b.type === 'tool_result' + ) + + for (const line of lines) { + let obj: any + try { + obj = JSON.parse(line) + } catch { + continue + } + + if (obj.type !== 'user' && obj.type !== 'assistant') continue + + const role = obj.type as 'user' | 'assistant' + const timestamp = obj.timestamp as string | undefined + const msg = obj.message + + let newContent: ContentBlock[] + let newMessage: ChatMessage + + if (typeof msg === 'string') { + newContent = [{ type: 'text', text: msg }] + newMessage = { + role, + content: newContent, + ...(timestamp ? { timestamp } : {}), + } + } else if (msg && typeof msg === 'object' && Array.isArray(msg.content)) { + newContent = msg.content as ContentBlock[] + newMessage = { + role: msg.role || role, + content: newContent, + ...(timestamp ? { timestamp } : {}), + ...(msg.model ? { model: msg.model } : {}), + } + } else { + continue + } + + // Coalesce consecutive tool-only assistant messages + const prevMessage = messages[messages.length - 1] + if ( + prevMessage?.role === 'assistant' && + newMessage.role === 'assistant' && + isToolOnly(prevMessage.content) && + isToolOnly(newMessage.content) + ) { + // Append content blocks to previous message + prevMessage.content = [...prevMessage.content, ...newMessage.content] + } else { + messages.push(newMessage) + } + } + + return messages +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +npm run test:vitest -- test/unit/server/session-history-loader.test.ts --run -t "tool message coalescing" +``` + +Expected: All 4 coalescing tests pass + +- [ ] **Step 5: Refactor and verify** + +Run full test suite and typecheck: + +```bash +npm run test:vitest -- test/unit/server/session-history-loader.test.ts --run +npx tsc --noEmit -p server/tsconfig.json +``` + +Expected: All tests pass, no type errors + +- [ ] **Step 6: Commit** + +```bash +git add server/session-history-loader.ts test/unit/server/session-history-loader.test.ts +git commit -m "feat(server): coalesce consecutive tool-only assistant messages in JSONL loader" +``` + +--- + +## Task 3: Run Full Test Suite + +**Files:** +- None (verification only) + +- [ ] **Step 1: Run full unit test suite** + +```bash +npm run test:vitest -- test/unit --run +``` + +Expected: All tests pass + +- [ ] **Step 2: Run typecheck** + +```bash +npm run check +``` + +Expected: All checks pass + +- [ ] **Step 3: Run lint** + +```bash +npm run lint +``` + +Expected: No errors + +- [ ] **Step 4: Commit if any fixes were needed** + +(Only if fixes were made) + +```bash +git add -A +git commit -m "fix: address test/typecheck/lint issues from tool coalescing" +``` + +--- + +## Task 4: Create Automated Browser-Use Test + +**Files:** +- Create: `test/browser_use/tool_coalesce.py` +- Create: `test/browser_use/tool_coalesce_test.py` + +This task creates an **automated** browser-use test following the repo's established pattern (`smoke_freshell.py`). The test uses an LLM as judge to verify tool strip coalescing. + +- [ ] **Step 1: Create the test parsing utilities** + +Create `test/browser_use/tool_coalesce_test.py` with unit tests for result parsing: + +```python +import unittest +from tool_coalesce import _parse_result + + +class ToolCoalesceParseTest(unittest.TestCase): + def test_pass_requires_exact_single_line(self) -> None: + ok, err = _parse_result("TOOL_COALESCE_RESULT: PASS") + self.assertTrue(ok) + self.assertIsNone(err) + + def test_pass_with_extra_text_is_invalid(self) -> None: + ok, err = _parse_result("TOOL_COALESCE_RESULT: PASS. extra") + self.assertFalse(ok) + self.assertEqual(err, "final_result_invalid_format") + + def test_fail_requires_reason(self) -> None: + ok, err = _parse_result("TOOL_COALESCE_RESULT: FAIL - multiple strips found") + self.assertFalse(ok) + self.assertIsNone(err) + + def test_empty_is_invalid(self) -> None: + ok, err = _parse_result("") + self.assertFalse(ok) + self.assertEqual(err, "missing_final_result") + + def test_multiple_lines_is_invalid(self) -> None: + ok, err = _parse_result("TOOL_COALESCE_RESULT: PASS\nmore") + self.assertFalse(ok) + self.assertEqual(err, "final_result_not_single_line") + + +if __name__ == "__main__": + raise SystemExit(unittest.main()) +``` + +- [ ] **Step 2: Run parsing tests to verify they fail (module not created yet)** + +```bash +cd /home/user/code/freshell/.worktrees/tool-coalesce/test/browser_use +python -m pytest tool_coalesce_test.py -v 2>&1 || echo "Expected to fail - module not created" +``` + +Expected: Import error (module not created yet) + +- [ ] **Step 3: Create the main browser-use test script** + +Create `test/browser_use/tool_coalesce.py`: + +```python +#!/usr/bin/env python3 +""" +Browser-use test for tool strip coalescing in Freshclaude. + +Verifies that consecutive tool uses in an assistant turn appear as ONE +strip showing "N tools used" instead of multiple separate "1 tool used" strips. +Uses an LLM as judge to evaluate the visual result. +""" + +from __future__ import annotations + +import argparse +import asyncio +import os +import sys +import logging +import urllib.request +from pathlib import Path + +# Add parent directory for shared utilities +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from smoke_utils import ( + JsonLogger, + build_target_url, + default_base_url, + env_or, + find_upwards, + load_dotenv, + monotonic_timer, + redact_url, + redact_text, + require, + token_fingerprint, +) + + +def _parse_result(final_text: str) -> tuple[bool, str | None]: + """ + Enforce strict output contract: + - Exactly one line + - Exactly "TOOL_COALESCE_RESULT: PASS" + or "TOOL_COALESCE_RESULT: FAIL - " + """ + text = (final_text or "").strip() + if not text: + return False, "missing_final_result" + + lines = [ln.strip() for ln in text.splitlines() if ln.strip()] + if len(lines) != 1: + return False, "final_result_not_single_line" + + line = lines[0] + if line == "TOOL_COALESCE_RESULT: PASS": + return True, None + if line.startswith("TOOL_COALESCE_RESULT: FAIL - ") and len(line) > len("TOOL_COALESCE_RESULT: FAIL - "): + return False, None + return False, "final_result_invalid_format" + + +def _build_task(*, base_url: str) -> str: + return f""" +You are testing the tool strip coalescing feature in a Freshell Freshclaude session. + +The app is already opened and authenticated at {base_url}. + +Your goal: Verify that when an assistant turn uses multiple tools, they appear grouped in ONE tool strip showing "N tools used" instead of multiple separate "1 tool used" strips. + +Steps: + +1. Open the sidebar if it's collapsed. Look for Freshclaude sessions (they have a different icon than shell tabs). + +2. Find and click on a Freshclaude session that has assistant messages with tool uses. Look for sessions that show tool indicators or message counts. + +3. If no session with multiple tool uses exists, skip to step 7 and report what you found. + +4. Once in a session, look at the assistant messages that contain tool uses. Find a message where the assistant used 2 or more tools in a single turn. + +5. Examine the tool strip display for that message: + - A tool strip should be visible showing either "N tools used" (collapsed) or individual tool blocks (expanded) + - Count how many separate tool-related text lines are visible (e.g., "1 tool used", "2 tools used", etc.) + +6. Judge the result: + - PASS: You see ONE tool strip per assistant turn, showing the combined count (e.g., "2 tools used" or "3 tools used") + - FAIL: You see MULTIPLE separate lines each showing "1 tool used" for tools that should be grouped + +7. Report your findings as exactly one line: + - If tool strips are correctly coalesced: TOOL_COALESCE_RESULT: PASS + - If you see multiple separate "1 tool used" strips: TOOL_COALESCE_RESULT: FAIL - multiple strips found + - If no suitable session found: TOOL_COALESCE_RESULT: FAIL - no session with tools + +Non-negotiable constraints: +- Do not create or write any files +- Stay in ONE browser tab +- Output exactly one result line at the end +""" + + +async def _run(args: argparse.Namespace) -> int: + repo_root = Path(__file__).resolve().parents[2] + dotenv_path = find_upwards(repo_root, ".env") + dotenv = load_dotenv(dotenv_path) if dotenv_path else {} + + log = JsonLogger(min_level=("debug" if args.debug else "info")) + + if args.require_api_key and not os.environ.get("BROWSER_USE_API_KEY"): + log.error("Missing BROWSER_USE_API_KEY", event="missing_browser_use_api_key") + return 2 + + base_url = args.base_url or default_base_url(dotenv) + token = env_or(args.token, "AUTH_TOKEN") or dotenv.get("AUTH_TOKEN") + try: + token = require("AUTH_TOKEN", token) + except ValueError as e: + log.error(str(e), event="missing_auth_token") + return 2 + + model = env_or(args.model, "BROWSER_USE_MODEL") or "bu-latest" + target_url = build_target_url(base_url, token) + redacted_target_url = redact_url(target_url) + + log.info( + "Tool coalesce test start", + event="test_start", + baseUrl=base_url, + tokenFp=token_fingerprint(token), + model=model, + headless=args.headless, + ) + + if args.preflight: + health_url = f"{base_url.rstrip('/')}/api/health" + try: + with urllib.request.urlopen(health_url, timeout=3) as resp: + log.info("Preflight ok", event="preflight_ok", url=health_url) + except Exception as e: + log.error("Preflight failed", event="preflight_failed", url=health_url, error=str(e)) + return 1 + + from browser_use import Agent, Browser, ChatBrowserUse # type: ignore + + llm = ChatBrowserUse(model=model) + browser = Browser( + headless=args.headless, + window_size={"width": args.width, "height": args.height}, + viewport={"width": args.width, "height": args.height}, + no_viewport=False, + ) + browser_started = False + + try: + log.info("Pre-opening target URL", event="preopen_target", targetUrl=redacted_target_url) + await browser.start() + browser_started = True + + # Navigate to authenticated URL + page = await browser.new_page(target_url) + + log.info("Target URL opened", event="preopen_target_ok") + except Exception as e: + log.error("Failed to pre-open target URL", event="preopen_target_failed", error=str(e)) + if browser_started: + try: + await browser.stop() + except Exception: + pass + return 1 + + task = _build_task(base_url=base_url) + + agent = Agent( + task=task.strip(), + llm=llm, + browser=browser, + use_vision=True, + max_actions_per_step=2, + directly_open_url=False, + ) + + _start, elapsed_s = monotonic_timer() + try: + log.info("Agent run start", event="agent_run_start", maxSteps=args.max_steps) + history = await agent.run(max_steps=args.max_steps) + finally: + if browser_started: + try: + await browser.stop() + except Exception: + pass + + log.info("Agent finished", event="agent_finished", elapsedS=round(elapsed_s(), 2)) + + final_result_fn = getattr(history, "final_result", None) + final = final_result_fn() if callable(final_result_fn) else None + final_text = str(final or "").strip() + + ok, parse_err = _parse_result(final_text) + if parse_err: + log.error("Invalid final_result format", event="invalid_final_result", error=parse_err, text=final_text[:500]) + return 1 + if ok: + log.info("TOOL_COALESCE_RESULT: PASS", event="test_pass") + return 0 + log.error("TOOL_COALESCE_RESULT: FAIL", event="test_fail", reason=final_text[:500]) + return 1 + + +def main(argv: list[str]) -> int: + p = argparse.ArgumentParser(description="Browser-use test for tool strip coalescing.") + p.add_argument("--base-url", default=None, help="Base URL") + p.add_argument("--token", default=None, help="Auth token") + p.add_argument("--model", default=None, help="Browser Use model") + p.add_argument("--headless", action="store_true", help="Run headless") + p.add_argument("--width", type=int, default=1024) + p.add_argument("--height", type=int, default=768) + p.add_argument("--max-steps", type=int, default=30) + p.add_argument("--preflight", action="store_true", help="Check /api/health first") + p.add_argument("--debug", action="store_true") + p.add_argument("--no-require-api-key", dest="require_api_key", action="store_false") + p.set_defaults(require_api_key=True) + args = p.parse_args(argv) + try: + return asyncio.run(_run(args)) + except KeyboardInterrupt: + return 130 + except Exception: + sys.stderr.write(__import__("traceback").format_exc()) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) +``` + +- [ ] **Step 4: Run parsing tests to verify they pass** + +```bash +cd /home/user/code/freshell/.worktrees/tool-coalesce/test/browser_use +python -m pytest tool_coalesce_test.py -v +``` + +Expected: All parsing tests pass + +- [ ] **Step 5: Make the test script executable and verify syntax** + +```bash +chmod +x /home/user/code/freshell/.worktrees/tool-coalesce/test/browser_use/tool_coalesce.py +python -m py_compile /home/user/code/freshell/.worktrees/tool-coalesce/test/browser_use/tool_coalesce.py +``` + +Expected: No syntax errors + +- [ ] **Step 6: Commit** + +```bash +git add test/browser_use/tool_coalesce.py test/browser_use/tool_coalesce_test.py +git commit -m "test(browser-use): add automated tool coalescing verification with LLM judge" +``` + +--- + +## Task 5: Final Verification and Integration + +**Files:** +- None (verification only) + +- [ ] **Step 1: Run coordinated full test suite** + +```bash +FRESHELL_TEST_SUMMARY="tool-coalesce final verification" npm test +``` + +Expected: All tests pass + +- [ ] **Step 2: Verify git status** + +```bash +git status +git log --oneline -5 +``` + +Expected: Clean working tree, all commits present + +- [ ] **Step 3: Summary of changes** + +Files changed: +- `src/store/agentChatSlice.ts` - Coalescing in `addAssistantMessage` + helper function +- `server/session-history-loader.ts` - Coalescing in `extractChatMessagesFromJsonl` +- `test/unit/client/agentChatSlice.test.ts` - Unit tests for Redux coalescing +- `test/unit/server/session-history-loader.test.ts` - Unit tests for loader coalescing +- `test/browser_use/tool_coalesce.py` - Automated browser-use test with LLM judge +- `test/browser_use/tool_coalesce_test.py` - Unit tests for parsing logic + +--- + +## Edge Cases and Invariants + +### Invariants Preserved + +1. **Message order is preserved** - Coalescing only appends content, never reorders +2. **User messages break coalescing** - Tool-only assistant messages on opposite sides of a user message stay separate +3. **Text content breaks coalescing** - Any text or thinking block prevents coalescing +4. **Timestamp from first message** - Coalesced message keeps timestamp of first message in group + +### Edge Cases Handled + +1. **Empty content array** - `isToolOnlyContent` returns false for empty arrays (no coalescing) +2. **Mixed content** - Text + tool_use in same message prevents coalescing with next +3. **Session not found** - Reducer returns early without modification +4. **Malformed JSONL** - Parser skips bad lines (existing behavior preserved) + +### Regression Risks + +1. **Timeline body hydration** - Uses same `ChatMessage` type, but `turnBodyReceived` stores separately (not affected) +2. **Message count in UI** - Count may decrease due to coalescing (expected behavior) +3. **Timestamp display** - First timestamp preserved (may differ from last tool's timestamp) + +--- + +## Rollback Plan + +If issues arise after merge: + +1. Revert the implementation commits +2. The coalescing logic is isolated to two functions - easy to disable by removing the `if` check +3. No database migrations or schema changes involved + +--- + +## References + +- `server/sdk-bridge.ts` - Where SDK assistant messages are created +- `src/lib/sdk-message-handler.ts` - Where `sdk.assistant` dispatches `addAssistantMessage` +- `src/components/agent-chat/MessageBubble.tsx` - Tool grouping logic within single message +- `test/unit/client/components/agent-chat/MessageBubble.test.tsx` - Existing grouping tests +- `test/browser_use/smoke_freshell.py` - Pattern for browser-use tests in this repo diff --git a/docs/plans/2026-04-05-opencode-busy-finished-status.md b/docs/plans/2026-04-05-opencode-busy-finished-status.md new file mode 100644 index 000000000..f335e2bca --- /dev/null +++ b/docs/plans/2026-04-05-opencode-busy-finished-status.md @@ -0,0 +1,563 @@ +# OpenCode Busy/Finished Status Implementation Plan + +> **For Claude:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make Freshell reflect OpenCode terminal busy vs finished state reliably by using OpenCode's own localhost status API from spawn time through websocket delivery to the UI. + +**Architecture:** Each OpenCode terminal must be launched with an explicit localhost control endpoint that is threaded through `providerSettings.opencodeServer`, not through a new positional `buildSpawnSpec()` parameter. `providerSettings` already flows from async callsites into `buildSpawnSpec()` and then `resolveCodingCliCommand()`, and `resolveCodingCliCommand()` is the only seam that feeds Unix, WSL, `cmd.exe`, and PowerShell launch paths. Server-side tracking should use OpenCode's documented `/global/health`, `/session/status`, and per-instance `/event` SSE stream as the authoritative busy source. Do not parse terminal output for busy or finished state, and do not add heuristic fallbacks. + +**Verified upstream contract:** +- OpenCode CLI docs show `--hostname` and `--port` on the TUI entrypoint, which is what Freshell launches for `mode === 'opencode'`: +- OpenCode server docs show: + - `GET /global/health` + - `GET /session/status` + - `GET /event` + - `GET /doc` for the OpenAPI spec + - `GET /global/event` also exists, but it is the global wrapper stream and is not needed for one Freshell terminal talking to one OpenCode instance: +- OpenCode's generated SDK types define: + - `SessionStatus = { type: "idle" } | { type: "retry", ... } | { type: "busy" }` + - `EventSessionStatus = { type: "session.status", properties: { sessionID, status } }` + - `EventSessionIdle = { type: "session.idle", properties: { sessionID } }` + - `EventServerConnected = { type: "server.connected", ... }` + Source: + +**Implementation note:** If the launched OpenCode instance's `/doc` output disagrees with the upstream docs above, stop and update this plan plus the contract tests before proceeding. + +**Tech Stack:** Node.js, Express, ws, Redux Toolkit, React 18, Vitest, Testing Library, supertest, OpenCode CLI local server endpoints. + +--- + +## File Structure + +**Create** +- `server/local-port.ts` + Async helper that allocates an ephemeral localhost port for OpenCode launch callsites. +- `server/coding-cli/opencode-activity-tracker.ts` + Server-side monitor that waits for health, snapshots current busy state, consumes the OpenCode SSE event stream, and emits normalized `{ upsert, remove }` changes per terminal. +- `server/coding-cli/opencode-activity-wiring.ts` + Lifecycle glue between `TerminalRegistry` events and `OpencodeActivityTracker`. +- `src/store/opencodeActivitySlice.ts` + Redux slice mirroring the snapshot and mutation ordering protections already used by `codexActivity`. +- `test/unit/server/coding-cli/opencode-activity-tracker.test.ts` + Unit coverage for health wait, snapshot bootstrap, idle removal, reconnect backoff, and terminal teardown. +- `test/server/ws-opencode-activity.test.ts` + WebSocket protocol coverage for list and update messages. +- `test/unit/client/store/opencodeActivitySlice.test.ts` + Reducer coverage for stale snapshot ordering, upsert/remove mutation ordering, and reset behavior. + +**Modify** +- `server/terminal-registry.ts` + Extend `ProviderSettings` with `opencodeServer`, require it for OpenCode launches, inject `--hostname` and `--port` inside `resolveCodingCliCommand()`, and store the endpoint on terminal records. +- `server/ws-handler.ts` + Allocate OpenCode control ports before `registry.create()`, merge them into `providerSettings`, serve `opencode.activity.list`, and broadcast `opencode.activity.updated`. +- `server/agent-api/router.ts` + Allocate OpenCode control ports for `/tabs`, `/run`, split, and respawn routes before terminal creation and merge them into `providerSettings`. +- `server/index.ts` + Construct the OpenCode tracker wiring, expose list snapshots to websocket clients, broadcast tracker mutations, and dispose the wiring on shutdown. +- `shared/ws-protocol.ts` + Add `opencode.activity.list`, `opencode.activity.list.response`, and `opencode.activity.updated` schemas/types. +- `src/store/store.ts` + Register the new `opencodeActivity` reducer. +- `src/App.tsx` + Bootstrap the OpenCode activity snapshot, handle websocket deltas, and reset stale overlay state on reconnect/disconnect. +- `src/lib/pane-activity.ts` + Treat exact-match OpenCode busy records as blue activity and include them in busy session-key collection. +- `src/components/panes/PaneContainer.tsx` + Feed OpenCode activity into per-pane activity resolution. +- `src/components/TabBar.tsx` + Feed OpenCode activity into busy-tab computation. +- `src/components/TabSwitcher.tsx` + Feed OpenCode activity into mobile switcher busy badges. +- `src/components/MobileTabStrip.tsx` + Feed OpenCode activity into the active-tab busy badge. +- `src/components/Sidebar.tsx` + Feed OpenCode activity into busy session highlighting. +- `test/unit/server/terminal-registry.test.ts` + Lock the OpenCode launch flags, provider-settings threading, and explicit-endpoint requirement. +- `test/server/agent-tabs-write.test.ts` + Assert `/api/tabs` allocates and passes an OpenCode control endpoint. +- `test/server/agent-run.test.ts` + Assert `/api/run` still works when the launched mode is `opencode`. +- `test/unit/client/components/App.ws-bootstrap.test.tsx` + Cover OpenCode snapshot request, reset-on-ready, reset-on-disconnect, and stale snapshot ordering. +- `test/unit/client/lib/pane-activity.test.ts` + Cover exact-match OpenCode busy semantics and busy session-key collection. +- `test/unit/client/components/panes/PaneContainer.test.tsx` + Cover per-pane OpenCode busy resolution where pane runtime activity used to be the only source. +- `test/unit/client/components/TabBar.test.tsx` + Cover OpenCode blue-tab behavior. +- `test/unit/client/components/TabSwitcher.test.tsx` + Cover OpenCode busy badges in the switcher. +- `test/unit/client/components/MobileTabStrip.test.tsx` + Cover OpenCode busy badge on the active mobile tab. +- `test/unit/client/components/Sidebar.test.tsx` + Cover busy OpenCode session highlighting. +- `test/e2e/pane-activity-indicator-flow.test.tsx` + Add an OpenCode pane/tab busy-indicator flow. +- `test/e2e/opencode-startup-probes.test.tsx` + Guard against OpenCode startup regressions after adding control-port launch args. + +**Explicit non-goals** +- No terminal-text parsing for busy/finished state. +- No generic multi-provider activity refactor beyond small local helpers needed to avoid repetition. +- No `docs/index.html` update unless the visible UX changes beyond the existing busy-indicator behavior. + +## Architectural Guardrails + +- OpenCode server state is authoritative. Use: + - `/global/health` to know when the embedded server is reachable. + - `/session/status` to seed the current busy snapshot. + - `/event` to receive live per-instance transitions. Ignore the initial `server.connected` event. +- Do not use `/global/event` for this feature. It wraps events with `directory` and is unnecessary when a Freshell terminal owns exactly one OpenCode instance. +- Normalize every non-idle OpenCode session status into a single UI-facing concept: `busy`. +- Remove activity records when OpenCode reports idle, a resnapshot after reconnect no longer shows a busy session, or the terminal exits. +- Require an explicit OpenCode control endpoint at spawn time. If it is missing, throw a clear error instead of silently falling back. +- Preserve Codex behavior exactly. OpenCode is additive, not a rewrite of the Codex path. +- Use concrete retry policy: + - health probe: poll every `200ms`, fail startup after `15_000ms` + - SSE reconnect: exponential backoff starting at `250ms`, doubling to a `5_000ms` cap, with small jitter + - stop retrying immediately once the terminal exits or the monitor is disposed + +## Chunk 1: Server Launch And Activity Authority + +### Task 1: Require And Pass An OpenCode Control Endpoint At Spawn Time + +**Files:** +- Create: `server/local-port.ts` +- Modify: `server/terminal-registry.ts` +- Modify: `server/ws-handler.ts` +- Modify: `server/agent-api/router.ts` +- Test: `test/unit/server/terminal-registry.test.ts` +- Test: `test/server/agent-tabs-write.test.ts` +- Test: `test/server/agent-run.test.ts` + +- [ ] **Step 1: Write the failing launch-contract tests** + +```ts +expect(buildSpawnSpec( + 'opencode', + '/repo/app', + 'system', + undefined, + { + opencodeServer: { hostname: '127.0.0.1', port: 43123 }, + }, + undefined, + 'term-oc', +).args).toEqual(expect.arrayContaining([ + '--hostname', '127.0.0.1', + '--port', '43123', +])) + +await request(app) + .post('/api/tabs') + .send({ mode: 'opencode', name: 'OpenCode' }) + +expect(registry.create).toHaveBeenCalledWith(expect.objectContaining({ + mode: 'opencode', + providerSettings: expect.objectContaining({ + opencodeServer: { + hostname: '127.0.0.1', + port: expect.any(Number), + }, + }), +})) +``` + +- [ ] **Step 2: Run the focused server tests and verify they fail** + +Run: `npm run test:vitest -- test/unit/server/terminal-registry.test.ts test/server/agent-tabs-write.test.ts test/server/agent-run.test.ts` + +Expected: FAIL because OpenCode launch args do not include `--hostname`/`--port`, and OpenCode callsites do not pass `providerSettings.opencodeServer`. + +- [ ] **Step 3: Implement the minimal launch-path changes** + +```ts +export type OpencodeServerEndpoint = { + hostname: '127.0.0.1' + port: number +} + +type ProviderSettings = { + permissionMode?: string + model?: string + sandbox?: string + opencodeServer?: OpencodeServerEndpoint +} + +if (mode === 'opencode') { + if (!providerSettings?.opencodeServer) { + throw new Error('OpenCode launch requires an allocated localhost control endpoint.') + } + settingsArgs.push( + '--hostname', providerSettings.opencodeServer.hostname, + '--port', String(providerSettings.opencodeServer.port), + ) +} +``` + +Implementation notes: +- Keep `TerminalRegistry.create()` synchronous by allocating ports in async callsites before `registry.create(...)`. +- Do not add a new positional parameter to `buildSpawnSpec()`. +- Inject the OpenCode launch flags inside `resolveCodingCliCommand()`, because that command spec is what flows through Unix, WSL, `cmd.exe`, and PowerShell paths today. +- Keep the hostname pinned to the IPv4 loopback literal `'127.0.0.1'` intentionally. Do not broaden this to `localhost` or `::1` without an explicit design change, because the goal here is a private, consistent, single-stack control endpoint across all shell launch paths. +- Route callsites should merge the endpoint into existing provider settings, not replace them: + +```ts +const providerSettings = await resolveProviderSettings(mode, configStore, overrides) +const finalProviderSettings = mode === 'opencode' + ? { ...providerSettings, opencodeServer: await allocateLocalhostPort() } + : providerSettings +``` + +- Update direct OpenCode test fixtures that call `registry.create({ mode: 'opencode' })` or `buildSpawnSpec('opencode', ...)` to pass a stub endpoint under `providerSettings.opencodeServer`. + +- [ ] **Step 4: Run the focused server tests and verify they pass** + +Run: `npm run test:vitest -- test/unit/server/terminal-registry.test.ts test/server/agent-tabs-write.test.ts test/server/agent-run.test.ts` + +Expected: PASS with OpenCode routes explicitly allocating and forwarding a control endpoint through `providerSettings`. + +- [ ] **Step 5: Commit** + +```bash +git add server/local-port.ts server/terminal-registry.ts server/ws-handler.ts server/agent-api/router.ts test/unit/server/terminal-registry.test.ts test/server/agent-tabs-write.test.ts test/server/agent-run.test.ts +git commit -m "feat: launch opencode with explicit control endpoint" +``` + +### Task 2: Track OpenCode Busy And Finished State From Its Own Server API + +**Files:** +- Create: `server/coding-cli/opencode-activity-tracker.ts` +- Create: `server/coding-cli/opencode-activity-wiring.ts` +- Modify: `server/index.ts` +- Modify: `shared/ws-protocol.ts` +- Modify: `server/ws-handler.ts` +- Test: `test/unit/server/coding-cli/opencode-activity-tracker.test.ts` +- Test: `test/server/ws-opencode-activity.test.ts` + +- [ ] **Step 1: Write the failing tracker and websocket protocol tests** + +```ts +expect(changes).toContainEqual({ + upsert: [{ + terminalId: 'term-oc', + sessionId: 'session-oc', + phase: 'busy', + updatedAt: expect.any(Number), + }], + remove: [], +}) + +ws.send(JSON.stringify({ type: 'opencode.activity.list', requestId: 'req-oc-1' })) +const response = await waitForMessage(ws, (msg) => ( + msg.type === 'opencode.activity.list.response' && msg.requestId === 'req-oc-1' +)) +expect(response.terminals).toEqual(sampleActivity) +``` + +Add tracker cases for: +- health endpoint not ready yet, then becoming healthy within `15_000ms` +- startup timeout when `/global/health` never becomes healthy +- initial `/session/status` showing one busy session +- ignoring the initial `server.connected` SSE event +- `session.status` with `busy` or `retry` upserting the record +- `session.idle` or `session.status` with `idle` removing the record +- `session.idle` for a different `sessionID` not clearing the currently tracked session +- SSE disconnect followed by reconnect, backoff, resnapshot, and resubscribe +- terminal exit removing monitor state and cancelling retry timers + +- [ ] **Step 2: Run the focused tracker tests and verify they fail** + +Run: `npm run test:vitest -- test/unit/server/coding-cli/opencode-activity-tracker.test.ts test/server/ws-opencode-activity.test.ts` + +Expected: FAIL because no OpenCode tracker, websocket message types, or broadcasts exist. + +- [ ] **Step 3: Implement the tracker, wiring, and websocket contract** + +```ts +type OpencodeActivityRecord = { + terminalId: string + sessionId?: string + phase: 'busy' + updatedAt: number +} + +const HEALTH_POLL_MS = 200 +const HEALTH_TIMEOUT_MS = 15_000 +const RECONNECT_BASE_MS = 250 +const RECONNECT_MAX_MS = 5_000 + +if (event.type === 'session.status' && event.properties.status.type !== 'idle') { + this.upsertBusy(terminalId, event.properties.sessionID, now()) +} + +if (event.type === 'session.idle' || ( + event.type === 'session.status' && event.properties.status.type === 'idle' +)) { + this.removeBusy(terminalId) +} +``` + +Implementation notes: +- Use one tracker monitor per OpenCode terminal record. +- Start monitoring from `terminal.created` when `record.mode === 'opencode'` and `record.providerSettings?.opencodeServer` or equivalent stored endpoint data exists. +- If current terminal records do not already retain provider settings, store only the endpoint data needed for runtime monitoring. +- Validate the OpenCode SSE payload boundary with local Zod schemas before mutating tracker state. Unknown or malformed payloads should be logged and ignored, not trusted. +- On reconnect, re-run `/session/status` before resubscribing to `/event`. +- Back off reconnect attempts with capped exponential backoff plus jitter. Do not spin in a tight loop if OpenCode exits or crashes. +- Only clear a busy record on idle if the event's `sessionID` matches the currently tracked session id, unless no session id is recorded yet. +- Add websocket types: + - client: `opencode.activity.list` + - server: `opencode.activity.list.response` + - server: `opencode.activity.updated` +- Mirror the existing Codex list/broadcast path instead of inventing a second transport pattern. +- Log clear tracker failures with terminal id and endpoint, but do not invent a fallback busy detector. + +- [ ] **Step 4: Run the focused tracker tests and verify they pass** + +Run: `npm run test:vitest -- test/unit/server/coding-cli/opencode-activity-tracker.test.ts test/server/ws-opencode-activity.test.ts` + +Expected: PASS with authenticated websocket updates, bounded reconnect behavior, and tracker removal on idle/exit. + +- [ ] **Step 5: Commit** + +```bash +git add server/coding-cli/opencode-activity-tracker.ts server/coding-cli/opencode-activity-wiring.ts server/index.ts shared/ws-protocol.ts server/ws-handler.ts test/unit/server/coding-cli/opencode-activity-tracker.test.ts test/server/ws-opencode-activity.test.ts +git commit -m "feat: track opencode activity from server events" +``` + +## Chunk 2: Client Activity Overlay And Busy Projection + +### Task 3: Add A Client-Side OpenCode Activity Overlay + +**Files:** +- Create: `src/store/opencodeActivitySlice.ts` +- Modify: `src/store/store.ts` +- Modify: `src/App.tsx` +- Test: `test/unit/client/store/opencodeActivitySlice.test.ts` +- Test: `test/unit/client/components/App.ws-bootstrap.test.tsx` + +- [ ] **Step 1: Write the failing client-store and websocket-bootstrap tests** + +```ts +store.dispatch(setOpencodeActivitySnapshot({ + terminals: [{ terminalId: 'term-oc', sessionId: 'session-oc', phase: 'busy', updatedAt: 20 }], + requestSeq: 3, +})) +expect(store.getState().opencodeActivity.byTerminalId['term-oc']?.phase).toBe('busy') + +expect(wsMocks.send).toHaveBeenCalledWith(expect.objectContaining({ + type: 'opencode.activity.list', +})) +``` + +Add bootstrap cases for: +- requesting `opencode.activity.list` after `ready` +- clearing stale OpenCode activity when bootstrapping onto an already-ready socket +- clearing OpenCode activity on disconnect +- ignoring stale `opencode.activity.list.response` snapshots that arrive after newer mutations + +- [ ] **Step 2: Run the focused client bootstrap tests and verify they fail** + +Run: `npm run test:vitest -- test/unit/client/store/opencodeActivitySlice.test.ts test/unit/client/components/App.ws-bootstrap.test.tsx` + +Expected: FAIL because the store has no `opencodeActivity` slice and `App.tsx` does not request or process OpenCode activity messages. + +- [ ] **Step 3: Implement the client overlay** + +```ts +const opencodeActivitySlice = createSlice({ + name: 'opencodeActivity', + initialState: createInitialState(), + reducers: { + setOpencodeActivitySnapshot, + upsertOpencodeActivity, + removeOpencodeActivity, + resetOpencodeActivity, + }, +}) + +ws.send({ type: 'opencode.activity.list', requestId }) + +if (msg.type === 'opencode.activity.updated') { + dispatch(upsertOpencodeActivity({ terminals: msg.upsert ?? [], mutationSeq })) + dispatch(removeOpencodeActivity({ terminalIds: msg.remove ?? [], mutationSeq })) +} +``` + +Implementation notes: +- Mirror the ordering protections from `codexActivitySlice.ts` instead of sharing provider state. +- It is acceptable to extract a small local helper inside `src/App.tsx` to avoid copy-pasting the request/reset/snapshot bookkeeping for Codex and OpenCode. +- Reset OpenCode overlay state in the same places Codex resets today: on `ready`, on explicit disconnect, and when bootstrapping onto a pre-ready socket. + +- [ ] **Step 4: Run the focused client bootstrap tests and verify they pass** + +Run: `npm run test:vitest -- test/unit/client/store/opencodeActivitySlice.test.ts test/unit/client/components/App.ws-bootstrap.test.tsx` + +Expected: PASS with deterministic snapshot ordering and prompt reset behavior. + +- [ ] **Step 5: Commit** + +```bash +git add src/store/opencodeActivitySlice.ts src/store/store.ts src/App.tsx test/unit/client/store/opencodeActivitySlice.test.ts test/unit/client/components/App.ws-bootstrap.test.tsx +git commit -m "feat: add opencode activity client overlay" +``` + +### Task 4: Feed OpenCode Activity Into The Existing Busy Indicators + +**Files:** +- Modify: `src/lib/pane-activity.ts` +- Modify: `src/components/panes/PaneContainer.tsx` +- Modify: `src/components/TabBar.tsx` +- Modify: `src/components/TabSwitcher.tsx` +- Modify: `src/components/MobileTabStrip.tsx` +- Modify: `src/components/Sidebar.tsx` +- Test: `test/unit/client/lib/pane-activity.test.ts` +- Test: `test/unit/client/components/panes/PaneContainer.test.tsx` +- Test: `test/unit/client/components/TabBar.test.tsx` +- Test: `test/unit/client/components/TabSwitcher.test.tsx` +- Test: `test/unit/client/components/MobileTabStrip.test.tsx` +- Test: `test/unit/client/components/Sidebar.test.tsx` +- Test: `test/e2e/pane-activity-indicator-flow.test.tsx` +- Test: `test/e2e/opencode-startup-probes.test.tsx` + +- [ ] **Step 1: Write the failing busy-indicator tests** + +```ts +expect(resolvePaneActivity({ + paneId: 'pane-oc', + content: { + kind: 'terminal', + createRequestId: 'req-oc', + status: 'running', + mode: 'opencode', + shell: 'system', + terminalId: 'term-oc', + resumeSessionId: 'session-oc', + }, + tabMode: 'opencode', + isOnlyPane: true, + codexActivityByTerminalId: {}, + opencodeActivityByTerminalId: { + 'term-oc': { terminalId: 'term-oc', sessionId: 'session-oc', phase: 'busy', updatedAt: 10 }, + }, + paneRuntimeActivityByPaneId: {}, + agentChatSessions: {}, +})).toMatchObject({ isBusy: true, source: 'opencode' }) +``` + +Add UI assertions for: +- blue pane and tab icons for a busy OpenCode pane +- busy badge in `PaneContainer` +- busy badge in `TabSwitcher` +- busy badge in `MobileTabStrip` +- blue highlight for the matching OpenCode session in `Sidebar` +- no false positives for shell or other providers +- no selector-instability warnings when `opencodeActivity` state is absent + +- [ ] **Step 2: Run the focused UI tests and verify they fail** + +Run: `npm run test:vitest -- test/unit/client/lib/pane-activity.test.ts test/unit/client/components/panes/PaneContainer.test.tsx test/unit/client/components/TabBar.test.tsx test/unit/client/components/TabSwitcher.test.tsx test/unit/client/components/MobileTabStrip.test.tsx test/unit/client/components/Sidebar.test.tsx test/e2e/pane-activity-indicator-flow.test.tsx test/e2e/opencode-startup-probes.test.tsx` + +Expected: FAIL because the busy-indicator pipeline only reads Codex activity today. + +- [ ] **Step 3: Implement the busy-projection changes** + +```ts +type PaneActivitySource = + | 'codex' + | 'opencode' + | 'claude-terminal' + | 'agent-chat' + | 'browser' + +if (effectiveMode === 'opencode') { + const record = input.content.terminalId + ? input.opencodeActivityByTerminalId[input.content.terminalId] + : undefined + return record?.phase === 'busy' + ? { isBusy: true, source: 'opencode' } + : IDLE_PANE_ACTIVITY +} +``` + +Implementation notes: +- Keep OpenCode semantics exact-match by `terminalId`, just like Codex. +- Update all three pane-activity entry points, not just `resolvePaneActivity()`: + - `resolvePaneActivity()` + - `getBusyPaneIdsForTab()` + - `collectBusySessionKeys()` +- Thread `opencodeActivityByTerminalId` into every component and selector path that already threads `codexActivityByTerminalId`. +- Use the existing blue busy styles. This is a behavior fix, not a visual redesign. +- `TerminalView.tsx` should remain unchanged unless a test proves a launch/startup regression caused by the new server flags. + +- [ ] **Step 4: Run the focused UI tests and verify they pass** + +Run: `npm run test:vitest -- test/unit/client/lib/pane-activity.test.ts test/unit/client/components/panes/PaneContainer.test.tsx test/unit/client/components/TabBar.test.tsx test/unit/client/components/TabSwitcher.test.tsx test/unit/client/components/MobileTabStrip.test.tsx test/unit/client/components/Sidebar.test.tsx test/e2e/pane-activity-indicator-flow.test.tsx test/e2e/opencode-startup-probes.test.tsx` + +Expected: PASS with OpenCode busy panes/tabs/sessions turning blue only while the server reports work in progress. + +- [ ] **Step 5: Commit** + +```bash +git add src/lib/pane-activity.ts src/components/panes/PaneContainer.tsx src/components/TabBar.tsx src/components/TabSwitcher.tsx src/components/MobileTabStrip.tsx src/components/Sidebar.tsx test/unit/client/lib/pane-activity.test.ts test/unit/client/components/panes/PaneContainer.test.tsx test/unit/client/components/TabBar.test.tsx test/unit/client/components/TabSwitcher.test.tsx test/unit/client/components/MobileTabStrip.test.tsx test/unit/client/components/Sidebar.test.tsx test/e2e/pane-activity-indicator-flow.test.tsx test/e2e/opencode-startup-probes.test.tsx +git commit -m "feat: surface opencode busy state in ui" +``` + +## Chunk 3: Final Verification And Handoff + +### Task 5: Refactor, Verify Broadly, And Record Any Residual Risk + +**Files:** +- Modify only if needed after refactor/test fallout. +- Verify: `server/*`, `src/*`, `test/*` + +- [ ] **Step 1: Refactor any duplicated helper logic that the passing tests exposed** + +```ts +function createActivityOverlayController(...) { + return { + requestList, + reset, + applySnapshot, + applyMutation, + } +} +``` + +Refactor only if it clearly reduces duplication between Codex and OpenCode without changing behavior. + +- [ ] **Step 2: Run lint and all focused Vitest suites together** + +Run: `npm run lint` + +Run: `npm run test:vitest -- test/unit/server/terminal-registry.test.ts test/server/agent-tabs-write.test.ts test/server/agent-run.test.ts test/unit/server/coding-cli/opencode-activity-tracker.test.ts test/server/ws-opencode-activity.test.ts test/unit/client/store/opencodeActivitySlice.test.ts test/unit/client/components/App.ws-bootstrap.test.tsx test/unit/client/lib/pane-activity.test.ts test/unit/client/components/panes/PaneContainer.test.tsx test/unit/client/components/TabBar.test.tsx test/unit/client/components/TabSwitcher.test.tsx test/unit/client/components/MobileTabStrip.test.tsx test/unit/client/components/Sidebar.test.tsx test/e2e/pane-activity-indicator-flow.test.tsx test/e2e/opencode-startup-probes.test.tsx` + +Expected: PASS. + +- [ ] **Step 3: Run the coordinated full suite** + +Run: `npm run test:status` + +Run: `FRESHELL_TEST_SUMMARY="opencode busy/finished status" npm test` + +Expected: PASS across the repo-owned coordinated suite. If the coordinator is busy, wait instead of interrupting it. + +- [ ] **Step 4: Inspect for any residual risk and update the implementation notes if necessary** + +```md +- Residual risk: the initial `15_000ms` health timeout may need tuning on very slow hosts. +- Residual risk: if OpenCode changes its `/event` payload contract, the tracker should fail loudly in tests before it fails silently in production. +- Residual risk: localhost port allocation still uses the normal bind-to-0 then close pattern, so there is a small TOCTOU race before OpenCode binds the chosen port. +- No fallback path was added; failures should surface as clear launch or tracker errors. +``` + +If a real residual risk remains, document it in the final implementation summary and keep the error path explicit. + +- [ ] **Step 5: Commit the final verified state** + +```bash +git add server/local-port.ts server/coding-cli/opencode-activity-tracker.ts server/coding-cli/opencode-activity-wiring.ts server/terminal-registry.ts server/ws-handler.ts server/agent-api/router.ts server/index.ts shared/ws-protocol.ts src/store/opencodeActivitySlice.ts src/store/store.ts src/App.tsx src/lib/pane-activity.ts src/components/panes/PaneContainer.tsx src/components/TabBar.tsx src/components/TabSwitcher.tsx src/components/MobileTabStrip.tsx src/components/Sidebar.tsx test/unit/server/terminal-registry.test.ts test/server/agent-tabs-write.test.ts test/server/agent-run.test.ts test/unit/server/coding-cli/opencode-activity-tracker.test.ts test/server/ws-opencode-activity.test.ts test/unit/client/store/opencodeActivitySlice.test.ts test/unit/client/components/App.ws-bootstrap.test.tsx test/unit/client/lib/pane-activity.test.ts test/unit/client/components/panes/PaneContainer.test.tsx test/unit/client/components/TabBar.test.tsx test/unit/client/components/TabSwitcher.test.tsx test/unit/client/components/MobileTabStrip.test.tsx test/unit/client/components/Sidebar.test.tsx test/e2e/pane-activity-indicator-flow.test.tsx test/e2e/opencode-startup-probes.test.tsx +git commit -m "feat: wire opencode busy and finished status end to end" +``` diff --git a/docs/plans/2026-04-07-input-history-test-plan.md b/docs/plans/2026-04-07-input-history-test-plan.md new file mode 100644 index 000000000..9ea6722eb --- /dev/null +++ b/docs/plans/2026-04-07-input-history-test-plan.md @@ -0,0 +1,228 @@ +# Test Plan: Bash-Style Input History + +> **Companion to:** `2026-04-07-input-history.md` (implementation plan) +> **Feature:** Up/down arrow history navigation in ChatComposer, persisted per-pane to localStorage with 500-entry cap. + +--- + +## 1. Strategy Reconciliation + +The AGENTS.md testing strategy requires: + +| Requirement | How Satisfied | +|---|---| +| Red-Green-Refactor TDD | Each task in the plan writes failing tests first (steps 1-2), then implements (step 3), then verifies green (step 4) | +| Both unit & e2e coverage | Unit: store tests + hook tests + component tests. E2E: jsdom integration flow test. E2E-browser: Playwright spec | +| Check logs for debugging | Not directly applicable (no server component) | +| Full suite pass before merge | Task 6 runs `npm test`, `npm run lint`, `npx tsc --noEmit` | + +All three test layers are covered. No deviations from the approved strategy. + +--- + +## 2. Test Inventory + +### Layer 1: Unit — Persistence Store + +**File:** `test/unit/client/lib/input-history-store.test.ts` +**Source under test:** `src/lib/input-history-store.ts` + +| # | Test Name | What It Verifies | Key Assertions | +|---|-----------|------------------|----------------| +| 1 | `returns empty array for unknown paneId` | `loadHistory` returns `[]` when no data exists | `loadHistory('nonexistent')` → `toEqual([])` | +| 2 | `pushEntry adds an entry and returns updated history` | Basic push + load round-trip | `pushEntry(...)` returns `['hello']`; `loadHistory(...)` returns `['hello']` | +| 3 | `pushEntry deduplicates consecutive identical entries` | Consecutive dedup: `['hello']` then push `'hello'` → still `['hello']` | Result length is 1, entry unchanged | +| 4 | `pushEntry keeps non-consecutive duplicates` | Same value at different times is kept | `['hello', 'world', 'hello']` after three pushes | +| 5 | `evicts oldest entries beyond 500` | Push 502 unique entries; oldest 2 are evicted | `history.length === 500`, `history[0] === 'entry-2'`, `history[499] === 'entry-501'` | +| 6 | `isolates history per paneId` | Different paneIds have independent histories | `loadHistory('test-pane')` ≠ `loadHistory('other-pane')` | +| 7 | `clearHistory removes stored history` | `clearHistory` deletes the localStorage key | `loadHistory` returns `[]` after clear | +| 8 | `handles corrupted localStorage data gracefully` | Bad JSON returns `[]`, no throw | `localStorage.setItem(key, 'not json{')` then `loadHistory` → `[]` | +| 9 | `preserves entry order across save and load` | Order is `[oldest, ..., newest]` after multiple pushes | `['first', 'second', 'third']` | + +**Assessment:** 9 tests. Fully specified in the plan (Task 1, Step 1). No gaps. + +--- + +### Layer 2: Unit — React Hook + +**File:** `test/unit/client/hooks/useInputHistory.test.ts` +**Source under test:** `src/hooks/useInputHistory.ts` + +| # | Test Name | What It Verifies | Key Assertions | +|---|-----------|------------------|----------------| +| 1 | `navigateUp returns null when no history exists` | Empty history is a no-op | `navigateUp('')` → `null` | +| 2 | `navigateUp returns newest entry first` | Cursor starts at -1, first Up shows newest | After push 'first','second': `navigateUp('')` → `'second'` | +| 3 | `navigateUp returns null at oldest entry` | Boundary: can't go past oldest | Two Ups on 1-entry history: second returns `null` | +| 4 | `navigateDown returns null at newest position` | Boundary: can't go past newest (cursor=-1) | `navigateDown('')` → `null` with no prior navigation | +| 5 | `navigateDown restores saved draft` | Draft saved on first Up, restored on Down to -1 | Up saves draft; Down returns `'my draft'` | +| 6 | `full navigation cycle: up twice, down twice` | Complete 3-entry cycle with draft restore | Up→'third', Up→'second', Down→'third', Down→`''` | +| 7 | `push adds entry and resets cursor` | After push, navigation restarts from newest | After navigating to older entry, push resets; next Up shows newest | +| 8 | `reset clears cursor and draft without pushing` | `reset()` returns to -1 without modifying history | After Up+reset, Down returns `null`; Up returns entry | +| 9 | `saves draft on first navigateUp only` | Draft is the text at the moment of first Up | Up×2 then Down×2 returns to original draft, not intermediate | +| 10 | `resets when paneId changes` | Hook reloads history for new paneId | After push+navigate on pane A, switch to pane B → no history | +| 11 | `loads history from store on mount` | Pre-existing localStorage data is available | `pushEntry` before mount; `navigateUp('')` → `'pre-existing'` | +| 12 | `no-ops when paneId is undefined` | Graceful handling of missing paneId | `navigateUp`/`navigateDown` → `null`; `push` doesn't persist | + +**Assessment:** 12 tests. Fully specified in the plan (Task 2, Step 1). No gaps. + +--- + +### Layer 3: Unit — ChatComposer Integration + +**File:** `test/unit/client/components/agent-chat/ChatComposer.test.tsx` +**Source under test:** `src/components/agent-chat/ChatComposer.tsx` +**Action:** Add a new `describe('input history navigation')` block to the existing file + +#### 3a. Existing Tests (must continue passing) + +The existing 17 tests cover: rendering, send on Enter, Shift+Enter newline, disabled state, stop button, Escape interrupt, send button disabled, draft preservation (5 tests), autoFocus on disabled transition (3 tests), tab switching shortcuts (3 tests). + +**Cleanup changes:** +- Add `import { clearHistory } from '@/lib/input-history-store'` to imports +- Add `clearHistory('test-pane')`, `clearHistory('pane-a')`, `clearHistory('pane-b')` to the existing `afterEach` block + +#### 3b. New Tests + +| # | Test Name | What It Verifies | Key Assertions | +|---|-----------|------------------|----------------| +| 1 | `ArrowUp on empty input navigates to previous history entry` | Send 2 messages, Up shows newest | After sending 'first','second': ArrowUp → value `'second message'` | +| 2 | `ArrowUp navigates through multiple entries` | Up×2 walks newest→oldest | ArrowUp → 'second', ArrowUp → 'first' | +| 3 | `ArrowDown restores draft after navigating up` | Draft save/restore through textarea | Type draft, Up → history entry, Down → original draft | +| 4 | `ArrowDown at bottom position does nothing` | No navigation below cursor=-1 | ArrowDown on empty → value stays `''` | +| 5 | `ArrowUp does not navigate when cursor is not on first line` | Multi-line guard for ArrowUp | Multi-line text + ArrowUp → value unchanged (`'line1\n'`) | +| 6 | `sends add to history and can be recalled` | Push-on-send + recall | After Enter, onSend called; ArrowUp shows sent text | +| 7 | `history is independent per pane` | Per-pane isolation in the component | Send on pane-a, unmount, mount pane-b → ArrowUp shows empty | + +**Assessment:** 7 new tests + cleanup changes. Fully specified in the plan (Task 3, Step 1). + +--- + +### Layer 4: E2E Integration (jsdom) + +**File:** `test/e2e/agent-chat-input-history-flow.test.tsx` +**Source under test:** Full ChatComposer → useInputHistory → input-history-store chain + +Uses the same mock pattern as the existing ChatComposer unit tests (`vi.mock('@/store/hooks')`, `vi.mock('@/store/tabsSlice')`). + +| # | Test Name | What It Verifies | Key Assertions | +|---|-----------|------------------|----------------| +| 1 | `end-to-end: send messages, navigate history, verify values` | Full send→Up×2→Down×2 cycle | Values at each step: 'message beta' → 'message alpha' → 'message beta' → `''` | +| 2 | `preserves draft through navigation cycle` | Draft in real textarea survives Up/Down | Type draft, Up, Down → draft restored | +| 3 | `history survives component unmount and remount` | localStorage persistence across remount | Unmount after send, remount, ArrowUp → 'persistent message' | +| 4 | `deduplicates consecutive identical sends` | Consecutive dedup visible at UI level | Send 'same' twice; only 1 history entry (Up×2 stays at 'same', Down goes to '') | + +**Assessment:** 4 tests. Fully specified in the plan (Task 4, Step 1). No gaps. + +--- + +### Layer 5: E2E Browser (Playwright) + +**File:** `test/e2e-browser/specs/agent-chat-input-history.spec.ts` +**Source under test:** Full stack in real Chromium with production server + +Uses a shared `setupAgentChatPane` helper that: +1. Waits for terminal to be ready +2. Gets active tab/pane IDs from the Redux harness +3. Suppresses agent-chat network effects +4. Dispatches `agentChat/sessionCreated`, `agentChat/sessionInit`, and `panes/updatePaneContent` to set up an agent-chat pane +5. Returns `{ tabId, paneId, sessionId, textarea }` + +| # | Test Name | What It Verifies | Key Assertions | +|---|-----------|------------------|----------------| +| 1 | `ArrowUp cycles through sent messages` | Full navigation in real browser | Send 2 msgs, Up×2→Down×2 with correct values at each step | +| 2 | `ArrowUp preserves current draft when navigating away` | Draft save/restore in real browser | Type draft, Up→history, Down→draft | +| 3 | `history persists across page reload` | localStorage survives full page reload | Send msg, `page.goto()` to reload, check localStorage contains 'persistent message' | +| 4 | `history scoped per pane (different paneIds are independent)` | Per-pane localStorage isolation | Send on pane A; check unrelated localStorage key is null | +| 5 | `max 500 entries — oldest evicted` | Eviction in real browser | Pre-populate 502 entries in localStorage, send 1 msg through UI → 500 entries, oldest evicted | + +**Assessment:** 5 tests. Fully specified in the plan (Task 5, Step 1). No gaps. + +--- + +## 3. Gaps and Additional Tests + +### 3.1. Gap: ArrowDown multi-line guard + +The plan tests that **ArrowUp** is blocked when the cursor is on line 2 of multi-line input (test 5 in Layer 3). However, there is no corresponding test for **ArrowDown** being blocked when the cursor is on line 1 of multi-line input. + +**Recommended addition to Layer 3:** + +``` +Test: "ArrowDown does not navigate when cursor is not on last line" +File: test/unit/client/components/agent-chat/ChatComposer.test.tsx +Verify: ArrowDown on line 1 of 'line1\nline2' does NOT navigate history +Setup: Send a message to create history, type 'line1\nline2', place cursor on line 1 +Assert: fireEvent.keyDown(ArrowDown) → textarea value still 'line1\nline2' +``` + +**Note:** Testing cursor position in jsdom can be fragile. If `selectionStart` is not reliably tracked by `userEvent`, this test may need to set `textarea.selectionStart` directly. If the test proves flaky, it can be moved to the Playwright layer where cursor positioning is reliable. + +### 3.2. Gap: Push with whitespace-only or empty string + +The `input-history-store.pushEntry` does not guard against empty/whitespace strings. The ChatComposer trims before calling `push`, so this won't happen in practice. But the store itself has no guard. + +**Verdict:** Low risk. The hook's `push` function is only called from `handleSend` which trims and early-returns on empty. No additional test needed — the component-level tests cover this path. + +### 3.3. Gap: Hook referential stability + +The plan's design decisions state that `navigateUp`, `navigateDown`, and `reset` have zero dependencies and are referentially stable. No test verifies this explicitly. + +**Verdict:** This is an implementation detail, not a behavior contract. Not required for correctness. If performance optimization becomes important later, a stability test can be added. + +### 3.4. Gap: Intermediate edit discard + +When the user navigates to a history entry, edits it in the textarea, then navigates away (Up or Down), the edit is lost. This matches the design (draft-only model), but no test explicitly verifies this behavior. + +**Recommended addition to Layer 4 (e2e jsdom):** + +``` +Test: "editing a history entry then navigating away discards the edit" +File: test/e2e/agent-chat-input-history-flow.test.tsx +Verify: Navigate to history entry, edit it, navigate away then back → original entry +Setup: Send 'original', navigate Up to show 'original', type 'modified' suffix, + navigate Down then Up again +Assert: textarea shows 'original' (edit was not preserved) +``` + +### 3.5. Gap: localStorage quota exceeded + +No test for `localStorage.setItem` throwing due to quota. + +**Verdict:** Extremely unlikely with 500 string entries. Acceptable to skip. + +--- + +## 4. Summary + +| Layer | File | Test Count | Plan Coverage | Gap Tests | +|---|---|---|---|---| +| Unit: Store | `test/unit/client/lib/input-history-store.test.ts` | 9 | 100% specified | 0 | +| Unit: Hook | `test/unit/client/hooks/useInputHistory.test.ts` | 12 | 100% specified | 0 | +| Unit: Component | `test/unit/client/components/agent-chat/ChatComposer.test.tsx` | 7 new | 100% specified | +1 (ArrowDown guard) | +| E2E Integration | `test/e2e/agent-chat-input-history-flow.test.tsx` | 4 | 100% specified | +1 (edit discard) | +| E2E Browser | `test/e2e-browser/specs/agent-chat-input-history.spec.ts` | 5 | 100% specified | 0 | +| **Total** | | **37** | | **+2 recommended** | + +### Recommended gap tests to add (2 tests, low effort) + +1. **ArrowDown multi-line guard** — Add to ChatComposer unit test block. ~10 lines. +2. **Intermediate edit discard** — Add to e2e integration flow test. ~15 lines. + +Both are optional but improve coverage of the multi-line guard and draft-only design decision. + +### No strategy deviations + +The plan's test specifications fully satisfy the AGENTS.md requirements for TDD, unit coverage, and e2e coverage. No changes to the approved testing strategy are needed. + +--- + +## 5. Execution Order + +Tests should be written in the same order as the plan's tasks: + +1. **Task 1:** `input-history-store.test.ts` → implement store → green +2. **Task 2:** `useInputHistory.test.ts` → implement hook → green +3. **Task 3:** ChatComposer history tests → wire integration → green + existing tests still pass +4. **Task 4:** E2E integration flow test → green (implementation already complete) +5. **Task 5:** Playwright spec → green +6. **Task 6:** Full suite verification (`npm test`, `npm run lint`, `npx tsc --noEmit`) diff --git a/docs/plans/2026-04-07-input-history.md b/docs/plans/2026-04-07-input-history.md new file mode 100644 index 000000000..19a406528 --- /dev/null +++ b/docs/plans/2026-04-07-input-history.md @@ -0,0 +1,1054 @@ +# Bash-Style Input History Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use trycycle-executing to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add bash-style up/down arrow history navigation to the ChatComposer used by agent-chat panes, persisted per-pane to localStorage with a 500-entry cap. + +**Architecture:** A three-layer design: (1) `input-history-store.ts` — a pure localStorage persistence module keyed by paneId, handling dedup and eviction; (2) `useInputHistory` — a React hook providing cursor-based navigation with draft save/restore; (3) ChatComposer integration wiring ArrowUp/ArrowDown keys through the hook. History is scoped per pane, persists across page reloads, and survives component unmount/remount. + +**Tech Stack:** React hooks, localStorage, @testing-library/react (unit + integration), Playwright (e2e-browser) + +--- + +## Design Decisions + +### Cursor model +- `cursorRef = -1` means "at the input line" (newest position, below all history) +- `cursorRef = 0` means "showing the newest history entry" +- `cursorRef = history.length - 1` means "showing the oldest entry" +- ArrowUp increments cursor (toward older), ArrowDown decrements (toward newer) +- History array stored as `[oldest, ..., newest]` — `history[history.length - 1 - cursor]` returns the entry at the current cursor position + +### Draft save/restore +- When the user first presses ArrowUp from position -1, the current text is saved as `draftRef` +- When they ArrowDown back to position -1, the draft is restored +- Intermediate edits to history entries are NOT preserved — this matches browser-console/IPython behavior (simpler than full bash line-editing model) +- Rationale: full bash-style per-position edit preservation requires tracking modified copies for every cursor position, which is complex and fragile. The draft-only model is the standard for non-terminal history inputs. + +### Cursor position guards +- ArrowUp only navigates history when the textarea cursor is on the first line (no newline in text before selectionStart) OR the text is empty +- ArrowDown only navigates when the textarea cursor is on the last line (no newline in text after selectionStart) OR the text is empty +- This preserves normal arrow-key movement within multi-line input + +### Dedup policy +- Consecutive identical entries are deduplicated on push: if `history[last] === entry`, skip +- Non-consecutive duplicates are kept (the user may send the same message at different times) + +### Max size (500) +- When pushing an entry that exceeds 500, the oldest entry is evicted (shift from front) +- The eviction happens before save so localStorage never holds more than 500 entries per pane + +### Storage format +- Key: `freshell.input-history.v1:${paneId}` — one localStorage key per pane +- Value: JSON stringified `string[]` +- Try/catch on parse: corrupted data returns empty array (defensive) + +### Hook stability +- `navigateUp`, `navigateDown`, `reset` have zero dependencies (use refs only), so they're referentially stable +- `push` depends on `paneId` (stable between pane switches) + +--- + +## File Structure + +| File | Action | Responsibility | +|------|--------|---------------| +| `src/store/storage-keys.ts` | Modify | Add `inputHistory` key constant | +| `src/lib/input-history-store.ts` | Create | localStorage persistence: load, save, push, clear | +| `src/hooks/useInputHistory.ts` | Create | React hook: cursor navigation, draft, push, reset | +| `src/components/agent-chat/ChatComposer.tsx` | Modify | Wire ArrowUp/ArrowDown keys and push-on-send | +| `test/unit/client/lib/input-history-store.test.ts` | Create | Unit tests for persistence layer | +| `test/unit/client/hooks/useInputHistory.test.ts` | Create | Unit tests for hook | +| `test/unit/client/components/agent-chat/ChatComposer.test.tsx` | Modify | Add history navigation tests | +| `test/e2e/agent-chat-input-history-flow.test.tsx` | Create | Integration test (jsdom) | +| `test/e2e-browser/specs/agent-chat-input-history.spec.ts` | Create | Playwright e2e browser tests | + +--- + +## Task 1: Storage key + Persistence layer + Unit tests + +**Files:** +- Modify: `src/store/storage-keys.ts` +- Create: `src/lib/input-history-store.ts` +- Create: `test/unit/client/lib/input-history-store.test.ts` + +- [ ] **Step 1: Write failing tests for input-history-store** + +```typescript +// test/unit/client/lib/input-history-store.test.ts +import { describe, it, expect, beforeEach } from 'vitest' +import { loadHistory, pushEntry, clearHistory } from '@/lib/input-history-store' + +describe('input-history-store', () => { + beforeEach(() => { + clearHistory('test-pane') + clearHistory('other-pane') + }) + + it('returns empty array for unknown paneId', () => { + expect(loadHistory('nonexistent')).toEqual([]) + }) + + it('pushEntry adds an entry and returns updated history', () => { + const result = pushEntry('test-pane', 'hello') + expect(result).toEqual(['hello']) + expect(loadHistory('test-pane')).toEqual(['hello']) + }) + + it('pushEntry deduplicates consecutive identical entries', () => { + pushEntry('test-pane', 'hello') + const result = pushEntry('test-pane', 'hello') + expect(result).toEqual(['hello']) + }) + + it('pushEntry keeps non-consecutive duplicates', () => { + pushEntry('test-pane', 'hello') + pushEntry('test-pane', 'world') + pushEntry('test-pane', 'hello') + expect(loadHistory('test-pane')).toEqual(['hello', 'world', 'hello']) + }) + + it('evicts oldest entries beyond 500', () => { + for (let i = 0; i < 502; i++) { + pushEntry('test-pane', `entry-${i}`) + } + const history = loadHistory('test-pane') + expect(history).toHaveLength(500) + expect(history[0]).toBe('entry-2') + expect(history[499]).toBe('entry-501') + }) + + it('isolates history per paneId', () => { + pushEntry('test-pane', 'a') + pushEntry('other-pane', 'b') + expect(loadHistory('test-pane')).toEqual(['a']) + expect(loadHistory('other-pane')).toEqual(['b']) + }) + + it('clearHistory removes stored history', () => { + pushEntry('test-pane', 'hello') + clearHistory('test-pane') + expect(loadHistory('test-pane')).toEqual([]) + }) + + it('handles corrupted localStorage data gracefully', () => { + localStorage.setItem('freshell.input-history.v1:corrupted', 'not json{') + expect(loadHistory('corrupted')).toEqual([]) + }) + + it('preserves entry order across save and load', () => { + pushEntry('test-pane', 'first') + pushEntry('test-pane', 'second') + pushEntry('test-pane', 'third') + expect(loadHistory('test-pane')).toEqual(['first', 'second', 'third']) + }) +}) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `npm run test:vitest -- test/unit/client/lib/input-history-store.test.ts --run` +Expected: FAIL — module not found + +- [ ] **Step 3: Add storage key and implement input-history-store** + +Modify `src/store/storage-keys.ts` — add to `STORAGE_KEYS` object: +```typescript +inputHistory: 'freshell.input-history.v1', +``` + +Create `src/lib/input-history-store.ts`: +```typescript +import { STORAGE_KEYS } from '@/store/storage-keys' + +const MAX_ENTRIES = 500 + +function storageKey(paneId: string): string { + return `${STORAGE_KEYS.inputHistory}:${paneId}` +} + +export function loadHistory(paneId: string): string[] { + try { + const raw = localStorage.getItem(storageKey(paneId)) + if (!raw) return [] + const parsed = JSON.parse(raw) + return Array.isArray(parsed) ? parsed : [] + } catch { + return [] + } +} + +export function pushEntry(paneId: string, entry: string): string[] { + const history = loadHistory(paneId) + if (history.length > 0 && history[history.length - 1] === entry) { + return history + } + history.push(entry) + while (history.length > MAX_ENTRIES) { + history.shift() + } + saveHistory(paneId, history) + return history +} + +export function saveHistory(paneId: string, entries: string[]): void { + localStorage.setItem(storageKey(paneId), JSON.stringify(entries)) +} + +export function clearHistory(paneId: string): void { + localStorage.removeItem(storageKey(paneId)) +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `npm run test:vitest -- test/unit/client/lib/input-history-store.test.ts --run` +Expected: all PASS + +- [ ] **Step 5: Refactor and verify** + +Review the store implementation for clarity. Ensure the 500-constant is not duplicated. Re-run: + +Run: `npm run test:vitest -- test/unit/client/lib/input-history-store.test.ts --run` +Expected: all PASS + +- [ ] **Step 6: Commit** + +```bash +git add src/store/storage-keys.ts src/lib/input-history-store.ts test/unit/client/lib/input-history-store.test.ts +git commit -m "feat: add input-history-store with localStorage persistence and 500-entry cap" +``` + +--- + +## Task 2: useInputHistory hook + Unit tests + +**Files:** +- Create: `src/hooks/useInputHistory.ts` +- Create: `test/unit/client/hooks/useInputHistory.test.ts` + +- [ ] **Step 1: Write failing tests for useInputHistory** + +```typescript +// test/unit/client/hooks/useInputHistory.test.ts +import { describe, it, expect, beforeEach } from 'vitest' +import { renderHook, act } from '@testing-library/react' +import { useInputHistory } from '@/hooks/useInputHistory' +import { clearHistory, loadHistory, pushEntry } from '@/lib/input-history-store' + +describe('useInputHistory', () => { + beforeEach(() => { + clearHistory('hook-pane') + clearHistory('other-hook-pane') + }) + + it('navigateUp returns null when no history exists', () => { + const { result } = renderHook(() => useInputHistory('hook-pane')) + expect(result.current.navigateUp('')).toBeNull() + }) + + it('navigateUp returns newest entry first', () => { + const { result } = renderHook(() => useInputHistory('hook-pane')) + act(() => { + result.current.push('first') + result.current.push('second') + }) + expect(result.current.navigateUp('')).toBe('second') + }) + + it('navigateUp returns null at oldest entry', () => { + const { result } = renderHook(() => useInputHistory('hook-pane')) + act(() => { result.current.push('only') }) + result.current.navigateUp('') + expect(result.current.navigateUp('only')).toBeNull() + }) + + it('navigateDown returns null at newest position', () => { + const { result } = renderHook(() => useInputHistory('hook-pane')) + act(() => { result.current.push('entry') }) + expect(result.current.navigateDown('')).toBeNull() + }) + + it('navigateDown restores saved draft', () => { + const { result } = renderHook(() => useInputHistory('hook-pane')) + act(() => { result.current.push('entry') }) + result.current.navigateUp('my draft') + expect(result.current.navigateDown('entry')).toBe('my draft') + }) + + it('full navigation cycle: up twice, down twice', () => { + const { result } = renderHook(() => useInputHistory('hook-pane')) + act(() => { + result.current.push('first') + result.current.push('second') + result.current.push('third') + }) + expect(result.current.navigateUp('')).toBe('third') + expect(result.current.navigateUp('third')).toBe('second') + expect(result.current.navigateDown('second')).toBe('third') + expect(result.current.navigateDown('third')).toBe('') + }) + + it('push adds entry and resets cursor', () => { + const { result } = renderHook(() => useInputHistory('hook-pane')) + act(() => { + result.current.push('first') + result.current.push('second') + }) + result.current.navigateUp('') + result.current.navigateUp('second') + act(() => { result.current.push('third') }) + expect(result.current.navigateUp('')).toBe('third') + expect(result.current.navigateUp('third')).toBe('second') + }) + + it('reset clears cursor and draft without pushing', () => { + const { result } = renderHook(() => useInputHistory('hook-pane')) + act(() => { result.current.push('entry') }) + result.current.navigateUp('my draft') + act(() => { result.current.reset() }) + expect(result.current.navigateDown('')).toBeNull() + expect(result.current.navigateUp('')).toBe('entry') + }) + + it('saves draft on first navigateUp only', () => { + const { result } = renderHook(() => useInputHistory('hook-pane')) + act(() => { + result.current.push('first') + result.current.push('second') + }) + result.current.navigateUp('original draft') + result.current.navigateUp('second') + expect(result.current.navigateDown('first')).toBe('second') + expect(result.current.navigateDown('second')).toBe('original draft') + }) + + it('resets when paneId changes', () => { + const { result, rerender } = renderHook( + ({ paneId }) => useInputHistory(paneId), + { initialProps: { paneId: 'hook-pane' } } + ) + act(() => { result.current.push('entry-a') }) + result.current.navigateUp('') + rerender({ paneId: 'other-hook-pane' }) + expect(result.current.navigateUp('')).toBeNull() + }) + + it('loads history from store on mount', () => { + pushEntry('hook-pane', 'pre-existing') + const { result } = renderHook(() => useInputHistory('hook-pane')) + expect(result.current.navigateUp('')).toBe('pre-existing') + }) + + it('no-ops when paneId is undefined', () => { + const { result } = renderHook(() => useInputHistory(undefined)) + expect(result.current.navigateUp('')).toBeNull() + expect(result.current.navigateDown('')).toBeNull() + act(() => { result.current.push('should not persist') }) + expect(loadHistory('undefined')).toEqual([]) + }) +}) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `npm run test:vitest -- test/unit/client/hooks/useInputHistory.test.ts --run` +Expected: FAIL — module not found + +- [ ] **Step 3: Implement useInputHistory hook** + +Create `src/hooks/useInputHistory.ts`: +```typescript +import { useCallback, useEffect, useRef } from 'react' +import { loadHistory, pushEntry as storePushEntry } from '@/lib/input-history-store' + +export interface UseInputHistoryReturn { + navigateUp: (currentText: string) => string | null + navigateDown: (currentText: string) => string | null + push: (entry: string) => void + reset: () => void +} + +export function useInputHistory(paneId: string | undefined): UseInputHistoryReturn { + const cursorRef = useRef(-1) + const draftRef = useRef('') + const historyRef = useRef([]) + const paneIdRef = useRef(paneId) + + useEffect(() => { + if (paneId !== paneIdRef.current || paneIdRef.current === undefined) { + paneIdRef.current = paneId + } + historyRef.current = paneId ? loadHistory(paneId) : [] + cursorRef.current = -1 + draftRef.current = '' + }, [paneId]) + + const navigateUp = useCallback((currentText: string): string | null => { + const history = historyRef.current + if (history.length === 0) return null + if (cursorRef.current >= history.length - 1) return null + + if (cursorRef.current === -1) { + draftRef.current = currentText + } + + cursorRef.current++ + return history[history.length - 1 - cursorRef.current] + }, []) + + const navigateDown = useCallback((_currentText: string): string | null => { + if (cursorRef.current <= -1) return null + + cursorRef.current-- + + if (cursorRef.current === -1) { + return draftRef.current + } + + const history = historyRef.current + return history[history.length - 1 - cursorRef.current] + }, []) + + const push = useCallback((entry: string): void => { + if (!paneId) return + historyRef.current = storePushEntry(paneId, entry) + cursorRef.current = -1 + draftRef.current = '' + }, [paneId]) + + const reset = useCallback((): void => { + cursorRef.current = -1 + draftRef.current = '' + }, []) + + return { navigateUp, navigateDown, push, reset } +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `npm run test:vitest -- test/unit/client/hooks/useInputHistory.test.ts --run` +Expected: all PASS + +- [ ] **Step 5: Refactor and verify** + +Review for clarity. Ensure callback deps are correct (navigateUp/navigateDown have no deps since they use refs; push depends on paneId). Re-run: + +Run: `npm run test:vitest -- test/unit/client/hooks/useInputHistory.test.ts --run` +Expected: all PASS + +- [ ] **Step 6: Commit** + +```bash +git add src/hooks/useInputHistory.ts test/unit/client/hooks/useInputHistory.test.ts +git commit -m "feat: add useInputHistory hook with cursor-based navigation and draft preservation" +``` + +--- + +## Task 3: ChatComposer integration + extended tests + +**Files:** +- Modify: `src/components/agent-chat/ChatComposer.tsx` +- Modify: `test/unit/client/components/agent-chat/ChatComposer.test.tsx` + +- [ ] **Step 1: Write failing tests for history navigation in ChatComposer** + +Add the following test block to `test/unit/client/components/agent-chat/ChatComposer.test.tsx`: + +```typescript +import { clearHistory } from '@/lib/input-history-store' + +// Add to the existing afterEach: +// clearHistory('test-pane') +// clearHistory('pane-a') +// clearHistory('pane-b') + +// New describe block: +describe('input history navigation', () => { + afterEach(() => { + clearHistory('test-pane') + }) + + it('ArrowUp on empty input navigates to previous history entry', async () => { + const user = userEvent.setup() + render( {}} onInterrupt={() => {}} />) + const textarea = screen.getByRole('textbox') + + await user.type(textarea, 'first message{Enter}') + await user.type(textarea, 'second message{Enter}') + await user.click(textarea) + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('second message') + }) + + it('ArrowUp navigates through multiple entries', async () => { + const user = userEvent.setup() + render( {}} onInterrupt={() => {}} />) + const textarea = screen.getByRole('textbox') + + await user.type(textarea, 'first{Enter}') + await user.type(textarea, 'second{Enter}') + await user.click(textarea) + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('second') + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('first') + }) + + it('ArrowDown restores draft after navigating up', async () => { + const user = userEvent.setup() + render( {}} onInterrupt={() => {}} />) + const textarea = screen.getByRole('textbox') + + await user.type(textarea, 'history entry{Enter}') + await user.type(textarea, 'my draft') + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('history entry') + fireEvent.keyDown(textarea, { key: 'ArrowDown' }) + expect(textarea).toHaveValue('my draft') + }) + + it('ArrowDown at bottom position does nothing', async () => { + render( {}} onInterrupt={() => {}} />) + const textarea = screen.getByRole('textbox') + fireEvent.keyDown(textarea, { key: 'ArrowDown' }) + expect(textarea).toHaveValue('') + }) + + it('ArrowUp does not navigate when cursor is not on first line', async () => { + const user = userEvent.setup() + render( {}} onInterrupt={() => {}} />) + const textarea = screen.getByRole('textbox') + + await user.type(textarea, 'history entry{Enter}') + await user.type(textarea, 'line1{Shift>}{Enter}{/Shift}') + // Cursor is now on line 2, not on first line — ArrowUp should NOT navigate + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('line1\n') + }) + + it('sends add to history and can be recalled', async () => { + const onSend = vi.fn() + const user = userEvent.setup() + render( {}} />) + const textarea = screen.getByRole('textbox') + + await user.type(textarea, 'sent message{Enter}') + expect(onSend).toHaveBeenCalledWith('sent message') + expect(textarea).toHaveValue('') + + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('sent message') + }) + + it('history is independent per pane', async () => { + const user = userEvent.setup() + const { unmount } = render( + {}} onInterrupt={() => {}} /> + ) + await user.type(screen.getByRole('textbox'), 'pane-a message{Enter}') + unmount() + + render( {}} onInterrupt={() => {}} />) + const textarea = screen.getByRole('textbox') + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('') + }) +}) +``` + +Also add `clearHistory('test-pane')`, `clearHistory('pane-a')`, `clearHistory('pane-b')` to the existing `afterEach` block in the main `describe('ChatComposer', ...)`. + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `npm run test:vitest -- test/unit/client/components/agent-chat/ChatComposer.test.tsx --run` +Expected: FAIL — history navigation tests fail (ArrowUp does not populate textarea) + +- [ ] **Step 3: Implement ChatComposer integration** + +Modify `src/components/agent-chat/ChatComposer.tsx`: + +1. Add import: +```typescript +import { useInputHistory } from '@/hooks/useInputHistory' +``` + +2. Add helper functions before the component: +```typescript +function isOnFirstLine(textarea: HTMLTextAreaElement): boolean { + const textBefore = textarea.value.substring(0, textarea.selectionStart) + return !textBefore.includes('\n') +} + +function isOnLastLine(textarea: HTMLTextAreaElement): boolean { + const textAfter = textarea.value.substring(textarea.selectionStart) + return !textAfter.includes('\n') +} +``` + +3. Inside the component, after the `text` state, add the hook: +```typescript +const { navigateUp, navigateDown, push, reset } = useInputHistory(paneId) +``` + +4. Modify `handleSend` to push to history and reset: +```typescript +const handleSend = useCallback(() => { + const trimmed = text.trim() + if (!trimmed) return + push(trimmed) + onSend(trimmed) + setText('') + if (paneId) clearDraft(paneId) + if (textareaRef.current) { + textareaRef.current.style.height = 'auto' + } +}, [text, onSend, paneId, push]) +``` + +5. Modify `handleKeyDown` to handle ArrowUp/ArrowDown: +```typescript +const handleKeyDown = useCallback((e: KeyboardEvent) => { + const tabDir = getTabSwitchShortcutDirection(e) + if (tabDir) { + e.preventDefault() + e.stopPropagation() + dispatch(tabDir === 'next' ? switchToNextTab() : switchToPrevTab()) + return + } + + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault() + handleSend() + return + } + if (e.key === 'Escape' && isRunning) { + e.preventDefault() + onInterrupt() + return + } + if (e.key === 'ArrowUp' && textareaRef.current && isOnFirstLine(textareaRef.current)) { + const next = navigateUp(text) + if (next !== null) { + e.preventDefault() + setText(next) + if (paneId) setDraft(paneId, next) + } + return + } + if (e.key === 'ArrowDown' && textareaRef.current && isOnLastLine(textareaRef.current)) { + const next = navigateDown(text) + if (next !== null) { + e.preventDefault() + setText(next) + if (paneId) setDraft(paneId, next) + } + return + } +}, [dispatch, handleSend, isRunning, onInterrupt, navigateUp, navigateDown, text, paneId]) +``` + +6. In the paneId-change effect, add `reset()`: +```typescript +useEffect(() => { + if (paneId !== prevPaneIdRef.current) { + prevPaneIdRef.current = paneId + setText(paneId ? getDraft(paneId) : '') + reset() + requestAnimationFrame(() => { + if (textareaRef.current) resizeTextarea(textareaRef.current) + }) + } +}, [paneId, resizeTextarea, reset]) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `npm run test:vitest -- test/unit/client/components/agent-chat/ChatComposer.test.tsx --run` +Expected: all PASS (both old and new tests) + +- [ ] **Step 5: Refactor and verify broader suite** + +Check that the handleKeyDown dependency array is correct and minimal. Ensure no regressions in existing tests: + +Run: `npm run test:vitest -- test/unit/client/components/agent-chat/ --run` +Expected: all PASS + +Run: `npm run test:vitest -- test/unit/client/ --run` +Expected: all PASS + +- [ ] **Step 6: Commit** + +```bash +git add src/components/agent-chat/ChatComposer.tsx test/unit/client/components/agent-chat/ChatComposer.test.tsx +git commit -m "feat: wire useInputHistory into ChatComposer with ArrowUp/Down navigation" +``` + +--- + +## Task 4: E2E integration test (jsdom) + +**Files:** +- Create: `test/e2e/agent-chat-input-history-flow.test.tsx` + +This test validates the full ChatComposer → useInputHistory → input-history-store chain in jsdom with a real Redux store, exercising ArrowUp/ArrowDown via `fireEvent` and verifying textarea value changes. + +- [ ] **Step 1: Write the integration test** + +```typescript +// test/e2e/agent-chat-input-history-flow.test.tsx +import { describe, it, expect, vi, afterEach } from 'vitest' +import { render, screen, cleanup, fireEvent } from '@testing-library/react' +import userEvent from '@testing-library/user-event' +import ChatComposer from '@/components/agent-chat/ChatComposer' +import { clearHistory } from '@/lib/input-history-store' + +const mockDispatch = vi.fn() +vi.mock('@/store/hooks', () => ({ + useAppDispatch: () => mockDispatch, + useAppSelector: () => ({}), +})) + +vi.mock('@/store/tabsSlice', async (importOriginal) => { + const actual = await importOriginal() + return { + ...actual, + switchToNextTab: () => ({ type: 'tabs/switchToNextTab' }), + switchToPrevTab: () => ({ type: 'tabs/switchToPrevTab' }), + } +}) + +describe('agent chat input history flow', () => { + afterEach(() => { + cleanup() + clearHistory('flow-pane') + mockDispatch.mockClear() + }) + + it('end-to-end: send messages, navigate history, verify values', async () => { + const onSend = vi.fn() + const user = userEvent.setup() + render( {}} />) + const textarea = screen.getByRole('textbox', { name: 'Chat message input' }) + + await user.type(textarea, 'message alpha{Enter}') + expect(onSend).toHaveBeenCalledWith('message alpha') + expect(textarea).toHaveValue('') + + await user.type(textarea, 'message beta{Enter}') + expect(onSend).toHaveBeenCalledWith('message beta') + expect(textarea).toHaveValue('') + + await user.click(textarea) + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('message beta') + + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('message alpha') + + fireEvent.keyDown(textarea, { key: 'ArrowDown' }) + expect(textarea).toHaveValue('message beta') + + fireEvent.keyDown(textarea, { key: 'ArrowDown' }) + expect(textarea).toHaveValue('') + }) + + it('preserves draft through navigation cycle', async () => { + const user = userEvent.setup() + render( {}} onInterrupt={() => {}} />) + const textarea = screen.getByRole('textbox') + + await user.type(textarea, 'existing entry{Enter}') + await user.type(textarea, 'work in progress') + + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('existing entry') + + fireEvent.keyDown(textarea, { key: 'ArrowDown' }) + expect(textarea).toHaveValue('work in progress') + }) + + it('history survives component unmount and remount', async () => { + const user = userEvent.setup() + const { unmount } = render( + {}} onInterrupt={() => {}} /> + ) + await user.type(screen.getByRole('textbox'), 'persistent message{Enter}') + unmount() + + render( {}} onInterrupt={() => {}} />) + const textarea = screen.getByRole('textbox') + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('persistent message') + }) + + it('deduplicates consecutive identical sends', async () => { + const user = userEvent.setup() + render( {}} onInterrupt={() => {}} />) + const textarea = screen.getByRole('textbox') + + await user.type(textarea, 'same{Enter}') + await user.type(textarea, 'same{Enter}') + await user.click(textarea) + + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + expect(textarea).toHaveValue('same') + + // Second ArrowUp: with dedup only 1 entry exists, so this is a no-op + // (cursor stays at oldest). Without dedup there would be 2 entries and + // cursor would advance to the older one. + fireEvent.keyDown(textarea, { key: 'ArrowUp' }) + + // ArrowDown from cursor=0 → cursor=-1 returns draft (''). + // Without dedup cursor would be 1 → ArrowDown goes to 0, showing 'same'. + fireEvent.keyDown(textarea, { key: 'ArrowDown' }) + expect(textarea).toHaveValue('') + }) +}) +``` + +- [ ] **Step 2: Run tests to verify they pass** + +Run: `npm run test:vitest -- test/e2e/agent-chat-input-history-flow.test.tsx --run` +Expected: all PASS + +- [ ] **Step 3: Refactor and verify** + +Run broader e2e suite to check no regressions: + +Run: `npm run test:vitest -- test/e2e/ --run` +Expected: all PASS + +- [ ] **Step 4: Commit** + +```bash +git add test/e2e/agent-chat-input-history-flow.test.tsx +git commit -m "test: add e2e integration tests for agent chat input history navigation" +``` + +--- + +## Task 5: E2E browser tests (Playwright) + +**Files:** +- Create: `test/e2e-browser/specs/agent-chat-input-history.spec.ts` + +These tests run against a real production server and Chromium browser, exercising the complete stack including localStorage persistence across page reloads. + +- [ ] **Step 1: Write the Playwright spec** + +```typescript +// test/e2e-browser/specs/agent-chat-input-history.spec.ts +import { test, expect } from '../helpers/fixtures.js' + +test.describe('Agent Chat Input History', () => { + async function setupAgentChatPane(page: any, harness: any, terminal: any) { + await terminal.waitForTerminal() + + const tabId = await harness.getActiveTabId() + expect(tabId).toBeTruthy() + const layout = await harness.getPaneLayout(tabId!) + expect(layout?.type).toBe('leaf') + const paneId = layout.id as string + const sessionId = `sdk-e2e-history-${Date.now()}` + const cliSessionId = '44444444-4444-4444-8444-444444444444' + + await page.evaluate((pId: string) => { + window.__FRESHELL_TEST_HARNESS__?.setAgentChatNetworkEffectsSuppressed(pId, true) + }, paneId) + + await page.evaluate((args: any) => { + const h = window.__FRESHELL_TEST_HARNESS__ + h?.dispatch({ type: 'agentChat/sessionCreated', payload: { requestId: 'req-history', sessionId: args.sid } }) + h?.dispatch({ type: 'agentChat/sessionInit', payload: { sessionId: args.sid, cliSessionId: args.cliSid } }) + h?.dispatch({ + type: 'panes/updatePaneContent', + payload: { + tabId: args.tid, + paneId: args.pid, + content: { + kind: 'agent-chat', + provider: 'freshclaude', + createRequestId: 'req-history', + sessionId: args.sid, + status: 'idle', + }, + }, + }) + }, { tid: tabId, pid: paneId, sid: sessionId, cliSid: cliSessionId }) + + const textarea = page.getByRole('textbox', { name: 'Chat message input' }) + await expect(textarea).toBeVisible() + return { tabId: tabId!, paneId, sessionId, textarea } + } + + test('ArrowUp cycles through sent messages', async ({ freshellPage, page, harness, terminal }) => { + const { textarea } = await setupAgentChatPane(page, harness, terminal) + + await textarea.click() + await page.keyboard.type('first message') + await page.keyboard.press('Enter') + await expect(textarea).toHaveValue('') + + await page.keyboard.type('second message') + await page.keyboard.press('Enter') + await expect(textarea).toHaveValue('') + + await page.keyboard.press('ArrowUp') + await expect(textarea).toHaveValue('second message') + + await page.keyboard.press('ArrowUp') + await expect(textarea).toHaveValue('first message') + + await page.keyboard.press('ArrowDown') + await expect(textarea).toHaveValue('second message') + + await page.keyboard.press('ArrowDown') + await expect(textarea).toHaveValue('') + }) + + test('ArrowUp preserves current draft when navigating away', async ({ freshellPage, page, harness, terminal }) => { + const { textarea } = await setupAgentChatPane(page, harness, terminal) + + await textarea.click() + await page.keyboard.type('history entry') + await page.keyboard.press('Enter') + await expect(textarea).toHaveValue('') + + await page.keyboard.type('my draft') + await page.keyboard.press('ArrowUp') + await expect(textarea).toHaveValue('history entry') + + await page.keyboard.press('ArrowDown') + await expect(textarea).toHaveValue('my draft') + }) + + test('history persists across page reload', async ({ freshellPage, page, harness, terminal, serverInfo }) => { + const { paneId, sessionId } = await setupAgentChatPane(page, harness, terminal) + + const textarea = page.getByRole('textbox', { name: 'Chat message input' }) + await textarea.click() + await page.keyboard.type('persistent message') + await page.keyboard.press('Enter') + await expect(textarea).toHaveValue('') + + await page.goto(`${serverInfo.baseUrl}/?token=${serverInfo.token}&e2e=1`) + await harness.waitForHarness() + await harness.waitForConnection() + + await page.waitForTimeout(1000) + + const localStorageData = await page.evaluate((pid: string) => { + return localStorage.getItem(`freshell.input-history.v1:${pid}`) + }, paneId) + expect(JSON.parse(localStorageData!)).toContain('persistent message') + }) + + test('history scoped per pane (different paneIds are independent)', async ({ freshellPage, page, harness, terminal }) => { + const { paneId: firstPaneId, sessionId } = await setupAgentChatPane(page, harness, terminal) + const textarea = page.getByRole('textbox', { name: 'Chat message input' }) + + await textarea.click() + await page.keyboard.type('pane-one message') + await page.keyboard.press('Enter') + await expect(textarea).toHaveValue('') + + const historyKey1 = await page.evaluate((pid: string) => { + return localStorage.getItem(`freshell.input-history.v1:${pid}`) + }, firstPaneId) + expect(JSON.parse(historyKey1!)).toEqual(['pane-one message']) + + const unrelatedKey = `freshell.input-history.v1:other-pane-${Date.now()}` + const unrelatedData = await page.evaluate((key: string) => { + return localStorage.getItem(key) + }, unrelatedKey) + expect(unrelatedData).toBeNull() + }) + + test('max 500 entries — oldest evicted', async ({ freshellPage, page, harness, terminal }) => { + const { paneId } = await setupAgentChatPane(page, harness, terminal) + + await page.evaluate((pid: string) => { + const entries: string[] = [] + for (let i = 0; i < 502; i++) { + entries.push(`entry-${i}`) + } + localStorage.setItem(`freshell.input-history.v1:${pid}`, JSON.stringify(entries)) + }, paneId) + + const stored = await page.evaluate((pid: string) => { + const raw = localStorage.getItem(`freshell.input-history.v1:${pid}`) + return JSON.parse(raw!) + }, paneId) + expect(stored).toHaveLength(502) + + const textarea = page.getByRole('textbox', { name: 'Chat message input' }) + await textarea.click() + await page.keyboard.type('overflow entry') + await page.keyboard.press('Enter') + await expect(textarea).toHaveValue('') + + const afterPush = await page.evaluate((pid: string) => { + const raw = localStorage.getItem(`freshell.input-history.v1:${pid}`) + return JSON.parse(raw!) + }, paneId) + expect(afterPush).toHaveLength(500) + expect(afterPush[0]).toBe('entry-3') + expect(afterPush[499]).toBe('overflow entry') + }) +}) +``` + +- [ ] **Step 2: Run the e2e browser tests** + +```bash +cd /home/user/code/freshell/.worktrees/add-input-history +npx playwright test test/e2e-browser/specs/agent-chat-input-history.spec.ts --project=chromium +``` + +Expected: all PASS + +- [ ] **Step 3: Refactor and verify** + +Review test helpers for reuse. Ensure no flaky timing. Re-run: + +```bash +npx playwright test test/e2e-browser/specs/agent-chat-input-history.spec.ts --project=chromium +``` + +Expected: all PASS + +- [ ] **Step 4: Commit** + +```bash +git add test/e2e-browser/specs/agent-chat-input-history.spec.ts +git commit -m "test: add Playwright e2e browser tests for agent chat input history" +``` + +--- + +## Task 6: Final verification and lint + +- [ ] **Step 1: Run the full test suite** + +Run: `npm test` +Expected: all PASS + +- [ ] **Step 2: Run lint** + +Run: `npm run lint` +Expected: no errors + +- [ ] **Step 3: Run typecheck** + +Run: `npx tsc --noEmit` +Expected: no errors + +- [ ] **Step 4: Final commit if any fixes needed** + +```bash +git add -A +git commit -m "chore: address lint/typecheck findings from input history feature" +``` diff --git a/docs/superpowers/plans/2026-03-22-mcp-orchestration-server-test-plan.md b/docs/superpowers/plans/2026-03-22-mcp-orchestration-server-test-plan.md index e8b0796db..3c3f9b944 100644 --- a/docs/superpowers/plans/2026-03-22-mcp-orchestration-server-test-plan.md +++ b/docs/superpowers/plans/2026-03-22-mcp-orchestration-server-test-plan.md @@ -25,7 +25,6 @@ | `test/unit/server/mcp/server.test.ts` | **New** | `server/mcp/server.ts` | | `test/unit/server/mcp/config-writer.test.ts` | **New** | `server/mcp/config-writer.ts` | | `test/unit/server/terminal-registry.test.ts` | **Modified** | `server/terminal-registry.ts` | -| `test/unit/server/freshell-orchestration-skill.test.ts` | **Modified** (add MCP tool content checks) | `.claude/skills/freshell-orchestration/SKILL.md` | --- @@ -313,7 +312,7 @@ This is a real child-process integration test (not mocked). It spawns the actual ### Helper Replacements -**Remove:** `expectClaudeTurnCompleteArgs()` (currently asserts `--plugin-dir` with `freshell-orchestration`) +**Remove:** `expectClaudeTurnCompleteArgs()` (currently asserts the legacy orchestration `--plugin-dir`) **Replace with:** `expectClaudeMcpArgs()` ```typescript @@ -413,18 +412,17 @@ No test logic changes beyond the helper swap are needed for these existing test --- -## File 6: `test/unit/server/freshell-orchestration-skill.test.ts` (Verified, may need update) +## File 6: `test/unit/server/mcp/freshell-tool.test.ts` (Expanded instruction coverage) -**Source:** `.claude/skills/freshell-orchestration/SKILL.md` +**Source:** `server/mcp/freshell-tool.ts` **Category:** Unit -**Status:** The plan modifies SKILL.md to add MCP tool preference. The existing test checks for specific content (`'If a target or name contains spaces, quote it.'`). This content is retained. **However**, a new test case should be added to verify the MCP section was added. +**Status:** The MCP tool is the canonical orchestration reference. Expand the existing help/instructions coverage instead of maintaining a separate standalone doc test. -### New Test Case (if SKILL.md is modified) +### New Test Case | # | Test Name | Asserts | |---|-----------|---------| -| 1 | `includes MCP tool section` | Read SKILL.md. Expect content to contain `'MCP tool'` or `'freshell MCP'` (confirming the new section exists). | -| 2 | `mentions CLI as fallback` | Read SKILL.md. Expect content to contain `'CLI fallback'` or `'fallback'` near the MCP section. | +| 1 | `help text documents rename defaults and rename playbook` | Call `executeAction('help')`. Expect the result to mention `rename-tab`, `rename-pane`, omitted-target semantics, and the create/split/rename playbook. | --- @@ -459,7 +457,7 @@ The implementation plan uses red-green-refactor TDD. Tests should be written and 3. **Task 4:** `test/unit/server/mcp/server.test.ts` -- write tests first (red), implement `server/mcp/server.ts` (green), refactor 4. **Task 5:** `test/unit/server/mcp/config-writer.test.ts` -- write tests first (red), implement `server/mcp/config-writer.ts` (green), refactor 5. **Task 6:** Modify `test/unit/server/terminal-registry.test.ts` -- update helpers (red), modify `server/terminal-registry.ts` (green), delete dead code, refactor -6. **Task 7:** Optionally update `test/unit/server/freshell-orchestration-skill.test.ts`, update SKILL.md +6. **Task 7:** Optionally expand `test/unit/server/mcp/freshell-tool.test.ts` to cover the canonical MCP help/instruction surface --- @@ -473,5 +471,4 @@ The implementation plan uses red-green-refactor TDD. Tests should be written and | `config-writer.test.ts` | 26 | 0 | | `config-writer-paths.test.ts` | 5 | 0 | | `terminal-registry.test.ts` | 11 | ~12 (helper swap) | -| `freshell-orchestration-skill.test.ts` | 2 | 0 | | **Total** | **114** | **~12** | diff --git a/docs/superpowers/plans/2026-03-22-mcp-orchestration-server.md b/docs/superpowers/plans/2026-03-22-mcp-orchestration-server.md index 9c8db5c6b..4f3e91ff5 100644 --- a/docs/superpowers/plans/2026-03-22-mcp-orchestration-server.md +++ b/docs/superpowers/plans/2026-03-22-mcp-orchestration-server.md @@ -22,7 +22,7 @@ After this lands: - Shell-mode terminals are unaffected -- no MCP injection for plain shells. - The existing CLI fallback path continues to work; agents that happen to be running outside Freshell or inside the repo itself can still use the CLI via `npx tsx server/cli/index.ts`. - Temp MCP config files are cleaned up when terminals exit. -- The orchestration skill SKILL.md is updated to prefer the MCP tool when available, with CLI as fallback. +- The MCP tool instructions in `server/mcp/freshell-tool.ts` become the canonical orchestration reference, with CLI examples retained inside the tool help text. ## Contracts And Invariants @@ -42,13 +42,13 @@ After this lands: 8. **Dev/production detection for MCP server command.** When `process.env.NODE_ENV === 'production'`, the config uses `node /dist/server/mcp/server.js`. Otherwise, it uses `node --import /node_modules/tsx/dist/esm/index.mjs /server/mcp/server.ts`. All paths are absolute, resolved from `import.meta.url` in `server/mcp/config-writer.ts`. The `--import` path must be absolute (not bare `tsx/esm`) because the MCP server process runs in the agent's cwd, not the repo root. -9. **Dead code removal is complete.** After MCP injection replaces skill/plugin injection: `claudePluginArgs()`, `codexOrchestrationSkillArgs()`, `codexSkillsDir()`, `encodeTomlString()`, `firstExistingPath()`, `firstExistingPaths()`, and all associated constants (`DEFAULT_FRESHELL_ORCHESTRATION_SKILL_DIR`, `LEGACY_FRESHELL_ORCHESTRATION_SKILL_DIR`, `DEFAULT_FRESHELL_DEMO_SKILL_DIR`, `LEGACY_FRESHELL_DEMO_SKILL_DIR`, `DEFAULT_FRESHELL_CLAUDE_PLUGIN_DIR`, `LEGACY_FRESHELL_CLAUDE_PLUGIN_DIR`, `DEFAULT_CODEX_HOME`) are removed from `terminal-registry.ts`. The orphaned `server/spawn-spec.ts` is deleted. **`.claude/plugins/freshell-orchestration/` is NOT deleted** — it is still used by `SdkBridge` for Claude Agent SDK sessions (agent-chat panes). `SdkBridge.DEFAULT_PLUGIN_CANDIDATES` and its tests are left unchanged. The plugin directory will be replaced with MCP in a future follow-up that adds MCP support to SdkBridge sessions. +9. **Dead code removal is complete.** After MCP injection replaces skill/plugin injection: `claudePluginArgs()`, `codexOrchestrationSkillArgs()`, `codexSkillsDir()`, `encodeTomlString()`, `firstExistingPath()`, `firstExistingPaths()`, and all associated constants (`DEFAULT_FRESHELL_ORCHESTRATION_SKILL_DIR`, `LEGACY_FRESHELL_ORCHESTRATION_SKILL_DIR`, `DEFAULT_FRESHELL_DEMO_SKILL_DIR`, `LEGACY_FRESHELL_DEMO_SKILL_DIR`, `DEFAULT_FRESHELL_CLAUDE_PLUGIN_DIR`, `LEGACY_FRESHELL_CLAUDE_PLUGIN_DIR`, `DEFAULT_CODEX_HOME`) are removed from `terminal-registry.ts`. The orphaned `server/spawn-spec.ts` is deleted. In the follow-up safe cleanup, `SdkBridge.DEFAULT_PLUGIN_CANDIDATES` is removed, the legacy orchestration plugin/skill wrapper files are deleted, and agent-chat panes rely on MCP for Freshell orchestration while still allowing explicit plugin arrays for unrelated Claude SDK bundles. -10. **Test assertions update cleanly.** The existing `expectClaudeTurnCompleteArgs()` and `expectCodexTurnCompleteArgs()` test helpers in `terminal-registry.test.ts` are replaced with MCP-aware equivalents. The `freshell-orchestration-skill.test.ts` is updated to verify the new MCP tool section was added to SKILL.md (Task 7 adds MCP preference text; the test gains 2 new assertions for MCP content). +10. **Test assertions update cleanly.** The existing `expectClaudeTurnCompleteArgs()` and `expectCodexTurnCompleteArgs()` test helpers in `terminal-registry.test.ts` are replaced with MCP-aware equivalents. The canonical orchestration docs coverage lives in `test/unit/server/mcp/freshell-tool.test.ts`; no separate standalone doc test remains. ## Root Cause Summary -- **Claude Code:** `--plugin-dir` points to `.claude/plugins/freshell-orchestration/` which contains a `skills/freshell-orchestration` file that should be a symlink but is a plain text file containing `../../../skills/freshell-orchestration` (git `core.symlinks=false` on WSL). +- **Claude Code:** `--plugin-dir` pointed at a legacy Freshell orchestration wrapper whose pointer file was brittle on WSL (`core.symlinks=false`), so plugin-based orchestration was an unreliable transport. - **Codex:** `-c skills.config=[{path=..., enabled=true}]` attempts TOML array-of-tables syntax via the `-c` dotted-path flag. The `-c` flag parses the value as TOML, but a TOML inline array containing inline tables with embedded paths is fragile and silently fails depending on Codex's TOML parser version. - **Gemini/Kimi/OpenCode:** `providerNotificationArgs()` returns `[]` for these modes -- no orchestration injection attempted at all. @@ -99,16 +99,20 @@ No user decision is required. | `package.json` | Add `@modelcontextprotocol/sdk` dependency | | `server/terminal-registry.ts` | (1) Import `generateMcpInjection` and `cleanupMcpConfig` from `./mcp/config-writer.js`. (2) Thread `terminalId` and `cwd` through `buildSpawnSpec()` → `resolveCodingCliCommand()` → `providerNotificationArgs()`, where `generateMcpInjection()` is called and its args/env are merged into the existing call chain (MCP args flow through the same pipeline as notification args, correctly placed inside any shell wrapping). (3) Add `cleanupMcpConfig()` in `onExit`, `kill()`, and spawn-failure catch. (4) Remove dead code: `claudePluginArgs()`, `codexOrchestrationSkillArgs()`, `codexSkillsDir()`, `encodeTomlString()`, `firstExistingPath()`, `firstExistingPaths()`, and associated constants. (5) Update `providerNotificationArgs()` to remove plugin/skill calls (keep bell hooks). | | `test/unit/server/terminal-registry.test.ts` | Replace `expectClaudeTurnCompleteArgs()` and `expectCodexTurnCompleteArgs()` helpers with MCP-aware equivalents. Add test cases for Gemini, Kimi, OpenCode MCP injection. | -| ~~`server/sdk-bridge.ts`~~ | NOT modified — SdkBridge still uses the plugin directory for agent-chat sessions. Left unchanged. | -| `.claude/skills/freshell-orchestration/SKILL.md` | Add MCP tool preference section at top, keep CLI reference as fallback. | +| `server/sdk-bridge.ts` | Remove the default legacy orchestration plugin fallback while preserving explicit plugin arrays, and rely on MCP for Freshell orchestration in agent-chat panes. | +| `server/mcp/freshell-tool.ts` | Keep the canonical orchestration instructions and `help` text aligned with the `freshell` MCP surface. | ### Files to Delete | File | Reason | |------|--------| | `server/spawn-spec.ts` | Orphaned duplicate of `buildSpawnSpec()` -- nothing imports it at runtime (verified: only referenced in docs/plans). | +| `legacy Freshell orchestration plugin wrapper` | Legacy Claude-only orchestration wrapper replaced by MCP for both terminals and agent-chat panes. | +| `legacy Freshell orchestration wrapper` | Replaced by the canonical MCP tool instructions in `server/mcp/freshell-tool.ts`. | +| `legacy Codex orchestration pointer` | No longer used for orchestration. | +| `obsolete standalone orchestration doc test` | Coverage lives in `test/unit/server/mcp/freshell-tool.test.ts`. | -**Note:** `.claude/plugins/freshell-orchestration/` is NOT deleted. Although it contains a broken symlink on WSL, it is still used by `SdkBridge` for Claude Agent SDK sessions. Replacing plugin-based orchestration in SdkBridge with MCP is a separate follow-up. +**Note:** The follow-up safe cleanup removes the remaining orchestration wrapper files after `SdkBridge` stops defaulting them. --- @@ -462,8 +466,8 @@ Replace the broken `providerNotificationArgs()` / `claudePluginArgs()` / `codexO - Modify: `server/terminal-registry.ts` - Modify: `test/unit/server/terminal-registry.test.ts` - Delete: `server/spawn-spec.ts` -- NOT deleting: `.claude/plugins/freshell-orchestration/` (still used by SdkBridge) -- NOT modifying: `server/sdk-bridge.ts` or `test/unit/server/sdk-bridge.test.ts` (SdkBridge plugin behavior unchanged) +- Modify: `server/sdk-bridge.ts` +- Modify: `test/unit/server/sdk-bridge.test.ts` - [ ] **Step 1: Update existing tests to expect MCP injection** @@ -593,7 +597,7 @@ grep -r "from.*spawn-spec\|import.*spawn-spec\|require.*spawn-spec" server/ src/ ``` Expected: no matches -**Note:** `.claude/plugins/freshell-orchestration/` is NOT deleted — still used by SdkBridge for agent-chat sessions. +**Note:** The later safe cleanup deletes the legacy orchestration plugin wrapper after `SdkBridge` stops defaulting it. - [ ] **Step 5: Run tests to verify they pass** @@ -625,43 +629,26 @@ SdkBridge agent-chat sessions." --- -### Task 7: Update Orchestration Skill Content +### Task 7: Keep MCP Tool Instructions Canonical -The SKILL.md told agents to use the CLI via `npx tsx server/cli/index.ts`. Now the MCP tool is the preferred path. Update the skill for contexts where it's still discovered natively (e.g., agents working inside the Freshell repo). +The canonical orchestration reference now lives in `server/mcp/freshell-tool.ts`. Keep its `INSTRUCTIONS` and `HELP_TEXT` aligned with the supported MCP surface. **Files:** -- Modify: `.claude/skills/freshell-orchestration/SKILL.md` +- Modify: `server/mcp/freshell-tool.ts` -- [ ] **Step 1: Update SKILL.md to mention MCP tool** +- [ ] **Step 1: Update the `freshell` MCP tool instructions and help text** -Add a new section after "Start state" and before "Mental model": +Ensure the tool description, instructions, and `help` output cover: +- preferred use of the `freshell` MCP tool +- CLI examples only as fallback guidance +- concrete rename examples and caller-target defaults where supported -```markdown -## MCP tool (preferred) - -If you have the `freshell` MCP tool available, use it directly -- it's faster and doesn't require Bash approval for each command. - -Quick start: -- `freshell({action: "help"})` -- see all available commands -- `freshell({action: "list-tabs"})` -- see open tabs -- `freshell({action: "new-tab", params: {name: "Work", mode: "claude"}})` -- create a tab -- `freshell({action: "send-keys", params: {target: "p1", keys: "hello\n"}})` -- send input - -The MCP tool accepts the same commands as the CLI below but with structured JSON input instead of positional arguments. - -## CLI fallback - -If the MCP tool is not available (e.g., running outside Freshell or in a context without MCP support), use the CLI: -``` - -Keep the rest of the file unchanged -- it serves as reference for the CLI fallback path and as documentation for the MCP tool's action names. - -- [ ] **Step 2: Verify the orchestration skill test still passes** +- [ ] **Step 2: Verify the canonical MCP help/instruction test still passes** ```bash -npm run test:vitest -- --run test/unit/server/freshell-orchestration-skill.test.ts +npm run test:vitest -- --config vitest.server.config.ts --run test/unit/server/mcp/freshell-tool.test.ts ``` -Expected: PASS (this test checks for specific content in the SKILL.md) +Expected: PASS - [ ] **Step 3: Run full test suite one final time** @@ -673,8 +660,8 @@ Expected: Typecheck + all tests pass - [ ] **Step 4: Commit** ```bash -git add .claude/skills/freshell-orchestration/SKILL.md -git commit -m "docs: update orchestration skill to prefer MCP tool over CLI" +git add server/mcp/freshell-tool.ts test/unit/server/mcp/freshell-tool.test.ts +git commit -m "docs: keep freshell MCP tool instructions canonical" ``` --- diff --git a/docs/superpowers/plans/2026-04-05-session-sidebar-title-hardening.md b/docs/superpowers/plans/2026-04-05-session-sidebar-title-hardening.md new file mode 100644 index 000000000..a9f99d880 --- /dev/null +++ b/docs/superpowers/plans/2026-04-05-session-sidebar-title-hardening.md @@ -0,0 +1,855 @@ +# Session Sidebar Title Hardening Implementation Plan + +> **For Claude:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make inferred session titles durable, keep active or running sessions visible in the sidebar, and improve lightweight Codex title recovery so long-lived sessions do not disappear from the left pane. + +**Architecture:** Treat this as one indexing contract fix spanning server and client. The server becomes the durable source of inferred titles via a sticky metadata field plus monotonic index merges, the client selector merges live tab state into server rows instead of suppressing it, and the lightweight scan learns enough of the Codex JSONL shape to recover titles during cold start without waiting for full enrichment. Keep user session overrides authoritative and preserve the existing `hideEmptySessions` behavior for inactive, truly titleless sessions. + +**Tech Stack:** TypeScript, Node.js, Express, React 18, Redux Toolkit, Vitest, Testing Library, Supertest. + +--- + +## Scope Check + +These three fixes are tightly coupled parts of the same failure mode: + +1. server-side sticky derived titles +2. client-side sidebar visibility invariants +3. lightweight Codex title recovery + +Keep them in one plan so the same regression harnesses prove the full user-visible behavior. + +Spec source: the current Freshell session thread about sticky derived titles, sidebar visibility for active sessions, and lightweight Codex title recovery. There is no separate spec document. + +## Guardrails + +- Execute only from a dedicated git worktree under `.worktrees/`. Do not edit `main` directly. +- Follow Red-Green-Refactor for every code change in this plan. +- Preserve precedence: user rename override > freshly parsed title > previous cached title > persisted derived title. +- Do not let session metadata writes become unbounded. Only persist `derivedTitle` when the non-empty value changes. +- `hideEmptySessions` may still hide inactive, truly empty sessions. It must not hide sessions that are open in a tab or actively running. +- Keep `readLightweightMeta()` bounded to head/tail reads. Do not turn cold start back into a full-file parse. +- Keep `POST /api/session-metadata` backward compatible. Updating `sessionType` must not clear stored `derivedTitle`. +- Scope this change to file-backed providers (`claude`, `codex`, and other JSONL-backed providers). Do not widen the direct-listing provider contract in this pass. + +## File Structure Map + +- Modify: `server/session-metadata-store.ts` + - Purpose: persist a sticky `derivedTitle` field and merge metadata patches instead of overwriting sibling fields. +- Modify: `test/unit/server/session-metadata-store.test.ts` + - Purpose: lock in merged metadata semantics and defensive-copy behavior for `derivedTitle`. +- Modify: `test/integration/server/session-metadata-api.test.ts` + - Purpose: prove the existing session metadata API still updates `sessionType` without clearing stored sticky titles. +- Modify: `server/coding-cli/session-indexer.ts` + - Purpose: preserve non-empty titles across reparses, hydrate lightweight rows from persisted metadata, persist newly discovered titles, and improve lightweight Codex title extraction. +- Modify: `test/unit/server/coding-cli/session-indexer.test.ts` + - Purpose: capture title-preservation regressions, cold-start metadata hydration, metadata persistence, and lightweight Codex title recovery. +- Modify: `src/store/selectors/sidebarSelectors.ts` + - Purpose: merge fallback live-tab data into matching server-backed rows and guarantee active/running sessions are not hidden by `hideEmptySessions`. +- Modify: `test/unit/client/store/selectors/sidebarSelectors.test.ts` + - Purpose: prove a titleless indexed row is upgraded by matching tab state instead of shadowing the fallback row. +- Modify: `test/unit/client/store/selectors/sidebarSelectors.visibility.test.ts` + - Purpose: prove empty-session filtering still hides inactive empty rows but keeps open/running ones visible. +- Modify: `test/e2e/open-tab-session-sidebar-visibility.test.tsx` + - Purpose: reproduce the user-visible regression through `App`, including HTTP-owned sidebar refresh and an open Codex tab whose indexed row is titleless. + +## Chunk 1: Persist And Preserve Derived Titles + +### Task 1: Extend session metadata storage for sticky inferred titles + +**Files:** +- Modify: `server/session-metadata-store.ts` +- Test: `test/unit/server/session-metadata-store.test.ts` +- Test: `test/integration/server/session-metadata-api.test.ts` + +- [ ] **Step 1: Write the failing metadata-store unit tests** + +```ts +it('merges derivedTitle into an existing metadata record', async () => { + await store.set('codex', 'sess-1', { sessionType: 'codex' }) + await store.set('codex', 'sess-1', { derivedTitle: 'Investigate sidebar visibility' }) + + expect(await store.get('codex', 'sess-1')).toEqual({ + sessionType: 'codex', + derivedTitle: 'Investigate sidebar visibility', + }) +}) + +it('returns defensive copies that include derivedTitle', async () => { + await store.set('codex', 'sess-2', { derivedTitle: 'Sticky title' }) + + const entry = await store.get('codex', 'sess-2') + entry!.derivedTitle = 'mutated' + + expect((await store.get('codex', 'sess-2'))?.derivedTitle).toBe('Sticky title') +}) +``` + +- [ ] **Step 2: Write the failing integration test for the existing metadata API** + +```ts +it('preserves derivedTitle when the session metadata API updates sessionType', async () => { + await sessionMetadataStore.set('claude', 'sess-123', { derivedTitle: 'Sticky title' }) + + const res = await request(app) + .post('/api/session-metadata') + .set('x-auth-token', TEST_AUTH_TOKEN) + .send({ provider: 'claude', sessionId: 'sess-123', sessionType: 'agent' }) + + expect(res.status).toBe(200) + expect(await sessionMetadataStore.get('claude', 'sess-123')).toEqual({ + sessionType: 'agent', + derivedTitle: 'Sticky title', + }) +}) +``` + +- [ ] **Step 3: Run the new red tests** + +Run: `npm run test:vitest -- test/unit/server/session-metadata-store.test.ts test/integration/server/session-metadata-api.test.ts` + +Expected: FAIL because `SessionMetadataEntry` does not include `derivedTitle`, and `set()` currently overwrites the whole record instead of merging. + +- [ ] **Step 4: Implement sticky-title metadata support** + +```ts +export interface SessionMetadataEntry { + sessionType?: string + derivedTitle?: string +} + +sessions[provider][sessionId] = { + ...(sessions[provider][sessionId] ?? {}), + ...entry, +} +``` + +Implementation notes: +- Keep `set()` copy-safe by cloning the merged object before saving. +- Do not special-case the API route; the store merge semantics should make it safe automatically. + +- [ ] **Step 5: Run the server metadata tests to verify green** + +Run: `npm run test:vitest -- test/unit/server/session-metadata-store.test.ts test/integration/server/session-metadata-api.test.ts` + +Expected: PASS + +- [ ] **Step 6: Commit the metadata-store change** + +```bash +git add server/session-metadata-store.ts test/unit/server/session-metadata-store.test.ts test/integration/server/session-metadata-api.test.ts +git commit -m "feat: persist sticky derived session titles" +``` + +### Task 2: Lock in monotonic title resolution inside the session indexer + +**Files:** +- Modify: `server/coding-cli/session-indexer.ts` +- Test: `test/unit/server/coding-cli/session-indexer.test.ts` + +- [ ] **Step 1: Write the failing indexer regression tests** + +```ts +it('keeps an existing non-empty title when the same session reparses without one', async () => { + const fileA = path.join(tempDir, 'session-a.jsonl') + await fsp.writeFile(fileA, JSON.stringify({ cwd: '/project/a', title: 'Original title' }) + '\n') + + const provider = makeProvider([fileA]) + const indexer = new CodingCliSessionIndexer([provider]) + await indexer.refresh() + + await fsp.writeFile(fileA, JSON.stringify({ cwd: '/project/a' }) + '\n') + ;(indexer as any).markDirty(fileA) + await indexer.refresh() + + expect(indexer.getProjects()[0]?.sessions[0]?.title).toBe('Original title') +}) + +it('hydrates a cold-start lightweight row from metadata-store derivedTitle when parsing finds no title', async () => { + const files: string[] = [] + const sessionId = 'older-codex-session' + const fileA = path.join(tempDir, `${sessionId}.jsonl`) + await fsp.writeFile(fileA, JSON.stringify({ cwd: '/project/a' }) + '\n') + await fsp.utimes(fileA, new Date(2026, 0, 1), new Date(2026, 0, 1)) + files.push(fileA) + + for (let i = 0; i < 151; i += 1) { + const file = path.join(tempDir, `recent-${i}.jsonl`) + await fsp.writeFile(file, JSON.stringify({ cwd: `/project/${i}`, title: `Recent ${i}` }) + '\n') + files.push(file) + } + + const provider = makeProvider(files, { name: 'codex' }) + const metadataStore = mockMetadataStore({ + [makeSessionKey('codex', sessionId)]: { derivedTitle: 'Sticky old title' }, + }) + + vi.mocked(configStore.snapshot).mockResolvedValue({ + sessionOverrides: {}, + settings: { codingCli: { enabledProviders: ['codex'], providers: {} } }, + }) + + const indexer = new CodingCliSessionIndexer([provider], {}, metadataStore) + await indexer.refresh() + + const olderSession = indexer.getProjects() + .flatMap((group) => group.sessions) + .find((session) => session.sessionId === sessionId) + + expect(olderSession?.title).toBe('Sticky old title') +}) + +it('persists a newly parsed non-empty title to the metadata store', async () => { + const fileA = path.join(tempDir, 'session-b.jsonl') + await fsp.writeFile(fileA, JSON.stringify({ cwd: '/project/a', title: 'Fresh title' }) + '\n') + + const metadataStore = mockMetadataStore({}) + metadataStore.set = vi.fn().mockResolvedValue(undefined) + + const indexer = new CodingCliSessionIndexer([makeProvider([fileA])], {}, metadataStore) + await indexer.refresh() + + expect(metadataStore.set).toHaveBeenCalledWith('claude', 'session-b', { + derivedTitle: 'Fresh title', + }) +}) + +it('does not rewrite derivedTitle when the parsed title matches the stored title', async () => { + const fileA = path.join(tempDir, 'session-c.jsonl') + await fsp.writeFile(fileA, JSON.stringify({ cwd: '/project/a', title: 'Stable title' }) + '\n') + + const metadataStore = mockMetadataStore({ + [makeSessionKey('claude', 'session-c')]: { derivedTitle: 'Stable title' }, + }) + metadataStore.set = vi.fn().mockResolvedValue(undefined) + + const indexer = new CodingCliSessionIndexer([makeProvider([fileA])], {}, metadataStore) + await indexer.refresh() + + expect(metadataStore.set).not.toHaveBeenCalled() +}) + +it('resolves title precedence as parsed title, then previous cached title, then stored derivedTitle', async () => { + const fileA = path.join(tempDir, 'session-d.jsonl') + await fsp.writeFile(fileA, JSON.stringify({ cwd: '/project/a', title: 'Parsed title' }) + '\n') + + const metadataStore = mockMetadataStore({ + [makeSessionKey('claude', 'session-d')]: { derivedTitle: 'Stored title' }, + }) + + const indexer = new CodingCliSessionIndexer([makeProvider([fileA])], {}, metadataStore) + await indexer.refresh() + expect(indexer.getProjects()[0]?.sessions[0]?.title).toBe('Parsed title') + + await fsp.writeFile(fileA, JSON.stringify({ cwd: '/project/a' }) + '\n') + ;(indexer as any).markDirty(fileA) + await indexer.refresh() + + expect(indexer.getProjects()[0]?.sessions[0]?.title).toBe('Parsed title') +}) +``` + +- [ ] **Step 2: Run the new red tests** + +Run: `npm run test:vitest -- test/unit/server/coding-cli/session-indexer.test.ts` + +Expected: FAIL because the indexer demotes `title` to `undefined`, does not read `derivedTitle` from metadata, and never persists parsed titles back to the metadata store. + +- [ ] **Step 3: Implement monotonic title resolution in the indexer** + +```ts +const metaKey = makeSessionKey(provider.name, sessionId) +const storedTitle = sessionMetadata[metaKey]?.derivedTitle?.trim() +const parsedTitle = meta.title?.trim() +const resolvedTitle = parsedTitle || previous?.title || storedTitle + +const baseSession: CodingCliSession = { + ..., + title: resolvedTitle, +} + +if (this.sessionMetadataStore && parsedTitle && parsedTitle !== storedTitle) { + await this.sessionMetadataStore.set(provider.name, sessionId, { derivedTitle: parsedTitle }) +} +``` + +Implementation notes: +- Use the same resolution order for both lightweight cache entries and full cache entries. +- Update the local `mockMetadataStore()` helper in `test/unit/server/coding-cli/session-indexer.test.ts` so its entry shape accepts `derivedTitle` alongside `sessionType`. +- Persist only the freshly parsed non-empty title, not the fallback title from user overrides. +- Leave direct-listing providers alone in this change. They do not go through `readLightweightMeta()` and are not part of the regression being fixed. +- Keep `sessionType` merge behavior unchanged. + +- [ ] **Step 4: Run the full indexer unit suite to verify green** + +Run: `npm run test:vitest -- test/unit/server/coding-cli/session-indexer.test.ts` + +Expected: PASS + +- [ ] **Step 5: Commit the indexer durability change** + +```bash +git add server/coding-cli/session-indexer.ts test/unit/server/coding-cli/session-indexer.test.ts +git commit -m "fix: preserve inferred session titles across refreshes" +``` + +## Chunk 2: Keep Active Sessions Visible In The Sidebar + +### Task 3: Add selector-level regressions for fallback merge and visibility invariants + +**Files:** +- Modify: `src/store/selectors/sidebarSelectors.ts` +- Test: `test/unit/client/store/selectors/sidebarSelectors.test.ts` +- Test: `test/unit/client/store/selectors/sidebarSelectors.visibility.test.ts` + +- [ ] **Step 1: Write the failing selector merge test** + +```ts +it('merges open-tab fallback data into a matching server-backed titleless session', () => { + const sessionId = 'claude-current' + const items = buildSessionItems( + [makeProject([{ provider: 'claude', sessionId, title: undefined, lastActivityAt: 10 }])], + [{ + id: 'tab-1', + title: 'Current Session', + mode: 'claude', + resumeSessionId: sessionId, + createdAt: 20_000, + sessionMetadataByKey: { + 'claude:claude-current': { + sessionType: 'freshclaude', + firstUserMessage: 'IMPORTANT: internal trycycle task', + isSubagent: true, + isNonInteractive: true, + }, + }, + }] as any, + { + layouts: { + 'tab-1': { + type: 'leaf', + id: 'pane-1', + content: { + kind: 'terminal', + mode: 'claude', + status: 'running', + createRequestId: 'req-1', + resumeSessionId: sessionId, + initialCwd: '/repo', + }, + }, + }, + activePane: { 'tab-1': 'pane-1' }, + paneTitles: { 'tab-1': { 'pane-1': 'Current Session' } }, + } as any, + emptyTerminals, + emptyActivity, + ) + + expect(items).toEqual([ + expect.objectContaining({ + sessionId, + provider: 'claude', + title: 'Current Session', + hasTitle: true, + hasTab: true, + sessionType: 'freshclaude', + firstUserMessage: 'IMPORTANT: internal trycycle task', + isSubagent: true, + isNonInteractive: true, + isFallback: undefined, + }), + ]) +}) +``` + +- [ ] **Step 2: Write the failing visibility tests** + +```ts +it('keeps titleless sessions visible when they have an open tab', () => { + const result = filterSessionItemsByVisibility([ + createSessionItem({ id: '1', title: 'deadbeef', hasTitle: false, hasTab: true }), + ], { + ...baseSettings, + showSubagents: true, + showNoninteractiveSessions: true, + hideEmptySessions: true, + }) + + expect(result.map((item) => item.id)).toEqual(['1']) +}) + +it('keeps titleless sessions visible when they are running', () => { + const result = filterSessionItemsByVisibility([ + createSessionItem({ id: '1', title: 'deadbeef', hasTitle: false, isRunning: true }), + ], { + ...baseSettings, + showSubagents: true, + showNoninteractiveSessions: true, + hideEmptySessions: true, + }) + + expect(result.map((item) => item.id)).toEqual(['1']) +}) +``` + +- [ ] **Step 3: Run the selector tests to verify red** + +Run: `npm run test:vitest -- test/unit/client/store/selectors/sidebarSelectors.test.ts test/unit/client/store/selectors/sidebarSelectors.visibility.test.ts` + +Expected: FAIL because `knownKeys` suppresses the fallback row entirely and `hideEmptySessions` still drops the indexed row. + +### Task 4: Add an App-level regression for an open Codex session with a titleless indexed row + +**Files:** +- Modify: `test/e2e/open-tab-session-sidebar-visibility.test.tsx` + +- [ ] **Step 1: Write the failing App regression** + +```tsx +it('keeps an open Codex session visible when the indexed sidebar row is titleless', async () => { + const sessionId = 'codex-current' + fetchSidebarSessionsSnapshot.mockResolvedValue({ + projects: [ + { + projectPath: '/repo', + sessions: [ + { + provider: 'codex', + sessionId, + projectPath: '/repo', + lastActivityAt: 40, + title: undefined, + cwd: '/repo', + }, + ], + }, + ], + totalSessions: 1, + oldestIncludedTimestamp: 40, + oldestIncludedSessionId: `codex:${sessionId}`, + hasMore: false, + }) + + const store = createStore({ + tabs: [{ + id: 'tab-1', + title: 'Investigate sidebar visibility', + mode: 'codex', + resumeSessionId: sessionId, + createdAt: Date.now(), + }], + panes: { + layouts: { + 'tab-1': { + type: 'leaf', + id: 'pane-1', + content: { + kind: 'terminal', + mode: 'codex', + createRequestId: 'req-1', + status: 'running', + resumeSessionId: sessionId, + initialCwd: '/repo', + }, + }, + }, + activePane: { 'tab-1': 'pane-1' }, + paneTitles: { 'tab-1': { 'pane-1': 'Investigate sidebar visibility' } }, + }, + }) + + render() + + await waitFor(() => { + expect(screen.getAllByText('Investigate sidebar visibility').length).toBeGreaterThan(0) + }) + + act(() => { + broadcastWs({ type: 'sessions.changed', revision: 1 }) + }) + + await waitFor(() => { + expect(fetchSidebarSessionsSnapshot).toHaveBeenCalled() + expect(screen.getAllByText('Investigate sidebar visibility').length).toBeGreaterThan(0) + }) +}) +``` + +- [ ] **Step 2: Run the new e2e regression to verify red** + +Run: `npm run test:vitest -- test/e2e/open-tab-session-sidebar-visibility.test.tsx` + +Expected: FAIL because the server-backed row shadows the fallback row and the empty-session filter hides it. + +### Task 5: Implement sidebar row merging and the active-session visibility invariant + +**Files:** +- Modify: `src/store/selectors/sidebarSelectors.ts` +- Test: `test/unit/client/store/selectors/sidebarSelectors.test.ts` +- Test: `test/unit/client/store/selectors/sidebarSelectors.visibility.test.ts` +- Test: `test/e2e/open-tab-session-sidebar-visibility.test.tsx` + +- [ ] **Step 1: Merge fallback row data into an existing item instead of discarding it** + +```ts +const existing = itemsByKey.get(key) +if (existing) { + existing.hasTab = true + existing.timestamp = Math.max(existing.timestamp, input.timestamp ?? 0) + if (!existing.hasTitle && input.title?.trim()) { + existing.title = input.title.trim() + existing.hasTitle = true + } + const fallbackSessionType = input.metadata?.sessionType || input.sessionType + if (fallbackSessionType && (!existing.sessionType || existing.sessionType === existing.provider)) { + existing.sessionType = fallbackSessionType + } + if (!existing.cwd && input.cwd) existing.cwd = input.cwd + if (!existing.firstUserMessage && input.metadata?.firstUserMessage) { + existing.firstUserMessage = input.metadata.firstUserMessage + } + if (existing.isSubagent === undefined && input.metadata?.isSubagent !== undefined) { + existing.isSubagent = input.metadata.isSubagent + } + if (existing.isNonInteractive === undefined && input.metadata?.isNonInteractive !== undefined) { + existing.isNonInteractive = input.metadata.isNonInteractive + } + return +} +``` + +Implementation notes: +- Keep one row per `provider:sessionId`. +- Server-backed rows should remain server-backed; do not flip them to `isFallback`. +- Prefer the existing non-empty server title if one already exists. +- Preserve fallback metadata that affects filtering and labeling: `sessionType`, `firstUserMessage`, `isSubagent`, and `isNonInteractive`. + +- [ ] **Step 2: Update empty-session filtering so open/running rows survive** + +```ts +if (settings.hideEmptySessions && !item.hasTitle && !item.hasTab && !item.isRunning) { + return false +} +``` + +- [ ] **Step 3: Run the selector and e2e tests to verify green** + +Run: `npm run test:vitest -- test/unit/client/store/selectors/sidebarSelectors.test.ts test/unit/client/store/selectors/sidebarSelectors.visibility.test.ts test/e2e/open-tab-session-sidebar-visibility.test.tsx` + +Expected: PASS + +- [ ] **Step 4: Commit the sidebar selector hardening** + +```bash +git add src/store/selectors/sidebarSelectors.ts test/unit/client/store/selectors/sidebarSelectors.test.ts test/unit/client/store/selectors/sidebarSelectors.visibility.test.ts test/e2e/open-tab-session-sidebar-visibility.test.tsx +git commit -m "fix: keep active sessions visible in the sidebar" +``` + +## Chunk 3: Improve Lightweight Codex Title Recovery + +### Task 6: Add a cold-start regression for older Codex sessions outside the enrichment batch + +**Files:** +- Modify: `server/coding-cli/session-indexer.ts` +- Test: `test/unit/server/coding-cli/session-indexer.test.ts` + +- [ ] **Step 1: Write the failing lightweight Codex title test** + +```ts +it('extracts a lightweight title from a Codex response_item user message on cold start', async () => { + const files: string[] = [] + for (let i = 0; i < 151; i += 1) { + const file = path.join(tempDir, `recent-${i}.jsonl`) + await fsp.writeFile(file, [ + JSON.stringify({ type: 'session_meta', payload: { id: `recent-${i}`, cwd: `/project/${i}` } }), + JSON.stringify({ + timestamp: new Date(2026, 3, 5, 12, i).toISOString(), + type: 'response_item', + payload: { + type: 'message', + role: 'user', + content: [{ type: 'input_text', text: `Recent task ${i}` }], + }, + }), + ].join('\n')) + files.push(file) + } + + const olderSessionId = 'older-codex-session' + const olderFile = path.join(tempDir, `${olderSessionId}.jsonl`) + await fsp.writeFile(olderFile, [ + JSON.stringify({ type: 'session_meta', payload: { id: olderSessionId, cwd: '/project/older' } }), + JSON.stringify({ + timestamp: new Date(2026, 0, 1).toISOString(), + type: 'response_item', + payload: { + type: 'message', + role: 'user', + content: [{ type: 'input_text', text: 'Investigate sidebar visibility' }], + }, + }), + ].join('\n')) + files.push(olderFile) + + vi.mocked(configStore.snapshot).mockResolvedValue({ + sessionOverrides: {}, + settings: { codingCli: { enabledProviders: ['codex'], providers: {} } }, + }) + + const provider = makeProvider(files, { + name: 'codex', + parseSessionFile: codexProvider.parseSessionFile, + }) + + const indexer = new CodingCliSessionIndexer([provider], { fullScanIntervalMs: 0 }) + await indexer.refresh() + + const olderSession = indexer.getProjects() + .flatMap((group) => group.sessions) + .find((session) => session.sessionId === olderSessionId) + + expect(olderSession?.title).toBe('Investigate sidebar visibility') +}) +``` + +- [ ] **Step 2: Add the failing lightweight system-context guard test** + +```ts +it('does not synthesize a lightweight title from older system-context user records', async () => { + const files: string[] = [] + const systemOnlyId = 'system-only' + const fileA = path.join(tempDir, `${systemOnlyId}.jsonl`) + await fsp.writeFile(fileA, JSON.stringify({ + sessionId: systemOnlyId, + cwd: '/project/a', + role: 'user', + content: '\n /project/a\n', + timestamp: new Date(2026, 0, 1).toISOString(), + }) + '\n') + await fsp.utimes(fileA, new Date(2026, 0, 1), new Date(2026, 0, 1)) + files.push(fileA) + + for (let i = 0; i < 151; i += 1) { + const file = path.join(tempDir, `recent-system-${i}.jsonl`) + await fsp.writeFile(file, [ + JSON.stringify({ type: 'session_meta', payload: { id: `recent-system-${i}`, cwd: `/project/${i}` } }), + JSON.stringify({ + timestamp: new Date(2026, 3, 5, 12, i).toISOString(), + type: 'response_item', + payload: { + type: 'message', + role: 'user', + content: [{ type: 'input_text', text: `Recent task ${i}` }], + }, + }), + ].join('\n')) + files.push(file) + } + + vi.mocked(configStore.snapshot).mockResolvedValue({ + sessionOverrides: {}, + settings: { codingCli: { enabledProviders: ['codex'], providers: {} } }, + }) + + const provider = makeProvider(files, { + name: 'codex', + parseSessionFile: codexProvider.parseSessionFile, + }) + + const indexer = new CodingCliSessionIndexer([provider], { fullScanIntervalMs: 0 }) + await indexer.refresh() + + const systemOnlySession = indexer.getProjects() + .flatMap((group) => group.sessions) + .find((session) => session.sessionId === systemOnlyId) + + expect(systemOnlySession?.title).toBeUndefined() +}) +``` + +- [ ] **Step 3: Add the failing IDE-context lightweight title test** + +```ts +it('extracts lightweight Codex titles from IDE-context messages', async () => { + const ideSessionId = 'ide-context-session' + const ideFile = path.join(tempDir, `${ideSessionId}.jsonl`) + await fsp.writeFile(ideFile, [ + JSON.stringify({ type: 'session_meta', payload: { id: ideSessionId, cwd: '/project/ide' } }), + JSON.stringify({ + timestamp: new Date(2026, 0, 1).toISOString(), + type: 'response_item', + payload: { + type: 'message', + role: 'user', + content: [{ + type: 'input_text', + text: '# Context from my IDE setup:\n\n## My request for Codex:\nFix the authentication bug in the login form', + }], + }, + }), + ].join('\n')) + await fsp.utimes(ideFile, new Date(2026, 0, 1), new Date(2026, 0, 1)) + + const files = [ideFile] + for (let i = 0; i < 151; i += 1) { + const file = path.join(tempDir, `recent-ide-${i}.jsonl`) + await fsp.writeFile(file, [ + JSON.stringify({ type: 'session_meta', payload: { id: `recent-ide-${i}`, cwd: `/project/${i}` } }), + JSON.stringify({ + timestamp: new Date(2026, 3, 5, 12, i).toISOString(), + type: 'response_item', + payload: { + type: 'message', + role: 'user', + content: [{ type: 'input_text', text: `Recent task ${i}` }], + }, + }), + ].join('\n')) + files.push(file) + } + + vi.mocked(configStore.snapshot).mockResolvedValue({ + sessionOverrides: {}, + settings: { codingCli: { enabledProviders: ['codex'], providers: {} } }, + }) + + const provider = makeProvider(files, { + name: 'codex', + parseSessionFile: codexProvider.parseSessionFile, + }) + + const indexer = new CodingCliSessionIndexer([provider], { fullScanIntervalMs: 0 }) + await indexer.refresh() + + const ideSession = indexer.getProjects() + .flatMap((group) => group.sessions) + .find((session) => session.sessionId === ideSessionId) + + expect(ideSession?.title).toBe('Fix the authentication bug in the login form') +}) +``` + +- [ ] **Step 4: Run the targeted red tests** + +Run: `npm run test:vitest -- test/unit/server/coding-cli/session-indexer.test.ts` + +Expected: FAIL because `readLightweightMeta()` only recognizes flat `role/content` records and does not apply the same IDE-context or system-context handling as the full Codex parser. + +### Task 7: Teach the lightweight scan enough of the Codex message shape to recover titles safely + +**Files:** +- Modify: `server/coding-cli/session-indexer.ts` +- Test: `test/unit/server/coding-cli/session-indexer.test.ts` + +- [ ] **Step 1: Implement bounded lightweight title extraction for nested message payloads** + +```ts +const nestedMessagePayload = + obj?.type === 'response_item' && obj?.payload?.type === 'message' + ? obj.payload + : undefined + +const rawContent = + nestedMessagePayload?.content ?? + obj?.message?.content ?? + obj?.content + +const isUser = + nestedMessagePayload?.role === 'user' || + obj?.role === 'user' || + obj?.type === 'user' || + obj?.message?.role === 'user' + +const rawText = typeof rawContent === 'string' + ? rawContent + : Array.isArray(rawContent) + ? rawContent + .filter((part: any) => typeof part?.text === 'string') + .map((part: any) => part.text) + .join('\n') + : undefined + +if (isUser) { + const ideRequest = rawText ? extractFromIdeContext(rawText) : undefined + const candidate = ideRequest || (!isSystemContext(rawText ?? '') ? rawText?.replace(/<\/?image[^>]*>/g, '').trim() : '') + if (!title && candidate) { + title = extractTitleFromMessage(candidate, 200) + } +} +``` + +Implementation notes: +- Import the same helpers the full Codex parser uses: `extractTitleFromMessage`, `extractFromIdeContext`, and `isSystemContext`. +- Preserve the current flat-record behavior (`obj.content`, `obj.message?.content`, `obj.role === 'user'`) while adding nested `response_item.payload` support. +- Keep the logic inside the existing head-only scan. Do not call the full provider parser from the lightweight path. + +- [ ] **Step 2: Run the indexer unit suite to verify green** + +Run: `npm run test:vitest -- test/unit/server/coding-cli/session-indexer.test.ts` + +Expected: PASS + +- [ ] **Step 3: Commit the lightweight Codex parsing improvement** + +```bash +git add server/coding-cli/session-indexer.ts test/unit/server/coding-cli/session-indexer.test.ts +git commit -m "fix: improve lightweight codex session titles" +``` + +## Chunk 4: Final Verification And Merge Readiness + +### Task 8: Run the focused pack, then the coordinated full suite + +**Files:** +- Verify only: + - `server/session-metadata-store.ts` + - `test/unit/server/session-metadata-store.test.ts` + - `test/integration/server/session-metadata-api.test.ts` + - `server/coding-cli/session-indexer.ts` + - `test/unit/server/coding-cli/session-indexer.test.ts` + - `src/store/selectors/sidebarSelectors.ts` + - `test/unit/client/store/selectors/sidebarSelectors.test.ts` + - `test/unit/client/store/selectors/sidebarSelectors.visibility.test.ts` + - `test/e2e/open-tab-session-sidebar-visibility.test.tsx` + +- [ ] **Step 1: Run the focused client/e2e regression pack** + +Run: `npm run test:vitest -- test/unit/client/store/selectors/sidebarSelectors.test.ts test/unit/client/store/selectors/sidebarSelectors.visibility.test.ts test/e2e/open-tab-session-sidebar-visibility.test.tsx` + +Expected: PASS under the default JSDOM/client Vitest config. + +- [ ] **Step 2: Run the focused server/integration regression pack** + +Run: `npm run test:vitest -- --config vitest.server.config.ts test/unit/server/session-metadata-store.test.ts test/unit/server/coding-cli/session-indexer.test.ts test/integration/server/session-metadata-api.test.ts` + +Expected: PASS under the server/node Vitest config. + +- [ ] **Step 3: Check the coordinated test gate before the broad run** + +Run: `npm run test:status` + +Expected: no conflicting holder, or a clear signal to wait for the shared coordinator before starting `npm test`. + +- [ ] **Step 4: Run the full coordinated suite** + +Run: `FRESHELL_TEST_SUMMARY="session sidebar title hardening" npm test` + +Expected: PASS across the coordinated default and server configs. + +- [ ] **Step 5: Review the final diff for scope** + +Run: `git diff --stat "$(git merge-base main HEAD)"..HEAD` + +Expected: the diff covers the full branch and only includes the planned server, selector, and regression-test files. If anything extra appears, inspect it before continuing. + +- [ ] **Step 6: If the full suite forced any cleanup, make the minimal fix, rerun the focused packs and the coordinated suite, then create a final commit** + +```bash +npm run test:vitest -- test/unit/client/store/selectors/sidebarSelectors.test.ts test/unit/client/store/selectors/sidebarSelectors.visibility.test.ts test/e2e/open-tab-session-sidebar-visibility.test.tsx +npm run test:vitest -- --config vitest.server.config.ts test/unit/server/session-metadata-store.test.ts test/unit/server/coding-cli/session-indexer.test.ts test/integration/server/session-metadata-api.test.ts +FRESHELL_TEST_SUMMARY="session sidebar title hardening" npm test +git add server/session-metadata-store.ts server/coding-cli/session-indexer.ts src/store/selectors/sidebarSelectors.ts test/unit/server/session-metadata-store.test.ts test/integration/server/session-metadata-api.test.ts test/unit/server/coding-cli/session-indexer.test.ts test/unit/client/store/selectors/sidebarSelectors.test.ts test/unit/client/store/selectors/sidebarSelectors.visibility.test.ts test/e2e/open-tab-session-sidebar-visibility.test.tsx +git commit -m "fix: harden session sidebar title recovery" +``` diff --git a/server/agent-api/router.ts b/server/agent-api/router.ts index edd2ae48e..fbd02ec3f 100644 --- a/server/agent-api/router.ts +++ b/server/agent-api/router.ts @@ -2,6 +2,7 @@ import { Router } from 'express' import fs from 'node:fs/promises' import { randomUUID } from 'node:crypto' import { nanoid } from 'nanoid' +import { allocateLocalhostPort, type OpencodeServerEndpoint } from '../local-port.js' import { makeSessionKey } from '../coding-cli/types.js' import { MAX_TERMINAL_TITLE_OVERRIDE_LENGTH } from '../terminals-router.js' import { ok, approx, fail } from './response.js' @@ -32,6 +33,24 @@ async function resolveProviderSettings( } } +async function resolveSpawnProviderSettings( + mode: string, + configStore: any, + overrides: { permissionMode?: string; model?: string; sandbox?: string }, +): Promise<{ + permissionMode?: string + model?: string + sandbox?: string + opencodeServer?: OpencodeServerEndpoint +} | undefined> { + const providerSettings = await resolveProviderSettings(mode, configStore, overrides) + if (mode !== 'opencode') return providerSettings + return { + ...(providerSettings ?? {}), + opencodeServer: await allocateLocalhostPort(), + } +} + type ResizeLayoutStore = { getSplitSizes?: (tabId: string | undefined, splitId: string) => [number, number] | undefined resolveTarget?: (target: string) => { tabId?: string; paneId?: string; message?: string } @@ -224,7 +243,7 @@ export function createAgentApiRouter({ paneContent = { kind: 'editor', filePath: editor, language: null, readOnly: false, content: '', viewMode: 'source' } } else { const effectiveMode = mode || 'shell' - const providerSettings = await resolveProviderSettings(effectiveMode, configStore, { permissionMode, model, sandbox }) + const providerSettings = await resolveSpawnProviderSettings(effectiveMode, configStore, { permissionMode, model, sandbox }) const terminal = registry.create({ mode: effectiveMode, shell, @@ -551,7 +570,7 @@ export function createAgentApiRouter({ const created = layoutStore.createTab?.({ title }) const tabId = created?.tabId || nanoid() const paneId = created?.paneId || nanoid() - const providerSettings = await resolveProviderSettings(mode, configStore, {}) + const providerSettings = await resolveSpawnProviderSettings(mode, configStore, {}) const terminal = registry.create({ mode, shell, cwd, providerSettings, envContext: { tabId, paneId } }) layoutStore.attachPaneContent?.(tabId, paneId, { kind: 'terminal', terminalId: terminal.terminalId }) wsHandler?.broadcastUiCommand({ @@ -613,7 +632,7 @@ export function createAgentApiRouter({ content = { kind: 'editor', filePath: req.body.editor, language: null, readOnly: false, content: '', viewMode: 'source' } } else { const splitMode = req.body?.mode || 'shell' - const splitProviderSettings = await resolveProviderSettings(splitMode, configStore, {}) + const splitProviderSettings = await resolveSpawnProviderSettings(splitMode, configStore, {}) const terminal = registry.create({ mode: splitMode, shell: req.body?.shell, @@ -800,7 +819,7 @@ export function createAgentApiRouter({ const tabId = target?.tabId if (!tabId) return res.status(404).json(fail('pane not found')) const effectiveMode = req.body?.mode || 'shell' - const providerSettings = await resolveProviderSettings(effectiveMode, configStore, {}) + const providerSettings = await resolveSpawnProviderSettings(effectiveMode, configStore, {}) const terminal = registry.create({ mode: effectiveMode, shell: req.body?.shell, cwd: req.body?.cwd, providerSettings, envContext: { tabId, paneId } }) const content = { kind: 'terminal', terminalId: terminal.terminalId, status: 'running', mode: req.body?.mode || 'shell', shell: req.body?.shell || 'system', createRequestId: nanoid() } layoutStore.attachPaneContent(tabId, paneId, content) diff --git a/server/coding-cli/opencode-activity-tracker.ts b/server/coding-cli/opencode-activity-tracker.ts new file mode 100644 index 000000000..a51364299 --- /dev/null +++ b/server/coding-cli/opencode-activity-tracker.ts @@ -0,0 +1,488 @@ +import { EventEmitter } from 'events' +import { z } from 'zod' +import type { OpencodeServerEndpoint } from '../local-port.js' +import { logger } from '../logger.js' + +export const OPENCODE_HEALTH_POLL_MS = 200 +// Applies per health-wait cycle; connection failures restart the cycle after backoff. +export const OPENCODE_HEALTH_TIMEOUT_MS = 15_000 +export const OPENCODE_RECONNECT_BASE_MS = 250 +export const OPENCODE_RECONNECT_MAX_MS = 5_000 + +export type OpencodeActivityPhase = 'busy' + +export type OpencodeActivityRecord = { + terminalId: string + sessionId?: string + phase: OpencodeActivityPhase + updatedAt: number +} + +export type OpencodeActivityChange = { + upsert: OpencodeActivityRecord[] + remove: string[] +} + +const SessionIdleStatusSchema = z.object({ + type: z.literal('idle'), +}).passthrough() + +const SessionBusyStatusSchema = z.object({ + type: z.literal('busy'), +}).passthrough() + +const SessionRetryStatusSchema = z.object({ + type: z.literal('retry'), +}).passthrough() + +const SessionStatusSchema = z.discriminatedUnion('type', [ + SessionIdleStatusSchema, + SessionBusyStatusSchema, + SessionRetryStatusSchema, +]) + +const SessionStatusMapSchema = z.record(z.string(), SessionStatusSchema) + +const ServerConnectedEventSchema = z.object({ + type: z.literal('server.connected'), +}).passthrough() + +const SessionStatusEventSchema = z.object({ + type: z.literal('session.status'), + properties: z.object({ + sessionID: z.string().min(1), + status: SessionStatusSchema, + }).passthrough(), +}).passthrough() + +const SessionIdleEventSchema = z.object({ + type: z.literal('session.idle'), + properties: z.object({ + sessionID: z.string().min(1), + }).passthrough(), +}).passthrough() + +const OpencodeEventSchema = z.discriminatedUnion('type', [ + ServerConnectedEventSchema, + SessionStatusEventSchema, + SessionIdleEventSchema, +]) + +const OpencodeEventTypeSchema = z.object({ + type: z.string().min(1), +}).passthrough() + +const KNOWN_OPENCODE_EVENT_TYPES = new Set['type']>([ + 'server.connected', + 'session.status', + 'session.idle', +]) + +type FetchLike = typeof fetch + +type TrackerLogger = { + warn: (payload: object, message?: string) => void +} + +type MonitorState = { + terminalId: string + endpoint: OpencodeServerEndpoint + disposed: boolean + controller?: AbortController + reconnectDelayMs: number + reconnectTimer?: ReturnType + reconnectResolve?: () => void +} + +function createAbortError(): Error { + const error = new Error('The operation was aborted.') + error.name = 'AbortError' + return error +} + +function isAbortError(error: unknown): boolean { + return error instanceof Error && error.name === 'AbortError' +} + +function createAbortPromise(signal: AbortSignal): Promise { + return new Promise((_, reject) => { + if (signal.aborted) { + reject(createAbortError()) + return + } + signal.addEventListener('abort', () => reject(createAbortError()), { once: true }) + }) +} + +function parseSseData(block: string): string | undefined { + const dataLines: string[] = [] + for (const line of block.split('\n')) { + if (!line || line.startsWith(':')) continue + if (!line.startsWith('data:')) continue + dataLines.push(line.slice(5).trimStart()) + } + return dataLines.length > 0 ? dataLines.join('\n') : undefined +} + +function parseOpencodeEvent(data: string): z.infer | undefined { + let parsedJson: unknown + try { + parsedJson = JSON.parse(data) + } catch { + throw new Error('OpenCode event payload was not valid JSON.') + } + + const parsedType = OpencodeEventTypeSchema.safeParse(parsedJson) + if (!parsedType.success) { + throw new Error('OpenCode event payload did not include a string type.') + } + if (!KNOWN_OPENCODE_EVENT_TYPES.has(parsedType.data.type as z.infer['type'])) { + return undefined + } + + const parsedEvent = OpencodeEventSchema.safeParse(parsedJson) + if (!parsedEvent.success) { + throw new Error('OpenCode event payload did not match the expected schema.') + } + + return parsedEvent.data +} + +function extractBusySessionId( + snapshot: Record>, + currentSessionId?: string, +): string | undefined { + const busySessionIds = Object.entries(snapshot) + .filter(([, status]) => status.type !== 'idle') + .map(([sessionId]) => sessionId) + .sort() + if (busySessionIds.length === 0) return undefined + if (currentSessionId && busySessionIds.includes(currentSessionId)) { + return currentSessionId + } + return busySessionIds[0] +} + +export class OpencodeActivityTracker extends EventEmitter { + private readonly records = new Map() + private readonly monitors = new Map() + private readonly fetchImpl: FetchLike + private readonly log: TrackerLogger + private readonly now: () => number + private readonly setTimeoutFn: typeof setTimeout + private readonly clearTimeoutFn: typeof clearTimeout + private readonly random: () => number + + constructor(input: { + fetchImpl?: FetchLike + log?: TrackerLogger + now?: () => number + setTimeoutFn?: typeof setTimeout + clearTimeoutFn?: typeof clearTimeout + random?: () => number + } = {}) { + super() + this.fetchImpl = input.fetchImpl ?? fetch + this.log = input.log ?? logger.child({ component: 'opencode-activity-tracker' }) + this.now = input.now ?? (() => Date.now()) + this.setTimeoutFn = input.setTimeoutFn ?? setTimeout + this.clearTimeoutFn = input.clearTimeoutFn ?? clearTimeout + this.random = input.random ?? Math.random + } + + list(): OpencodeActivityRecord[] { + return Array.from(this.records.values()) + } + + getActivity(terminalId: string): OpencodeActivityRecord | undefined { + return this.records.get(terminalId) + } + + trackTerminal(input: { terminalId: string; endpoint: OpencodeServerEndpoint }): void { + const existing = this.monitors.get(input.terminalId) + if ( + existing + && existing.endpoint.hostname === input.endpoint.hostname + && existing.endpoint.port === input.endpoint.port + && !existing.disposed + ) { + return + } + + this.untrackTerminal({ terminalId: input.terminalId }) + + const monitor: MonitorState = { + terminalId: input.terminalId, + endpoint: input.endpoint, + disposed: false, + reconnectDelayMs: OPENCODE_RECONNECT_BASE_MS, + } + this.monitors.set(input.terminalId, monitor) + void this.runMonitor(monitor) + } + + untrackTerminal(input: { terminalId: string }): void { + const monitor = this.monitors.get(input.terminalId) + if (monitor) { + monitor.disposed = true + monitor.controller?.abort() + if (monitor.reconnectTimer) { + this.clearTimeoutFn(monitor.reconnectTimer) + monitor.reconnectTimer = undefined + } + monitor.reconnectResolve?.() + monitor.reconnectResolve = undefined + this.monitors.delete(input.terminalId) + } + this.removeRecord(input.terminalId) + } + + dispose(): void { + for (const terminalId of Array.from(this.monitors.keys())) { + this.untrackTerminal({ terminalId }) + } + } + + private async runMonitor(monitor: MonitorState): Promise { + while (!monitor.disposed) { + const controller = new AbortController() + monitor.controller = controller + try { + await this.waitForHealth(monitor, controller.signal) + await this.refreshSnapshot(monitor, controller.signal) + monitor.reconnectDelayMs = OPENCODE_RECONNECT_BASE_MS + await this.consumeEvents(monitor, controller.signal) + } catch (error) { + if (monitor.disposed || isAbortError(error)) { + return + } + this.log.warn({ + terminalId: monitor.terminalId, + endpoint: monitor.endpoint, + err: error, + }, 'OpenCode activity tracker cycle failed; retrying.') + } finally { + if (monitor.controller === controller) { + monitor.controller = undefined + } + } + + if (monitor.disposed) return + await this.sleepWithBackoff(monitor) + } + } + + private async waitForHealth(monitor: MonitorState, signal: AbortSignal): Promise { + const startedAt = this.now() + while (true) { + try { + const response = await this.fetchImpl(this.buildUrl(monitor.endpoint, '/global/health'), { + signal, + }) + if (response.ok) { + return + } + } catch (error) { + if (isAbortError(error)) { + throw error + } + } + if (this.now() - startedAt >= OPENCODE_HEALTH_TIMEOUT_MS) { + throw new Error('Timed out waiting for OpenCode health endpoint.') + } + await this.sleep(signal, OPENCODE_HEALTH_POLL_MS) + } + } + + private async refreshSnapshot(monitor: MonitorState, signal: AbortSignal): Promise { + const response = await this.fetchImpl(this.buildUrl(monitor.endpoint, '/session/status'), { + signal, + }) + if (!response.ok) { + throw new Error(`OpenCode session status request failed with ${response.status}.`) + } + + const parsed = SessionStatusMapSchema.safeParse(await response.json()) + if (!parsed.success) { + throw new Error('OpenCode session status response did not match the expected schema.') + } + + const current = this.records.get(monitor.terminalId) + const busySessionId = extractBusySessionId(parsed.data, current?.sessionId) + if (!busySessionId) { + this.removeRecord(monitor.terminalId) + return + } + + this.upsertRecord({ + terminalId: monitor.terminalId, + sessionId: busySessionId, + phase: 'busy', + updatedAt: this.now(), + }) + } + + private async consumeEvents(monitor: MonitorState, signal: AbortSignal): Promise { + const response = await this.fetchImpl(this.buildUrl(monitor.endpoint, '/event'), { + signal, + headers: { accept: 'text/event-stream' }, + }) + if (!response.ok || !response.body) { + throw new Error(`OpenCode event stream request failed with ${response.status}.`) + } + + const reader = response.body.getReader() + const decoder = new TextDecoder() + const abortPromise = createAbortPromise(signal) + let buffer = '' + + try { + while (true) { + const result = await Promise.race([ + reader.read(), + abortPromise, + ]) + + if (result.done) { + return + } + + buffer += decoder.decode(result.value, { stream: true }) + .replace(/\r\n/g, '\n') + .replace(/\r/g, '\n') + + let separatorIndex = buffer.indexOf('\n\n') + while (separatorIndex >= 0) { + const block = buffer.slice(0, separatorIndex) + buffer = buffer.slice(separatorIndex + 2) + this.handleSseBlock(monitor.terminalId, block) + separatorIndex = buffer.indexOf('\n\n') + } + } + } finally { + try { + await reader.cancel() + } catch { + // ignore cancellation errors during teardown + } + } + } + + private handleSseBlock(terminalId: string, block: string): void { + const data = parseSseData(block) + if (!data) return + + let event: z.infer | undefined + try { + event = parseOpencodeEvent(data) + } catch (error) { + const endpoint = this.monitors.get(terminalId)?.endpoint + this.log.warn({ + terminalId, + endpoint, + err: error, + }, 'OpenCode event payload was invalid; skipping payload.') + return + } + + if (!event) return + if (event.type === 'server.connected') return + if (event.type === 'session.idle') { + this.removeRecordForSession(terminalId, event.properties.sessionID) + return + } + if (event.properties.status.type === 'idle') { + this.removeRecordForSession(terminalId, event.properties.sessionID) + return + } + + this.upsertRecord({ + terminalId, + sessionId: event.properties.sessionID, + phase: 'busy', + updatedAt: this.now(), + }) + } + + private async sleepWithBackoff(monitor: MonitorState): Promise { + const baseDelay = monitor.reconnectDelayMs + const jitter = Math.floor(baseDelay * 0.1 * this.random()) + const delayMs = Math.min(OPENCODE_RECONNECT_MAX_MS, baseDelay + jitter) + monitor.reconnectDelayMs = Math.min(OPENCODE_RECONNECT_MAX_MS, baseDelay * 2) + await new Promise((resolve) => { + monitor.reconnectResolve = resolve + monitor.reconnectTimer = this.setTimeoutFn(() => { + monitor.reconnectTimer = undefined + monitor.reconnectResolve = undefined + resolve() + }, delayMs) + }) + } + + private async sleep(signal: AbortSignal, delayMs: number): Promise { + if (signal.aborted) { + throw createAbortError() + } + + let timer: ReturnType | undefined + const onAbort = () => { + if (timer) { + this.clearTimeoutFn(timer) + } + } + + try { + await Promise.race([ + new Promise((resolve) => { + timer = this.setTimeoutFn(() => { + timer = undefined + resolve() + }, delayMs) + signal.addEventListener('abort', onAbort, { once: true }) + }), + createAbortPromise(signal), + ]) + } finally { + signal.removeEventListener('abort', onAbort) + if (timer) { + this.clearTimeoutFn(timer) + } + } + } + + private buildUrl(endpoint: OpencodeServerEndpoint, pathname: string): string { + return `http://${endpoint.hostname}:${endpoint.port}${pathname}` + } + + private removeRecordForSession(terminalId: string, sessionId: string): void { + const existing = this.records.get(terminalId) + if (!existing) return + if (existing.sessionId && existing.sessionId !== sessionId) return + this.removeRecord(terminalId) + } + + private upsertRecord(record: OpencodeActivityRecord): void { + const previous = this.records.get(record.terminalId) + if ( + previous + && previous.sessionId === record.sessionId + && previous.phase === record.phase + && previous.updatedAt === record.updatedAt + ) { + return + } + this.records.set(record.terminalId, record) + this.emit('changed', { + upsert: [record], + remove: [], + } satisfies OpencodeActivityChange) + } + + private removeRecord(terminalId: string): void { + if (!this.records.delete(terminalId)) return + this.emit('changed', { + upsert: [], + remove: [terminalId], + } satisfies OpencodeActivityChange) + } +} diff --git a/server/coding-cli/opencode-activity-wiring.ts b/server/coding-cli/opencode-activity-wiring.ts new file mode 100644 index 000000000..51dd5605a --- /dev/null +++ b/server/coding-cli/opencode-activity-wiring.ts @@ -0,0 +1,67 @@ +import { OpencodeActivityTracker } from './opencode-activity-tracker.js' +import type { OpencodeServerEndpoint } from '../local-port.js' +import type { TerminalRecord } from '../terminal-registry.js' + +type OpencodeActivityRegistry = { + list: () => Array<{ terminalId: string }> + get: (terminalId: string) => TerminalRecord | undefined + on: (event: string, handler: (...args: any[]) => void) => void + off: (event: string, handler: (...args: any[]) => void) => void +} + +function getEndpoint(record: TerminalRecord): OpencodeServerEndpoint | undefined { + return record.mode === 'opencode' ? record.opencodeServer : undefined +} + +export function wireOpencodeActivityTracker(input: { + registry: OpencodeActivityRegistry + fetchImpl?: typeof fetch + now?: () => number + setTimeoutFn?: typeof setTimeout + clearTimeoutFn?: typeof clearTimeout + random?: () => number +}) { + const tracker = new OpencodeActivityTracker({ + fetchImpl: input.fetchImpl, + now: input.now, + setTimeoutFn: input.setTimeoutFn, + clearTimeoutFn: input.clearTimeoutFn, + random: input.random, + }) + + const startTracking = (record: TerminalRecord) => { + const endpoint = getEndpoint(record) + if (!endpoint || record.status !== 'running') return + tracker.trackTerminal({ + terminalId: record.terminalId, + endpoint, + }) + } + + const onCreated = (record: TerminalRecord) => { + startTracking(record) + } + + const onExit = (event: { terminalId?: string }) => { + if (!event.terminalId) return + tracker.untrackTerminal({ terminalId: event.terminalId }) + } + + input.registry.on('terminal.created', onCreated) + input.registry.on('terminal.exit', onExit) + + for (const listed of input.registry.list()) { + const record = input.registry.get(listed.terminalId) + if (!record) continue + startTracking(record) + } + + return { + tracker, + dispose(): void { + input.registry.off('terminal.created', onCreated) + input.registry.off('terminal.exit', onExit) + tracker.dispose() + }, + } +} diff --git a/server/coding-cli/session-indexer.ts b/server/coding-cli/session-indexer.ts index e1032454d..72f676cf8 100644 --- a/server/coding-cli/session-indexer.ts +++ b/server/coding-cli/session-indexer.ts @@ -6,10 +6,11 @@ import chokidar from 'chokidar' import { logger } from '../logger.js' import { getPerfConfig, startPerfTimer } from '../perf-logger.js' import { configStore, SessionOverride } from '../config-store.js' +import { extractTitleFromMessage } from '../title-utils.js' import type { CodingCliProvider } from './provider.js' import { makeSessionKey, type CodingCliSession, type CodingCliProviderName, type ProjectGroup } from './types.js' import { sanitizeCodexTaskEventsForTruncatedSnippet } from './providers/codex.js' -import { resolveGitCheckoutRoot, resolveGitRepoRoot } from './utils.js' +import { extractFromIdeContext, isSystemContext, resolveGitCheckoutRoot, resolveGitRepoRoot } from './utils.js' import { diffProjects } from '../sessions-sync/diff.js' import type { SessionMetadataStore, SessionMetadataEntry } from '../session-metadata-store.js' @@ -47,6 +48,19 @@ function maxDefined(a: number | undefined, b: number | undefined): number | unde return Math.max(a, b) } +function normalizeTitle(title: string | undefined): string | undefined { + const trimmed = title?.trim() + return trimmed ? trimmed : undefined +} + +function resolveSessionTitle( + parsedTitle: string | undefined, + previousTitle: string | undefined, + storedTitle: string | undefined, +): string | undefined { + return normalizeTitle(parsedTitle) || normalizeTitle(previousTitle) || normalizeTitle(storedTitle) +} + // Byte pattern for a text user message (content is a string, not a tool_result array). const USER_TEXT_PATTERN = Buffer.from('"role":"user","content":"') @@ -247,15 +261,35 @@ async function readLightweightMeta(filePath: string): Promise typeof b?.text === 'string').map((b: any) => b.text).join(' ') + const rawText = typeof rawContent === 'string' + ? rawContent + : Array.isArray(rawContent) + ? rawContent + .filter((part: any) => typeof part?.text === 'string') + .map((part: any) => part.text) + .join('\n') : undefined - if (typeof text === 'string' && text.trim()) title = text.trim().slice(0, 200) + + const ideRequest = rawText ? extractFromIdeContext(rawText) : undefined + const candidate = ideRequest + || (!isSystemContext(rawText ?? '') ? rawText?.replace(/<\/?image[^>]*>/g, '').trim() : '') + if (candidate) { + title = extractTitleFromMessage(candidate, 200) + } } } if (sessionId && cwd && title && createdAt) break @@ -718,7 +752,12 @@ export class CodingCliSessionIndexer { } } - private async updateCacheEntry(provider: CodingCliProvider, filePath: string, cacheKey: string) { + private async updateCacheEntry( + provider: CodingCliProvider, + filePath: string, + cacheKey: string, + sessionMetadata: Record, + ) { let stat: Stats try { stat = await fsp.stat(filePath) @@ -772,6 +811,10 @@ export class CodingCliSessionIndexer { const sessionId = meta.sessionId || provider.extractSessionId(filePath, meta) const previous = cached?.lightweight ? undefined : cached?.baseSession const sameSession = previous?.provider === provider.name && previous?.sessionId === sessionId + const metaKey = makeSessionKey(provider.name, sessionId) + const storedTitle = normalizeTitle(sessionMetadata[metaKey]?.derivedTitle) + const parsedTitle = normalizeTitle(meta.title) + const resolvedTitle = resolveSessionTitle(parsedTitle, sameSession ? previous?.title : undefined, storedTitle) const appendOnlyReparse = sameSession && size >= (cached?.size ?? 0) const createdAt = appendOnlyReparse ? minDefined(previous?.createdAt, meta.createdAt) @@ -793,7 +836,7 @@ export class CodingCliSessionIndexer { lastActivityAt, createdAt, messageCount: meta.messageCount, - title: meta.title, + title: resolvedTitle, summary: meta.summary, ...(meta.firstUserMessage ? { firstUserMessage: meta.firstUserMessage } : {}), cwd: meta.cwd, @@ -806,6 +849,10 @@ export class CodingCliSessionIndexer { codexTaskEvents: meta.codexTaskEvents, } + if (this.sessionMetadataStore && parsedTitle && parsedTitle !== storedTitle) { + await this.sessionMetadataStore.set(provider.name, sessionId, { derivedTitle: parsedTitle }) + } + this.fileCache.set(cacheKey, { provider: provider.name, mtimeMs, @@ -1035,6 +1082,9 @@ export class CodingCliSessionIndexer { if (existing && existing.baseSession) continue const sessionId = meta.sessionId || provider.extractSessionId(meta.filePath) + const metaKey = makeSessionKey(provider.name, sessionId) + const storedTitle = normalizeTitle(sessionMetadata[metaKey]?.derivedTitle) + const resolvedTitle = resolveSessionTitle(meta.title, existing?.baseSession?.title, storedTitle) const projectPath = meta.cwd ? await resolveGitRepoRoot(meta.cwd) : meta.cwd const baseSession: CodingCliSession = { provider: provider.name, @@ -1042,7 +1092,7 @@ export class CodingCliSessionIndexer { projectPath, lastActivityAt: meta.lastActivityAt || meta.mtimeMs, createdAt: meta.createdAt, - title: meta.title, + title: resolvedTitle, cwd: meta.cwd, sourceFile: meta.filePath, isSubagent: isSubagentSession(meta.filePath) || undefined, @@ -1081,6 +1131,7 @@ export class CodingCliSessionIndexer { filesByProvider: Map, enabledSet: Set, seenCacheKeys: Set, + sessionMetadata: Record, ): Promise { // Collect all file-based entries for enrichment. Files the lightweight scan couldn't // parse (e.g. Codex with 14KB first lines) use file mtime as the recency estimate. @@ -1128,7 +1179,7 @@ export class CodingCliSessionIndexer { if (INDEXER_DELAY_MS > 0) { await new Promise((r) => setTimeout(r, INDEXER_DELAY_MS)) } - await this.updateCacheEntry(provider, filePath, cacheKey) + await this.updateCacheEntry(provider, filePath, cacheKey, sessionMetadata) enriched += 1 if (enriched % REFRESH_YIELD_EVERY === 0) { await yieldToEventLoop() @@ -1202,13 +1253,13 @@ export class CodingCliSessionIndexer { }) fileCount += files.length - if (isColdStart) { - // Selective enrichment: only the most recent non-subagent sessions. - await this.enrichRecentSessions( - new Map([[provider, files]]), enabledSet, seenCacheKeys, - ) - for (const f of files) seenCacheKeys.add(normalizeFilePath(f)) - } else { + if (isColdStart) { + // Selective enrichment: only the most recent non-subagent sessions. + await this.enrichRecentSessions( + new Map([[provider, files]]), enabledSet, seenCacheKeys, sessionMetadata, + ) + for (const f of files) seenCacheKeys.add(normalizeFilePath(f)) + } else { // Warm rescan: process all files (cache hits skip unchanged files). for (const file of files) { processedEntries += 1 @@ -1217,7 +1268,7 @@ export class CodingCliSessionIndexer { } const cacheKey = normalizeFilePath(file) seenCacheKeys.add(cacheKey) - await this.updateCacheEntry(provider, file, cacheKey) + await this.updateCacheEntry(provider, file, cacheKey, sessionMetadata) } } } @@ -1274,7 +1325,7 @@ export class CodingCliSessionIndexer { this.deleteCacheEntry(file) continue } - await this.updateCacheEntry(provider, file, file) + await this.updateCacheEntry(provider, file, file, sessionMetadata) } } diff --git a/server/index.ts b/server/index.ts index a51609274..79f2a4c22 100644 --- a/server/index.ts +++ b/server/index.ts @@ -20,6 +20,7 @@ import { SessionsSyncService } from './sessions-sync/service.js' import { CodingCliSessionIndexer } from './coding-cli/session-indexer.js' import { CodingCliSessionManager } from './coding-cli/session-manager.js' import { wireCodexActivityTracker } from './coding-cli/codex-activity-wiring.js' +import { wireOpencodeActivityTracker } from './coding-cli/opencode-activity-wiring.js' import { claudeProvider } from './coding-cli/providers/claude.js' import { codexProvider } from './coding-cli/providers/codex.js' import { opencodeProvider } from './coding-cli/providers/opencode.js' @@ -185,6 +186,7 @@ async function main() { const terminalMetadata = new TerminalMetadataService() const layoutStore = new LayoutStore() const codexActivity = wireCodexActivityTracker({ registry, codingCliIndexer }) + const opencodeActivity = wireOpencodeActivityTracker({ registry }) const sessionRepairService = getSessionRepairService({ skipDiscovery: true }) const serverInstanceId = await loadOrCreateServerInstanceId() @@ -287,29 +289,32 @@ async function main() { const wsHandler = new WsHandler( server, registry, - codingCliSessionManager, - sdkBridge, - sessionRepairService, - async () => { - const currentSettings = migrateSettingsSortMode(await configStore.getSettings()) - const readError = configStore.getLastReadError() - const configFallback = readError - ? { reason: readError, backupExists: await configStore.backupExists() } - : undefined - return { - settings: currentSettings, - projects: codingCliIndexer.getProjects(), - perfLogging: perfConfig.enabled, - configFallback, - } + { + codingCliManager: codingCliSessionManager, + sdkBridge, + sessionRepairService, + handshakeSnapshotProvider: async () => { + const currentSettings = migrateSettingsSortMode(await configStore.getSettings()) + const readError = configStore.getLastReadError() + const configFallback = readError + ? { reason: readError, backupExists: await configStore.backupExists() } + : undefined + return { + settings: currentSettings, + projects: codingCliIndexer.getProjects(), + perfLogging: perfConfig.enabled, + configFallback, + } + }, + terminalMetaListProvider: () => terminalMetadata.list(), + tabsRegistryStore, + serverInstanceId, + layoutStore, + extensionManager, + codexActivityListProvider: () => codexActivity.tracker.list(), + agentHistorySource, + opencodeActivityListProvider: () => opencodeActivity.tracker.list(), }, - () => terminalMetadata.list(), - tabsRegistryStore, - serverInstanceId, - layoutStore, - extensionManager, - () => codexActivity.tracker.list(), - agentHistorySource, ) attachProxyUpgradeHandler(server) const port = Number(process.env.PORT || 3001) @@ -348,6 +353,9 @@ async function main() { codexActivity.tracker.on('changed', (payload) => { wsHandler.broadcastCodexActivityUpdated(payload) }) + opencodeActivity.tracker.on('changed', (payload) => { + wsHandler.broadcastOpencodeActivityUpdated(payload) + }) const broadcastTerminalMetaUpserts = (upsert: ReturnType) => { if (upsert.length === 0) return @@ -783,6 +791,7 @@ async function main() { // 8b. Stop Codex activity tracker listeners and sweep timer codexActivity.dispose() + opencodeActivity.dispose() // 9. Stop session repair service await sessionRepairService.stop() diff --git a/server/local-port.ts b/server/local-port.ts new file mode 100644 index 000000000..81859955d --- /dev/null +++ b/server/local-port.ts @@ -0,0 +1,39 @@ +import net from 'node:net' + +export type OpencodeServerEndpoint = { + hostname: '127.0.0.1' + port: number +} + +// This is a best-effort ephemeral port reservation. There is an unavoidable race +// between closing this probe socket and the child process binding the same port, +// so callers must still be prepared to retry startup if OpenCode loses the bind. +export async function allocateLocalhostPort(): Promise { + return new Promise((resolve, reject) => { + const server = net.createServer() + + const onError = (error: Error) => { + server.close(() => reject(error)) + } + + server.once('error', onError) + server.listen(0, '127.0.0.1', () => { + const address = server.address() + if (!address || typeof address === 'string') { + server.close(() => reject(new Error('Failed to allocate a localhost control port for OpenCode.'))) + return + } + + server.close((closeError) => { + if (closeError) { + reject(closeError) + return + } + resolve({ + hostname: '127.0.0.1', + port: address.port, + }) + }) + }) + }) +} diff --git a/server/mcp/freshell-tool.ts b/server/mcp/freshell-tool.ts index c94a6c794..8c25c16d5 100644 --- a/server/mcp/freshell-tool.ts +++ b/server/mcp/freshell-tool.ts @@ -239,14 +239,14 @@ const ACTION_PARAMS: Record 'list-tabs': { required: [], optional: [] }, 'select-tab': { required: ['target'], optional: [] }, 'kill-tab': { required: ['target'], optional: [] }, - 'rename-tab': { required: ['target', 'name'], optional: [] }, + 'rename-tab': { required: ['name'], optional: ['target'] }, 'has-tab': { required: ['target'], optional: [] }, 'next-tab': { required: [], optional: [] }, 'prev-tab': { required: [], optional: [] }, 'split-pane': { required: [], optional: ['target', 'direction', 'mode', 'shell', 'cwd', 'browser', 'editor'] }, 'list-panes': { required: [], optional: ['target'] }, 'select-pane': { required: ['target'], optional: [] }, - 'rename-pane': { required: ['target', 'name'], optional: [] }, + 'rename-pane': { required: ['name'], optional: ['target'] }, 'kill-pane': { required: ['target'], optional: [] }, 'resize-pane': { required: ['target'], optional: ['x', 'y', 'sizes'] }, 'swap-pane': { required: ['target', 'with'], optional: [] }, @@ -314,7 +314,8 @@ Tab commands: list-tabs List all tabs. Returns { tabs: [...], activeTabId }. select-tab Activate a tab. Params: target (tab ID or title) kill-tab Close a tab. Params: target - rename-tab Rename a tab. Params: target, name + rename-tab Rename a tab. Params: name, target? + Omit target to rename the caller tab (or active tab as fallback). has-tab Check if a tab exists. Params: target next-tab Switch to the next tab. prev-tab Switch to the previous tab. @@ -325,7 +326,8 @@ Pane commands: list-panes List panes. Params: target? (tab ID or title to filter by). Returns { panes: [...] }. select-pane Activate a pane. Params: target (pane ID or index) kill-pane Close a pane. Params: target - rename-pane Rename a pane. Params: target, name + rename-pane Rename a pane. Params: name, target? + Omit target to rename the caller pane (or the tab's active pane as fallback). resize-pane Resize a pane. Params: target, x? (1-99), y? (1-99) swap-pane Swap two panes. Params: target, with (other pane ID) respawn-pane Restart a pane's terminal. Params: target, mode?, shell?, cwd? @@ -409,6 +411,18 @@ Meta: // Or split an existing pane freshell({ action: "split-pane", params: { editor: "/absolute/path/to/file.ts" } }) +## Playbook: create, split, and rename without manual UI interaction + + seed = freshell({ action: "new-tab", params: { name: "Triager", mode: "codex", cwd: "/path/to/repo" } }) + tabId = seed.data.tabId + p0 = seed.data.paneId + p1 = freshell({ action: "split-pane", params: { target: p0, editor: "/path/to/repo/README.md" } }).data.paneId + + freshell({ action: "rename-tab", params: { target: tabId, name: "Issue 166 work" } }) + freshell({ action: "rename-pane", params: { target: p0, name: "Codex" } }) + freshell({ action: "select-pane", params: { target: p1 } }) + freshell({ action: "rename-pane", params: { name: "Editor" } }) + ## Playbook: open a URL in a browser pane // Open a URL in a new browser tab (correct way) @@ -515,10 +529,12 @@ async function routeAction( return c.delete(`/api/tabs/${encodeURIComponent(tab.id)}`) } case 'rename-tab': { - const target = requireParam(params, 'target') const name = requireParam(params, 'name') + const target = typeof params?.target === 'string' && params.target.trim().length > 0 + ? params.target + : undefined const { tab } = await resolveTabTarget(target) - if (!tab) return { error: `Tab '${target}' not found`, hint: "Run action 'list-tabs' to see available tabs." } + if (!tab) return { error: target ? `Tab '${target}' not found` : 'No active tab found', hint: "Run action 'list-tabs' to see available tabs." } return c.patch(`/api/tabs/${encodeURIComponent(tab.id)}`, { name }) } case 'has-tab': { @@ -561,9 +577,13 @@ async function routeAction( return c.post(`/api/panes/${encodeURIComponent(target)}/select`, {}) } case 'rename-pane': { - const target = requireParam(params, 'target') const name = requireParam(params, 'name') - return c.patch(`/api/panes/${encodeURIComponent(target)}`, { name }) + const target = typeof params?.target === 'string' && params.target.trim().length > 0 + ? params.target + : undefined + const { pane } = await resolvePaneTarget(target) + if (!pane) return { error: target ? `Pane '${target}' not found` : 'No active pane found', hint: "Run action 'list-panes' to see available panes." } + return c.patch(`/api/panes/${encodeURIComponent(pane.id)}`, { name }) } case 'kill-pane': { const target = requireParam(params, 'target') diff --git a/server/sdk-bridge.ts b/server/sdk-bridge.ts index c8414a4f1..8a72d4b76 100644 --- a/server/sdk-bridge.ts +++ b/server/sdk-bridge.ts @@ -1,5 +1,3 @@ -import fs from 'fs' -import path from 'path' import { nanoid } from 'nanoid' import { EventEmitter } from 'events' import { @@ -14,6 +12,7 @@ import { } from '@anthropic-ai/claude-agent-sdk' import type { PermissionResult, PermissionUpdate } from '@anthropic-ai/claude-agent-sdk' import { buildMcpServerCommandArgs } from './mcp/config-writer.js' +import { sanitizeAgentChatPluginPaths } from '../shared/agent-chat-plugins.js' import { formatModelDisplayName } from '../shared/format-model-name.js' import { logger } from './logger.js' import { synthesizeLiveMessageId } from './agent-timeline/ledger.js' @@ -28,11 +27,6 @@ import type { QuestionDefinition, } from './sdk-bridge-types.js' -/** Default plugin candidates resolved from cwd. Checked at session creation time. */ -const DEFAULT_PLUGIN_CANDIDATES = [ - path.join(process.cwd(), '.claude', 'plugins', 'freshell-orchestration'), -] - const log = logger.child({ component: 'sdk-bridge' }) interface InputStreamHandle { @@ -175,18 +169,16 @@ export class SdkBridge extends EventEmitter { return this.handlePermissionRequest(sessionId, toolName, input as Record, ctx) }, settingSources: ['user', 'project', 'local'], - // Explicit plugins override defaults; omit entirely when no defaults exist - // to avoid suppressing SDK's own plugin discovery with an empty array. - // Resolve defaults at session creation time (not module load) so new/removed - // plugins are picked up without a server restart. + // Explicit plugins remain supported for non-Freshell Claude SDK bundles. + // Freshell orchestration itself is provided by the MCP server above. ...((() => { if (options.plugins !== undefined) { - return { plugins: options.plugins.map(p => ({ type: 'local' as const, path: p })) } + return { + plugins: sanitizeAgentChatPluginPaths(options.plugins) + .map(p => ({ type: 'local' as const, path: p })), + } } - const defaults = DEFAULT_PLUGIN_CANDIDATES.filter(p => fs.existsSync(p)) - return defaults.length > 0 - ? { plugins: defaults.map(p => ({ type: 'local' as const, path: p })) } - : {} + return {} })()), }, }) diff --git a/server/session-history-loader.ts b/server/session-history-loader.ts index e0ba02313..3ae4d5354 100644 --- a/server/session-history-loader.ts +++ b/server/session-history-loader.ts @@ -45,6 +45,12 @@ export function extractChatMessagesFromJsonl(content: string): ChatMessage[] { return undefined } + /** Check if content blocks contain only tool_use and tool_result blocks. */ + const isToolOnly = (blocks: ContentBlock[]): boolean => + blocks.length > 0 && blocks.every( + (b) => b.type === 'tool_use' || b.type === 'tool_result' + ) + for (const line of lines) { let obj: any try { @@ -60,11 +66,15 @@ export function extractChatMessagesFromJsonl(content: string): ChatMessage[] { const timestamp = obj.timestamp as string | undefined const msg = obj.message + let newContent: ContentBlock[] + let newMessage: ChatMessage + if (typeof msg === 'string') { // Simple/legacy format: message is a plain string - const nextMessage: ChatMessage = { + newContent = [{ type: 'text', text: msg }] + newMessage = { role, - content: [{ type: 'text', text: msg }], + content: newContent, ...(timestamp ? { timestamp } : {}), ...(pickOptionalString(obj.model) ? { model: pickOptionalString(obj.model) } : {}), ...(pickOptionalString(obj.parentId, obj.parent_id) ? { parentId: pickOptionalString(obj.parentId, obj.parent_id) } : {}), @@ -73,18 +83,12 @@ export function extractChatMessagesFromJsonl(content: string): ChatMessage[] { messageId: pickOptionalString(obj.id, obj.messageId, obj.message_id), } : {}), } - if (!nextMessage.messageId) { - const fingerprint = createDurableMessageFingerprint(nextMessage) - const occurrence = fingerprintOccurrences.get(fingerprint) ?? 0 - fingerprintOccurrences.set(fingerprint, occurrence + 1) - nextMessage.messageId = synthesizeDeterministicMessageId(nextMessage, occurrence) - } - messages.push(nextMessage) } else if (msg && typeof msg === 'object' && Array.isArray(msg.content)) { // Structured format: message is a ClaudeMessage object - const nextMessage: ChatMessage = { + newContent = msg.content as ContentBlock[] + newMessage = { role: msg.role || role, - content: msg.content as ContentBlock[], + content: newContent, ...(timestamp ? { timestamp } : {}), ...(msg.model ? { model: msg.model } : {}), ...(pickOptionalString(msg.parentId, msg.parent_id, obj.parentId, obj.parent_id) ? { @@ -95,13 +99,30 @@ export function extractChatMessagesFromJsonl(content: string): ChatMessage[] { } : {}), ...(typeof msg.id === 'string' && msg.id.trim().length > 0 ? { messageId: msg.id } : {}), } - if (!nextMessage.messageId) { - const fingerprint = createDurableMessageFingerprint(nextMessage) - const occurrence = fingerprintOccurrences.get(fingerprint) ?? 0 - fingerprintOccurrences.set(fingerprint, occurrence + 1) - nextMessage.messageId = synthesizeDeterministicMessageId(nextMessage, occurrence) - } - messages.push(nextMessage) + } else { + continue + } + + // Generate deterministic messageId if not present + if (!newMessage.messageId) { + const fingerprint = createDurableMessageFingerprint(newMessage) + const occurrence = fingerprintOccurrences.get(fingerprint) ?? 0 + fingerprintOccurrences.set(fingerprint, occurrence + 1) + newMessage.messageId = synthesizeDeterministicMessageId(newMessage, occurrence) + } + + // Coalesce consecutive tool-only assistant messages + const prevMessage = messages[messages.length - 1] + if ( + prevMessage?.role === 'assistant' && + newMessage.role === 'assistant' && + isToolOnly(prevMessage.content) && + isToolOnly(newMessage.content) + ) { + // Append content blocks to previous message + prevMessage.content = [...prevMessage.content, ...newMessage.content] + } else { + messages.push(newMessage) } } diff --git a/server/session-metadata-store.ts b/server/session-metadata-store.ts index a97b083ee..25e88b2ac 100644 --- a/server/session-metadata-store.ts +++ b/server/session-metadata-store.ts @@ -4,6 +4,7 @@ import { logger } from './logger.js' export interface SessionMetadataEntry { sessionType?: string + derivedTitle?: string } interface MetadataFile { @@ -126,7 +127,10 @@ export class SessionMetadataStore { if (!sessions[provider]) { sessions[provider] = safeRecord() } - sessions[provider][sessionId] = { ...entry } + sessions[provider][sessionId] = { + ...(sessions[provider][sessionId] ?? {}), + ...entry, + } await this.save({ version: 1, sessions }) }) } diff --git a/server/terminal-registry.ts b/server/terminal-registry.ts index 5e7394ff2..f110de704 100644 --- a/server/terminal-registry.ts +++ b/server/terminal-registry.ts @@ -11,6 +11,7 @@ import { getPerfConfig, logPerfEvent, shouldLog, startPerfTimer } from './perf-l import type { ServerSettings } from '../shared/settings.js' import { convertWindowsPathToWslPath, isReachableDirectorySync } from './path-utils.js' import { isValidClaudeSessionId } from './claude-session-id.js' +import type { OpencodeServerEndpoint } from './local-port.js' import { makeSessionKey, parseSessionKey, type CodingCliProviderName } from './coding-cli/types.js' import { SessionBindingAuthority, type BindResult } from './session-binding-authority.js' import type { @@ -165,10 +166,11 @@ function providerNotificationArgs( return { args: mcpInjection.args, env: mcpInjection.env } } -type ProviderSettings = { +export type ProviderSettings = { permissionMode?: string model?: string sandbox?: string + opencodeServer?: OpencodeServerEndpoint } function resolveCodingCliCommand( @@ -199,6 +201,24 @@ function resolveCodingCliCommand( } } const settingsArgs: string[] = [] + if (mode === 'opencode') { + const endpoint = providerSettings?.opencodeServer + if ( + !endpoint + || endpoint.hostname !== '127.0.0.1' + || !Number.isInteger(endpoint.port) + || endpoint.port <= 0 + || endpoint.port > 65535 + ) { + throw new Error('OpenCode launch requires an allocated localhost control endpoint.') + } + settingsArgs.push( + '--hostname', + endpoint.hostname, + '--port', + String(endpoint.port), + ) + } const effectiveModel = mode === 'opencode' ? resolveOpencodeLaunchModel(providerSettings?.model, { ...process.env, ...commandEnv }) : providerSettings?.model @@ -323,6 +343,7 @@ export type TerminalRecord = { title: string description?: string mode: TerminalMode + opencodeServer?: OpencodeServerEndpoint resumeSessionId?: string pendingResumeName?: string createdAt: number @@ -1137,6 +1158,7 @@ export class TerminalRegistry extends EventEmitter { title, description: undefined, mode: opts.mode, + opencodeServer: opts.mode === 'opencode' ? opts.providerSettings?.opencodeServer : undefined, resumeSessionId: undefined, createdAt, lastActivityAt: createdAt, diff --git a/server/ws-handler.ts b/server/ws-handler.ts index 4147838af..d6f1be6df 100644 --- a/server/ws-handler.ts +++ b/server/ws-handler.ts @@ -16,8 +16,14 @@ import type { SessionScanResult, SessionRepairResult } from './session-scanner/t import { isValidClaudeSessionId } from './claude-session-id.js' import type { SdkBridge } from './sdk-bridge.js' import { createAgentHistorySource, type AgentHistorySource } from './agent-timeline/history-source.js' -import type { CodexActivityRecord, SdkServerMessage, SdkSessionStatus } from '../shared/ws-protocol.js' +import type { + CodexActivityRecord, + OpencodeActivityRecord, + SdkServerMessage, + SdkSessionStatus, +} from '../shared/ws-protocol.js' import type { ExtensionManager } from './extension-manager.js' +import { allocateLocalhostPort } from './local-port.js' import { TerminalStreamBroker } from './terminal-stream/broker.js' import { buildSidebarOpenSessionKeys, type SidebarSessionLocator } from './sidebar-session-selection.js' import { loadSessionHistory } from './session-history-loader.js' @@ -34,6 +40,9 @@ import { CodexActivityListResponseSchema, CodexActivityListSchema, CodexActivityUpdatedSchema, + OpencodeActivityListResponseSchema, + OpencodeActivityListSchema, + OpencodeActivityUpdatedSchema, HelloSchema, PingSchema, TerminalAttachSchema, @@ -73,6 +82,21 @@ type WsHandlerConfig = { terminalCreateRateWindowMs: number } +export type WsHandlerOptions = { + codingCliManager?: CodingCliSessionManager + sdkBridge?: SdkBridge + sessionRepairService?: SessionRepairService + handshakeSnapshotProvider?: HandshakeSnapshotProvider + terminalMetaListProvider?: () => TerminalMeta[] + tabsRegistryStore?: TabsRegistryStore + serverInstanceId?: string + layoutStore?: LayoutStore + extensionManager?: ExtensionManager + codexActivityListProvider?: () => CodexActivityRecord[] + agentHistorySource?: AgentHistorySource + opencodeActivityListProvider?: () => OpencodeActivityRecord[] +} + function readWsHandlerConfig(): WsHandlerConfig { // Use ?? so only unset vars fall back; preserves explicit MAX_REGULAR_WS_MESSAGE_BYTES values. const regularWsMessageBytesEnv = process.env.MAX_REGULAR_WS_MESSAGE_BYTES ?? process.env.DEFAULT_WS_MESSAGE_BYTES @@ -321,6 +345,9 @@ function createScreenshotError(code: ScreenshotErrorCode, message: string): Erro export class WsHandler { private readonly config: WsHandlerConfig private readonly authToken: string + private readonly registry: TerminalRegistry + private readonly codingCliManager?: CodingCliSessionManager + private readonly sdkBridge?: SdkBridge private wss: WebSocketServer private connections = new Set() private clientStates = new Map() @@ -330,6 +357,7 @@ export class WsHandler { private handshakeSnapshotProvider?: HandshakeSnapshotProvider private terminalMetaListProvider?: () => TerminalMeta[] private codexActivityListProvider?: () => CodexActivityRecord[] + private opencodeActivityListProvider?: () => OpencodeActivityRecord[] private tabsRegistryStore?: TabsRegistryStore private layoutStore?: LayoutStore private extensionManager?: ExtensionManager @@ -358,44 +386,39 @@ export class WsHandler { constructor( server: http.Server, - private registry: TerminalRegistry, - private codingCliManager?: CodingCliSessionManager, - private sdkBridge?: SdkBridge, - sessionRepairService?: SessionRepairService, - handshakeSnapshotProvider?: HandshakeSnapshotProvider, - terminalMetaListProvider?: () => TerminalMeta[], - tabsRegistryStore?: TabsRegistryStore, - serverInstanceId?: string, - layoutStore?: LayoutStore, - extensionManager?: ExtensionManager, - codexActivityListProvider?: () => CodexActivityRecord[], - agentHistorySource?: AgentHistorySource, + registry: TerminalRegistry, + options: WsHandlerOptions = {}, ) { this.config = readWsHandlerConfig() this.authToken = getRequiredAuthToken() - this.sessionRepairService = sessionRepairService - this.handshakeSnapshotProvider = handshakeSnapshotProvider - this.terminalMetaListProvider = terminalMetaListProvider - this.codexActivityListProvider = codexActivityListProvider - this.tabsRegistryStore = tabsRegistryStore - this.layoutStore = layoutStore - this.extensionManager = extensionManager - this.agentHistorySource = agentHistorySource ?? (this.sdkBridge + this.registry = registry + this.codingCliManager = options.codingCliManager + this.sdkBridge = options.sdkBridge + this.sessionRepairService = options.sessionRepairService + this.handshakeSnapshotProvider = options.handshakeSnapshotProvider + this.terminalMetaListProvider = options.terminalMetaListProvider + this.codexActivityListProvider = options.codexActivityListProvider + this.opencodeActivityListProvider = options.opencodeActivityListProvider + this.tabsRegistryStore = options.tabsRegistryStore + this.layoutStore = options.layoutStore + this.extensionManager = options.extensionManager + this.agentHistorySource = options.agentHistorySource ?? (this.sdkBridge ? createAgentHistorySource({ loadSessionHistory, getLiveSessionBySdkSessionId: (sdkSessionId) => this.sdkBridge?.getLiveSession(sdkSessionId), getLiveSessionByCliSessionId: (timelineSessionId) => this.sdkBridge?.findLiveSessionByCliSessionId(timelineSessionId), }) : undefined) - this.serverInstanceId = serverInstanceId && serverInstanceId.trim().length > 0 - ? serverInstanceId + this.serverInstanceId = options.serverInstanceId && options.serverInstanceId.trim().length > 0 + ? options.serverInstanceId : `srv-${randomUUID()}` this.bootId = `boot-${randomUUID()}` this.terminalStreamBroker = new TerminalStreamBroker(this.registry) // Build the set of valid CLI provider/mode names from extensions + const extensionManager = this.extensionManager const canEnumerateCliExtensions = typeof extensionManager?.getAll === 'function' - const extensionModes = canEnumerateCliExtensions + const extensionModes = canEnumerateCliExtensions && extensionManager ? extensionManager.getAll() .filter(e => e.manifest.category === 'cli') .map(e => e.manifest.name) @@ -454,6 +477,7 @@ export class WsHandler { TerminalResizeSchema, TerminalKillSchema, CodexActivityListSchema, + OpencodeActivityListSchema, TabsSyncPushSchema, TabsSyncQuerySchema, dynamicCodingCliCreateSchema, @@ -1600,19 +1624,24 @@ export class WsHandler { effectiveResumeSessionId, }, '[TRACE resumeSessionId] about to create terminal') + const spawnProviderSettings = providerSettings + ? { + permissionMode: providerSettings.permissionMode, + model: providerSettings.model, + sandbox: providerSettings.sandbox, + ...(m.mode === 'opencode' + ? { opencodeServer: await allocateLocalhostPort() } + : {}), + } + : undefined + const record = this.registry.create({ mode: m.mode as TerminalMode, shell: m.shell as 'system' | 'cmd' | 'powershell' | 'wsl', cwd: m.cwd, resumeSessionId: effectiveResumeSessionId, envContext: { tabId: m.tabId, paneId: m.paneId }, - providerSettings: providerSettings - ? { - permissionMode: providerSettings.permissionMode, - model: providerSettings.model, - sandbox: providerSettings.sandbox, - } - : undefined, + providerSettings: spawnProviderSettings, }) if (m.mode !== 'shell' && typeof m.cwd === 'string' && m.cwd.trim()) { @@ -1762,6 +1791,26 @@ export class WsHandler { return } + case 'opencode.activity.list': { + const terminals = this.opencodeActivityListProvider ? this.opencodeActivityListProvider() : [] + const response = OpencodeActivityListResponseSchema.safeParse({ + type: 'opencode.activity.list.response', + requestId: m.requestId, + terminals, + }) + if (!response.success) { + log.warn({ issues: response.error.issues }, 'Invalid opencode.activity.list.response payload') + this.sendError(ws, { + code: 'INTERNAL_ERROR', + message: 'OpenCode activity unavailable', + requestId: m.requestId, + }) + return + } + this.send(ws, response.data) + return + } + case 'tabs.sync.push': { if (!this.tabsRegistryStore) { this.sendError(ws, { @@ -2497,6 +2546,21 @@ export class WsHandler { this.broadcastAuthenticated(parsed.data) } + broadcastOpencodeActivityUpdated(msg: { upsert?: OpencodeActivityRecord[]; remove?: string[] }): void { + const parsed = OpencodeActivityUpdatedSchema.safeParse({ + type: 'opencode.activity.updated', + upsert: msg.upsert || [], + remove: msg.remove || [], + }) + + if (!parsed.success) { + log.warn({ issues: parsed.error.issues }, 'Invalid opencode.activity.updated payload') + return + } + + this.broadcastAuthenticated(parsed.data) + } + /** * Prepare for hot rebind: close all client connections and set the closed * flag so the patched server.close() → this.close() is a no-op. diff --git a/shared/agent-chat-plugins.ts b/shared/agent-chat-plugins.ts new file mode 100644 index 000000000..fc06302de --- /dev/null +++ b/shared/agent-chat-plugins.ts @@ -0,0 +1,18 @@ +function normalizePluginPath(value: string): string { + return value.replace(/\\/g, '/').replace(/\/+/g, '/').trim() +} + +export function isLegacyFreshellOrchestrationPluginPath(value: string): boolean { + const normalized = normalizePluginPath(value) + return normalized === '.claude/plugins/freshell-orchestration' + || normalized.endsWith('/.claude/plugins/freshell-orchestration') +} + +export function sanitizeAgentChatPluginPaths(paths: readonly string[] | undefined): string[] { + if (!paths) return [] + return paths + .filter((value): value is string => typeof value === 'string') + .map(value => value.trim()) + .filter(value => value.length > 0) + .filter(value => !isLegacyFreshellOrchestrationPluginPath(value)) +} diff --git a/shared/settings.ts b/shared/settings.ts index cb231fc27..450aaa8c4 100644 --- a/shared/settings.ts +++ b/shared/settings.ts @@ -1,5 +1,6 @@ import { z } from 'zod' +import { sanitizeAgentChatPluginPaths } from './agent-chat-plugins.js' import { DEFAULT_ENABLED_CLI_PROVIDERS } from './coding-cli-defaults.js' import { normalizeTrimmedStringList } from './string-list.js' @@ -60,6 +61,11 @@ const SIDEBAR_LOCAL_KEYS = [ 'width', 'collapsed', ] as const +const AGENT_CHAT_LOCAL_KEYS = [ + 'showThinking', + 'showTools', + 'showTimecodes', +] as const export type ThemeMode = (typeof THEME_VALUES)[number] export type TerminalTheme = (typeof TERMINAL_THEME_VALUES)[number] @@ -180,6 +186,11 @@ export type LocalSettings = { width: number collapsed: boolean } + agentChat: { + showThinking: boolean + showTools: boolean + showTimecodes: boolean + } notifications: { soundEnabled: boolean } @@ -201,7 +212,7 @@ export type ResolvedSettings = { codingCli: ServerSettings['codingCli'] panes: ServerSettings['panes'] & LocalSettings['panes'] editor: ServerSettings['editor'] - agentChat: ServerSettings['agentChat'] + agentChat: ServerSettings['agentChat'] & LocalSettings['agentChat'] extensions: ServerSettings['extensions'] network: ServerSettings['network'] } @@ -465,6 +476,22 @@ function normalizeExtractedLocalSeed(patch: Record): LocalSetti } } + if (isRecord(patch.agentChat)) { + const agentChat: LocalSettingsPatch['agentChat'] = {} + if (typeof patch.agentChat.showThinking === 'boolean') { + agentChat.showThinking = patch.agentChat.showThinking as boolean + } + if (typeof patch.agentChat.showTools === 'boolean') { + agentChat.showTools = patch.agentChat.showTools as boolean + } + if (typeof patch.agentChat.showTimecodes === 'boolean') { + agentChat.showTimecodes = patch.agentChat.showTimecodes as boolean + } + if (Object.keys(agentChat).length > 0) { + normalized.agentChat = agentChat + } + } + if (isRecord(patch.notifications)) { const notifications: LocalSettingsPatch['notifications'] = {} if (typeof patch.notifications.soundEnabled === 'boolean') { @@ -695,6 +722,11 @@ export const defaultLocalSettings: LocalSettings = { width: 288, collapsed: false, }, + agentChat: { + showThinking: false, + showTools: false, + showTimecodes: false, + }, notifications: { soundEnabled: true, }, @@ -860,9 +892,7 @@ function sanitizeServerSettingsPatch(patch: ServerSettingsPatch): ServerSettings agentChat.initialSetupDone = candidate.agentChat.initialSetupDone } if (hasOwn(candidate.agentChat, 'defaultPlugins') && Array.isArray(candidate.agentChat.defaultPlugins)) { - agentChat.defaultPlugins = candidate.agentChat.defaultPlugins.filter( - (value): value is string => typeof value === 'string', - ) + agentChat.defaultPlugins = sanitizeAgentChatPluginPaths(candidate.agentChat.defaultPlugins) } if (isRecord(candidate.agentChat.providers)) { const providers: NonNullable['providers'] = {} @@ -953,7 +983,7 @@ export function mergeServerSettings(base: ServerSettings, patch: ServerSettingsP agentChat: { ...mergeDefined(base.agentChat, agentChatPatch), defaultPlugins: hasOwn(agentChatPatch, 'defaultPlugins') - ? normalizeTrimmedStringList(agentChatPatch?.defaultPlugins) + ? sanitizeAgentChatPluginPaths(agentChatPatch?.defaultPlugins) : base.agentChat.defaultPlugins, providers: mergeRecordOfObjects(base.agentChat.providers, agentChatPatch?.providers), }, @@ -978,6 +1008,7 @@ export function resolveLocalSettings(patch?: LocalSettingsPatch): LocalSettings sortMode: normalizeLocalSortMode(patch?.sidebar?.sortMode), worktreeGrouping: normalizeWorktreeGrouping(patch?.sidebar?.worktreeGrouping), }, + agentChat: mergeDefined(defaultLocalSettings.agentChat, patch?.agentChat), notifications: mergeDefined(defaultLocalSettings.notifications, patch?.notifications), } } @@ -1018,6 +1049,14 @@ export function mergeLocalSettings(base: LocalSettingsPatch | undefined, patch: next.sidebar = sidebar as LocalSettingsPatch['sidebar'] } + const agentChat = mergeDefined( + (base?.agentChat || {}) as Record, + patch.agentChat as Record | undefined, + ) + if (Object.keys(agentChat).length > 0) { + next.agentChat = agentChat as LocalSettingsPatch['agentChat'] + } + const notifications = mergeDefined( (base?.notifications || {}) as Record, patch.notifications as Record | undefined, @@ -1062,6 +1101,7 @@ export function composeResolvedSettings(server: ServerSettings, local: LocalSett ...server.agentChat, defaultPlugins: [...server.agentChat.defaultPlugins], providers: mergeRecordOfObjects(server.agentChat.providers), + ...local.agentChat, }, extensions: { disabled: [...server.extensions.disabled], @@ -1097,6 +1137,9 @@ export function extractLegacyLocalSettingsSeed( } maybeAssignNested(patch, 'sidebar', sidebarPatch) } + if (isRecord(raw.agentChat)) { + maybeAssignNested(patch, 'agentChat', pickKeys(raw.agentChat, AGENT_CHAT_LOCAL_KEYS)) + } if (isRecord(raw.notifications)) { maybeAssignNested(patch, 'notifications', pickKeys(raw.notifications, ['soundEnabled'])) } @@ -1140,5 +1183,14 @@ export function stripLocalSettings( } } + if (isRecord(raw.agentChat)) { + const strippedAgentChat = omitKeys(raw.agentChat, AGENT_CHAT_LOCAL_KEYS) + if (Object.keys(strippedAgentChat).length > 0) { + next.agentChat = strippedAgentChat + } else { + delete next.agentChat + } + } + return next } diff --git a/shared/ws-protocol.ts b/shared/ws-protocol.ts index 6636d4493..80f84d0b7 100644 --- a/shared/ws-protocol.ts +++ b/shared/ws-protocol.ts @@ -106,6 +106,27 @@ export const CodexActivityUpdatedSchema = z.object({ remove: z.array(z.string().min(1)), }) +export const OpencodeActivityRecordSchema = z.object({ + terminalId: z.string().min(1), + sessionId: z.string().optional(), + phase: z.literal('busy'), + updatedAt: z.number().int().nonnegative(), +}) + +export type OpencodeActivityRecord = z.infer + +export const OpencodeActivityListResponseSchema = z.object({ + type: z.literal('opencode.activity.list.response'), + requestId: z.string().min(1), + terminals: z.array(OpencodeActivityRecordSchema), +}) + +export const OpencodeActivityUpdatedSchema = z.object({ + type: z.literal('opencode.activity.updated'), + upsert: z.array(OpencodeActivityRecordSchema), + remove: z.array(z.string().min(1)), +}) + // ────────────────────────────────────────────────────────────── // SDK content block schemas (from Claude Code NDJSON) // ────────────────────────────────────────────────────────────── @@ -230,6 +251,11 @@ export const CodexActivityListSchema = z.object({ requestId: z.string().min(1), }) +export const OpencodeActivityListSchema = z.object({ + type: z.literal('opencode.activity.list'), + requestId: z.string().min(1), +}) + export const UiLayoutSyncSchema = z.object({ type: z.literal('ui.layout.sync'), tabs: z.array(z.object({ @@ -376,6 +402,7 @@ export const ClientMessageSchema = z.discriminatedUnion('type', [ TerminalResizeSchema, TerminalKillSchema, CodexActivityListSchema, + OpencodeActivityListSchema, UiLayoutSyncSchema, UiScreenshotResultSchema, CodingCliCreateSchema, @@ -492,6 +519,10 @@ export type CodexActivityListResponseMessage = z.infer +export type OpencodeActivityListResponseMessage = z.infer + +export type OpencodeActivityUpdatedMessage = z.infer + // -- Sessions -- export type SessionsChangedMessage = { @@ -717,6 +748,8 @@ export type ServerMessage = | TerminalInventoryMessage | CodexActivityListResponseMessage | CodexActivityUpdatedMessage + | OpencodeActivityListResponseMessage + | OpencodeActivityUpdatedMessage | SessionsChangedMessage | SettingsUpdatedMessage | UiCommandMessage diff --git a/src/App.tsx b/src/App.tsx index 17aa9c46f..379406fc3 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -58,6 +58,7 @@ import { setTerminalMetaSnapshot, upsertTerminalMeta, removeTerminalMeta } from import { clearDeadTerminals } from '@/store/panesSlice' import { addTerminalRestoreRequestId } from '@/lib/terminal-restore' import { setCodexActivitySnapshot, upsertCodexActivity, removeCodexActivity, resetCodexActivity } from '@/store/codexActivitySlice' +import { setOpencodeActivitySnapshot, upsertOpencodeActivity, removeOpencodeActivity, resetOpencodeActivity } from '@/store/opencodeActivitySlice' import { setRegistry, updateServerStatus } from '@/store/extensionsSlice' import { handleSdkMessage } from '@/lib/sdk-message-handler' import { createLogger } from '@/lib/client-logger' @@ -220,7 +221,9 @@ export default function App() { const mainContentRef = useRef(null) const userOpenedSidebarOnMobileRef = useRef(false) const codexActivityListRequestSeqRef = useRef(new Map()) + const opencodeActivityListRequestSeqRef = useRef(new Map()) const codexActivityOrderRef = useRef(0) + const opencodeActivityOrderRef = useRef(0) const copiedResetTimeoutRef = useRef | null>(null) const fullscreenTouchStartYRef = useRef(null) const isLandscapeTerminalView = isMobile && isLandscape && view === 'terminal' @@ -626,11 +629,26 @@ export default function App() { }) } + const requestOpencodeActivityList = () => { + const requestId = `opencode-activity-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` + const requestSeq = ++opencodeActivityOrderRef.current + opencodeActivityListRequestSeqRef.current.set(requestId, requestSeq) + ws.send({ + type: 'opencode.activity.list', + requestId, + }) + } + const resetCodexActivityOverlay = () => { codexActivityListRequestSeqRef.current.clear() dispatch(resetCodexActivity()) } + const resetOpencodeActivityOverlay = () => { + opencodeActivityListRequestSeqRef.current.clear() + dispatch(resetOpencodeActivity()) + } + const wsWithOptionalDisconnect = ws as typeof ws & { onDisconnect?: (handler: () => void) => (() => void) | void } @@ -638,6 +656,7 @@ export default function App() { stopWsDisconnectSync = wsWithOptionalDisconnect.onDisconnect?.(() => { if (cancelled) return resetCodexActivityOverlay() + resetOpencodeActivityOverlay() dispatch(setStatus('disconnected')) }) ?? null @@ -723,6 +742,7 @@ export default function App() { // If the initial connect attempt failed before ready, WsClient may still auto-reconnect. // Treat 'ready' as the source of truth for connection status. resetCodexActivityOverlay() + resetOpencodeActivityOverlay() dispatch(setError(undefined)) dispatch(setStatus('ready')) dispatch(setServerInstanceId(nextServerInstanceId)) @@ -739,6 +759,7 @@ export default function App() { // from this bootstrap cycle is still safe for enabling follow-up refreshes. promoteRecentHttpSessionsBaseline() requestCodexActivityList() + requestOpencodeActivityList() lastSessionsRevision = -1 void recoverMissingStartupState() } @@ -828,6 +849,35 @@ export default function App() { })) } } + if (msg.type === 'opencode.activity.list.response') { + const requestId = typeof msg.requestId === 'string' ? msg.requestId : '' + if (!requestId) return + const requestSeq = opencodeActivityListRequestSeqRef.current.get(requestId) + opencodeActivityListRequestSeqRef.current.delete(requestId) + if (requestSeq === undefined) return + dispatch(setOpencodeActivitySnapshot({ + terminals: msg.terminals || [], + requestSeq, + })) + } + if (msg.type === 'opencode.activity.updated') { + const mutationSeq = ++opencodeActivityOrderRef.current + const upsert = Array.isArray(msg.upsert) ? msg.upsert : [] + if (upsert.length > 0) { + dispatch(upsertOpencodeActivity({ + terminals: upsert, + mutationSeq, + })) + } + + const remove = Array.isArray(msg.remove) ? msg.remove : [] + if (remove.length > 0) { + dispatch(removeOpencodeActivity({ + terminalIds: remove, + mutationSeq, + })) + } + } if (msg.type === 'terminal.exit') { const terminalId = msg.terminalId const code = msg.exitCode @@ -903,6 +953,7 @@ export default function App() { } lastReadyServerInstanceId = ws.serverInstanceId resetCodexActivityOverlay() + resetOpencodeActivityOverlay() dispatch(setError(undefined)) dispatch(setStatus('ready')) dispatch(setServerInstanceId(ws.serverInstanceId)) @@ -912,6 +963,7 @@ export default function App() { if (!cancelled) { requestCodexActivityList() + requestOpencodeActivityList() } void recoverMissingStartupState() return @@ -925,6 +977,7 @@ export default function App() { } catch (err: any) { if (!cancelled) { resetCodexActivityOverlay() + resetOpencodeActivityOverlay() dispatch(setStatus('disconnected')) dispatch(setError(err?.message || 'WebSocket connection failed')) if (typeof err?.wsCloseCode === 'number') { diff --git a/src/components/MobileTabStrip.tsx b/src/components/MobileTabStrip.tsx index ef76e91a9..927266d44 100644 --- a/src/components/MobileTabStrip.tsx +++ b/src/components/MobileTabStrip.tsx @@ -8,6 +8,7 @@ import type { ChatSessionState } from '@/store/agentChatTypes' import type { PaneRuntimeActivityRecord } from '@/store/paneRuntimeActivitySlice' const EMPTY_CODEX_ACTIVITY_BY_ID = {} +const EMPTY_OPENCODE_ACTIVITY_BY_ID = {} const EMPTY_AGENT_CHAT_SESSIONS: Record = {} const EMPTY_PANE_RUNTIME_ACTIVITY_BY_ID: Record = {} @@ -24,6 +25,7 @@ export function MobileTabStrip({ onOpenSwitcher, sidebarCollapsed, onToggleSideb const paneLayouts = useAppSelector((s) => s.panes.layouts) const paneTitles = useAppSelector((s) => s.panes.paneTitles) const codexActivityByTerminalId = useAppSelector((s) => s.codexActivity?.byTerminalId ?? EMPTY_CODEX_ACTIVITY_BY_ID) + const opencodeActivityByTerminalId = useAppSelector((s) => s.opencodeActivity?.byTerminalId ?? EMPTY_OPENCODE_ACTIVITY_BY_ID) const agentChatSessions = useAppSelector((s) => s.agentChat?.sessions ?? EMPTY_AGENT_CHAT_SESSIONS) const paneRuntimeActivityByPaneId = useAppSelector( (s) => s.paneRuntimeActivity?.byPaneId ?? EMPTY_PANE_RUNTIME_ACTIVITY_BY_ID @@ -41,6 +43,7 @@ export function MobileTabStrip({ onOpenSwitcher, sidebarCollapsed, onToggleSideb tab: activeTab, paneLayouts, codexActivityByTerminalId, + opencodeActivityByTerminalId, paneRuntimeActivityByPaneId, agentChatSessions, }).length > 0 diff --git a/src/components/Sidebar.tsx b/src/components/Sidebar.tsx index 22078cec7..c79f43f7e 100644 --- a/src/components/Sidebar.tsx +++ b/src/components/Sidebar.tsx @@ -26,6 +26,7 @@ import type { PaneRuntimeActivityRecord } from '@/store/paneRuntimeActivitySlice const EMPTY_TERMINALS: BackgroundTerminal[] = [] const EMPTY_LAYOUTS: Record = {} const EMPTY_CODEX_ACTIVITY_BY_ID = {} +const EMPTY_OPENCODE_ACTIVITY_BY_ID = {} const EMPTY_AGENT_CHAT_SESSIONS: Record = {} const EMPTY_PANE_RUNTIME_ACTIVITY_BY_ID: Record = {} @@ -308,6 +309,7 @@ export default function Sidebar({ tabs: state.tabs.tabs, paneLayouts: state.panes?.layouts ?? EMPTY_LAYOUTS, codexActivityByTerminalId: state.codexActivity?.byTerminalId ?? EMPTY_CODEX_ACTIVITY_BY_ID, + opencodeActivityByTerminalId: state.opencodeActivity?.byTerminalId ?? EMPTY_OPENCODE_ACTIVITY_BY_ID, paneRuntimeActivityByPaneId: state.paneRuntimeActivity?.byPaneId ?? EMPTY_PANE_RUNTIME_ACTIVITY_BY_ID, agentChatSessions: state.agentChat?.sessions ?? EMPTY_AGENT_CHAT_SESSIONS, }), shallowEqual) diff --git a/src/components/TabBar.tsx b/src/components/TabBar.tsx index a90ac5739..cf7471d9f 100644 --- a/src/components/TabBar.tsx +++ b/src/components/TabBar.tsx @@ -130,6 +130,7 @@ const EMPTY_LAYOUTS: Record = {} const EMPTY_PANE_TITLES: Record> = {} const EMPTY_ATTENTION: Record = {} const EMPTY_CODEX_ACTIVITY_BY_ID = {} +const EMPTY_OPENCODE_ACTIVITY_BY_ID = {} const EMPTY_AGENT_CHAT_SESSIONS: Record = {} const EMPTY_PANE_RUNTIME_ACTIVITY_BY_ID: Record = {} @@ -151,6 +152,7 @@ export default function TabBar({ sidebarCollapsed, onToggleSidebar }: TabBarProp const attentionByTab = useAppSelector((s) => s.turnCompletion?.attentionByTab) ?? EMPTY_ATTENTION const attentionByPane = useAppSelector((s) => s.turnCompletion?.attentionByPane) ?? EMPTY_ATTENTION const codexActivityByTerminalId = useAppSelector((s) => s.codexActivity?.byTerminalId ?? EMPTY_CODEX_ACTIVITY_BY_ID) + const opencodeActivityByTerminalId = useAppSelector((s) => s.opencodeActivity?.byTerminalId ?? EMPTY_OPENCODE_ACTIVITY_BY_ID) const agentChatSessions = useAppSelector((s) => s.agentChat?.sessions ?? EMPTY_AGENT_CHAT_SESSIONS) const paneRuntimeActivityByPaneId = useAppSelector( (s) => s.paneRuntimeActivity?.byPaneId ?? EMPTY_PANE_RUNTIME_ACTIVITY_BY_ID @@ -208,9 +210,10 @@ export default function TabBar({ sidebarCollapsed, onToggleSidebar }: TabBarProp tab, paneLayouts: paneLayouts as Record, codexActivityByTerminalId, + opencodeActivityByTerminalId, paneRuntimeActivityByPaneId, agentChatSessions, - }), [agentChatSessions, codexActivityByTerminalId, paneLayouts, paneRuntimeActivityByPaneId]) + }), [agentChatSessions, codexActivityByTerminalId, opencodeActivityByTerminalId, paneLayouts, paneRuntimeActivityByPaneId]) const [renamingId, setRenamingId] = useState(null) const [renameValue, setRenameValue] = useState('') diff --git a/src/components/TabSwitcher.tsx b/src/components/TabSwitcher.tsx index 6dc8dfa44..525d50a8a 100644 --- a/src/components/TabSwitcher.tsx +++ b/src/components/TabSwitcher.tsx @@ -10,6 +10,7 @@ import type { ChatSessionState } from '@/store/agentChatTypes' import type { PaneRuntimeActivityRecord } from '@/store/paneRuntimeActivitySlice' const EMPTY_CODEX_ACTIVITY_BY_ID = {} +const EMPTY_OPENCODE_ACTIVITY_BY_ID = {} const EMPTY_AGENT_CHAT_SESSIONS: Record = {} const EMPTY_PANE_RUNTIME_ACTIVITY_BY_ID: Record = {} @@ -39,6 +40,7 @@ export function TabSwitcher({ onClose }: TabSwitcherProps) { const paneLayouts = useAppSelector((s) => s.panes.layouts) const paneTitles = useAppSelector((s) => s.panes.paneTitles) const codexActivityByTerminalId = useAppSelector((s) => s.codexActivity?.byTerminalId ?? EMPTY_CODEX_ACTIVITY_BY_ID) + const opencodeActivityByTerminalId = useAppSelector((s) => s.opencodeActivity?.byTerminalId ?? EMPTY_OPENCODE_ACTIVITY_BY_ID) const agentChatSessions = useAppSelector((s) => s.agentChat?.sessions ?? EMPTY_AGENT_CHAT_SESSIONS) const paneRuntimeActivityByPaneId = useAppSelector( (s) => s.paneRuntimeActivity?.byPaneId ?? EMPTY_PANE_RUNTIME_ACTIVITY_BY_ID @@ -95,6 +97,7 @@ export function TabSwitcher({ onClose }: TabSwitcherProps) { tab, paneLayouts, codexActivityByTerminalId, + opencodeActivityByTerminalId, paneRuntimeActivityByPaneId, agentChatSessions, }).length > 0 diff --git a/src/components/TerminalView.tsx b/src/components/TerminalView.tsx index fcdd6782f..f044c5f90 100644 --- a/src/components/TerminalView.tsx +++ b/src/components/TerminalView.tsx @@ -1359,22 +1359,6 @@ function TerminalView({ tabId, paneId, paneContent, hidden }: TerminalViewProps) } }) - // When the clipboard contains an image but no text, the browser paste event - // fires but xterm has nothing to write. CLI tools like Codex listen for the - // raw Ctrl+V byte (\x16) to trigger a native clipboard read. Send it so - // image paste works for CLIs running inside the terminal. - const xtermTextarea = term.textarea - const handleImagePaste = (e: ClipboardEvent) => { - const hasText = e.clipboardData?.types.includes('text/plain') - const hasImage = Array.from(e.clipboardData?.items ?? []).some( - (item) => item.kind === 'file' && item.type.startsWith('image/'), - ) - if (hasImage && !hasText) { - sendInput('\x16') - } - } - xtermTextarea?.addEventListener('paste', handleImagePaste) - term.attachCustomKeyEventHandler((event) => { if ( event.ctrlKey && @@ -1458,7 +1442,6 @@ function TerminalView({ tabId, paneId, paneContent, hidden }: TerminalViewProps) delete wrapperEl.dataset.hoveredUrl } ro.disconnect() - xtermTextarea?.removeEventListener('paste', handleImagePaste) unregisterActions() unregisterCaptureHandler() if (writeQueueRef.current === writeQueue) { diff --git a/src/components/agent-chat/AgentChatSettings.tsx b/src/components/agent-chat/AgentChatSettings.tsx index 232eb21b7..80c3ebf46 100644 --- a/src/components/agent-chat/AgentChatSettings.tsx +++ b/src/components/agent-chat/AgentChatSettings.tsx @@ -7,7 +7,11 @@ import type { AgentChatPaneContent } from '@/store/paneTypes' import type { AgentChatProviderConfig } from '@/lib/agent-chat-types' import { formatModelDisplayName } from '../../../shared/format-model-name' -type SettingsFields = Pick +type SettingsFields = Pick & { + showThinking?: boolean + showTools?: boolean + showTimecodes?: boolean +} interface AgentChatSettingsProps { model: string diff --git a/src/components/agent-chat/AgentChatView.tsx b/src/components/agent-chat/AgentChatView.tsx index dfa19ec73..e0b54cef1 100644 --- a/src/components/agent-chat/AgentChatView.tsx +++ b/src/components/agent-chat/AgentChatView.tsx @@ -30,6 +30,7 @@ import { getAgentChatProviderConfig } from '@/lib/agent-chat-utils' import { isValidClaudeSessionId } from '@/lib/claude-session-id' import { getInstalledPerfAuditBridge } from '@/lib/perf-audit-bridge' import { saveServerSettingsPatch } from '@/store/settingsThunks' +import { updateSettingsLocal } from '@/store/settingsSlice' import type { Tab } from '@/store/types' import { buildAgentChatPersistedIdentityUpdate, @@ -64,9 +65,10 @@ export default function AgentChatView({ tabId, paneId, paneContent, hidden }: Ag const defaultModel = providerConfig?.defaultModel ?? 'claude-opus-4-6' const defaultPermissionMode = providerConfig?.defaultPermissionMode ?? 'bypassPermissions' const defaultEffort = providerConfig?.defaultEffort ?? 'high' - const defaultShowThinking = providerConfig?.defaultShowThinking ?? true - const defaultShowTools = providerConfig?.defaultShowTools ?? true - const defaultShowTimecodes = providerConfig?.defaultShowTimecodes ?? false + const localSettings = useAppSelector((state) => state.settings.settings) + const defaultShowThinking = localSettings.agentChat.showThinking + const defaultShowTools = localSettings.agentChat.showTools + const defaultShowTimecodes = localSettings.agentChat.showTimecodes const providerLabel = providerConfig?.label ?? 'Agent Chat' const createSentRef = useRef(false) const attachSentRef = useRef(false) @@ -543,11 +545,28 @@ export default function AgentChatView({ tabId, paneId, paneContent, hidden }: Ag }, []) const handleSettingsChange = useCallback((changes: Record) => { - dispatch(updatePaneContent({ - tabId, - paneId, - content: { ...paneContentRef.current, ...changes }, - })) + const paneChanges: Partial = {} + const localChanges: Record = {} + + for (const [key, value] of Object.entries(changes)) { + if (key === 'showThinking' || key === 'showTools' || key === 'showTimecodes') { + localChanges[key] = value + } else { + (paneChanges as Record)[key] = value + } + } + + if (Object.keys(paneChanges).length > 0) { + dispatch(updatePaneContent({ + tabId, + paneId, + content: { ...paneContentRef.current, ...paneChanges }, + })) + } + + if (Object.keys(localChanges).length > 0) { + dispatch(updateSettingsLocal({ agentChat: localChanges })) + } const pc = paneContentRef.current @@ -703,9 +722,9 @@ export default function AgentChatView({ tabId, paneId, paneContent, hidden }: Ag model={paneContent.model ?? defaultModel} permissionMode={paneContent.permissionMode ?? defaultPermissionMode} effort={paneContent.effort ?? defaultEffort} - showThinking={paneContent.showThinking ?? defaultShowThinking} - showTools={paneContent.showTools ?? defaultShowTools} - showTimecodes={paneContent.showTimecodes ?? defaultShowTimecodes} + showThinking={defaultShowThinking} + showTools={defaultShowTools} + showTimecodes={defaultShowTimecodes} sessionStarted={sessionStarted} defaultOpen={shouldShowSettings} modelOptions={availableModels.length > 0 ? availableModels : undefined} @@ -765,9 +784,9 @@ export default function AgentChatView({ tabId, paneId, paneContent, hidden }: Ag content={turn.message.content} timestamp={turn.message.timestamp} model={turn.message.model} - showThinking={paneContent.showThinking ?? defaultShowThinking} - showTools={paneContent.showTools ?? defaultShowTools} - showTimecodes={paneContent.showTimecodes ?? defaultShowTimecodes} + showThinking={defaultShowThinking} + showTools={defaultShowTools} + showTimecodes={defaultShowTimecodes} /> ) } @@ -785,9 +804,9 @@ export default function AgentChatView({ tabId, paneId, paneContent, hidden }: Ag turnId: item.turnId, })) }} - showThinking={paneContent.showThinking ?? defaultShowThinking} - showTools={paneContent.showTools ?? defaultShowTools} - showTimecodes={paneContent.showTimecodes ?? defaultShowTimecodes} + showThinking={defaultShowThinking} + showTools={defaultShowTools} + showTimecodes={defaultShowTimecodes} /> ) })} @@ -805,9 +824,9 @@ export default function AgentChatView({ tabId, paneId, paneContent, hidden }: Ag key={`turn-${i}`} userMessage={item.user} assistantMessage={item.assistant} - showThinking={paneContent.showThinking ?? defaultShowThinking} - showTools={paneContent.showTools ?? defaultShowTools} - showTimecodes={paneContent.showTimecodes ?? defaultShowTimecodes} + showThinking={defaultShowThinking} + showTools={defaultShowTools} + showTimecodes={defaultShowTimecodes} /> ) } @@ -817,9 +836,9 @@ export default function AgentChatView({ tabId, paneId, paneContent, hidden }: Ag speaker={item.user.role} content={item.user.content} timestamp={item.user.timestamp} - showThinking={paneContent.showThinking ?? defaultShowThinking} - showTools={paneContent.showTools ?? defaultShowTools} - showTimecodes={paneContent.showTimecodes ?? defaultShowTimecodes} + showThinking={defaultShowThinking} + showTools={defaultShowTools} + showTimecodes={defaultShowTimecodes} />