diff --git a/src/__tests__/openai-compat.test.ts b/src/__tests__/openai-compat.test.ts index 4766db9..8197988 100644 --- a/src/__tests__/openai-compat.test.ts +++ b/src/__tests__/openai-compat.test.ts @@ -12,6 +12,9 @@ import { buildToolPromptBlock, parseToolCallsFromText, serializeToolResults, + isToolsPerMessageModeEnabled, + noToolsSystemPrompt, + buildSessionSystemPrompt, } from '../openai-compat.js'; import { resolveEngineAndModel, getModelList } from '../models.js'; import type { OpenAIChatMessage } from '../openai-compat.js'; @@ -67,6 +70,16 @@ describe('resolveEngineAndModel', () => { // ─── resolveSessionKey ─────────────────────────────────────────────────────── describe('resolveSessionKey', () => { + let savedToolsPerMessage: string | undefined; + beforeEach(() => { + savedToolsPerMessage = process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE; + delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE; + }); + afterEach(() => { + if (savedToolsPerMessage === undefined) delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE; + else process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = savedToolsPerMessage; + }); + it('prefers X-Session-Id header', () => { const key = resolveSessionKey({ messages: [], user: 'user-1' }, { 'x-session-id': 'my-session' }); expect(key).toBe('my-session'); @@ -159,6 +172,194 @@ describe('resolveSessionKey', () => { const key = resolveSessionKey({ model: 'claude-opus-4-6', messages: [{ role: 'user', content: 'hi' }] }, {}); expect(key).toMatch(/^sys-[0-9a-f]{12}$/); }); + + it('produces distinct keys when system prompt is identical but tools differ', () => { + const base = { + model: 'claude-sonnet-4-6', + messages: [ + { role: 'system' as const, content: 'SAME' }, + { role: 'user' as const, content: 'hi' }, + ], + }; + const withToolsA = resolveSessionKey( + { + ...base, + tools: [{ type: 'function', function: { name: 'foo', description: 'does foo', parameters: {} } }], + }, + {}, + ); + const withToolsB = resolveSessionKey( + { + ...base, + tools: [{ type: 'function', function: { name: 'bar', description: 'does bar', parameters: {} } }], + }, + {}, + ); + const withoutTools = resolveSessionKey(base, {}); + expect(withToolsA).not.toBe(withToolsB); + expect(withToolsA).not.toBe(withoutTools); + expect(withToolsB).not.toBe(withoutTools); + }); + + it('produces the same key for identical tool lists across calls', () => { + const body = { + model: 'claude-sonnet-4-6', + messages: [ + { role: 'system' as const, content: 'SAME' }, + { role: 'user' as const, content: 'hi' }, + ], + tools: [ + { type: 'function', function: { name: 'foo', description: 'does foo', parameters: {} } }, + { type: 'function', function: { name: 'bar', description: 'does bar', parameters: {} } }, + ], + }; + expect(resolveSessionKey(body, {})).toBe(resolveSessionKey(body, {})); + }); + + it('ignores tools in the key when OPENAI_COMPAT_TOOLS_PER_MESSAGE=1 (legacy opt-out)', () => { + process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = '1'; + const base = { + model: 'claude-sonnet-4-6', + messages: [ + { role: 'system' as const, content: 'SAME' }, + { role: 'user' as const, content: 'hi' }, + ], + }; + const withToolsA = resolveSessionKey( + { + ...base, + tools: [{ type: 'function', function: { name: 'foo', description: 'does foo', parameters: {} } }], + }, + {}, + ); + const withToolsB = resolveSessionKey( + { + ...base, + tools: [{ type: 'function', function: { name: 'bar', description: 'does bar', parameters: {} } }], + }, + {}, + ); + const withoutTools = resolveSessionKey(base, {}); + // With the opt-out enabled, the tool list is NOT part of the session key, + // so all three bodies collapse to the same key (the pre-fix behavior). + expect(withToolsA).toBe(withToolsB); + expect(withToolsA).toBe(withoutTools); + }); +}); + +// ─── isToolsPerMessageModeEnabled ──────────────────────────────────────────── + +describe('isToolsPerMessageModeEnabled', () => { + let saved: string | undefined; + beforeEach(() => { + saved = process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE; + delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE; + }); + afterEach(() => { + if (saved === undefined) delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE; + else process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = saved; + }); + + it('defaults to false when the env var is unset', () => { + expect(isToolsPerMessageModeEnabled()).toBe(false); + }); + + it('returns true for "1", "true", "yes" (case-insensitive, trimmed)', () => { + for (const v of ['1', 'true', 'yes', 'TRUE', ' Yes ', 'YES']) { + process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = v; + expect(isToolsPerMessageModeEnabled()).toBe(true); + } + }); + + it('returns false for "0", "false", "no", unknown values', () => { + for (const v of ['0', 'false', 'no', 'off', 'maybe', '']) { + process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = v; + expect(isToolsPerMessageModeEnabled()).toBe(false); + } + }); +}); + +// ─── noToolsSystemPrompt ───────────────────────────────────────────────────── + +describe('noToolsSystemPrompt', () => { + it('references "block below" for system location', () => { + const prompt = noToolsSystemPrompt('system'); + expect(prompt).toContain('block below'); + expect(prompt).not.toContain('in the user message'); + }); + + it('references "user message" for user location', () => { + const prompt = noToolsSystemPrompt('user'); + expect(prompt).toContain('in the user message'); + expect(prompt).not.toContain('block below'); + }); + + it('always contains the no-tools preamble', () => { + for (const loc of ['system', 'user'] as const) { + const prompt = noToolsSystemPrompt(loc); + expect(prompt).toContain('You do NOT have access to any tools'); + expect(prompt).toContain(''); + } + }); +}); + +// ─── buildSessionSystemPrompt ──────────────────────────────────────────────── + +describe('buildSessionSystemPrompt', () => { + let saved: string | undefined; + beforeEach(() => { + saved = process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE; + delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE; + }); + afterEach(() => { + if (saved === undefined) delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE; + else process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = saved; + }); + + const sampleTools = [ + { + type: 'function' as const, + function: { + name: 'get_weather', + description: 'Get weather for a city', + parameters: { type: 'object', properties: { city: { type: 'string' } } }, + }, + }, + ]; + + it('default mode: embeds in the system prompt', () => { + const result = buildSessionSystemPrompt(sampleTools, undefined); + expect(result).toContain(''); + expect(result).toContain('get_weather'); + expect(result).toContain('block below'); + }); + + it('default mode: appends caller system prompt after tools', () => { + const result = buildSessionSystemPrompt(sampleTools, 'You are a weather bot.'); + expect(result).toContain(''); + expect(result).toContain('You are a weather bot.'); + // Caller prompt comes after tool block + const toolIdx = result.indexOf(''); + const callerIdx = result.indexOf('You are a weather bot.'); + expect(callerIdx).toBeGreaterThan(toolIdx); + }); + + it('legacy mode: does NOT embed tool definitions in system prompt', () => { + process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = '1'; + const result = buildSessionSystemPrompt(sampleTools, undefined); + // The tool block (with actual tool definitions) should NOT be present + expect(result).not.toContain('get_weather'); + expect(result).not.toContain('## Available Tools'); + // But the preamble still references user message location + expect(result).toContain('in the user message'); + }); + + it('legacy mode: still includes caller system prompt', () => { + process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = '1'; + const result = buildSessionSystemPrompt(sampleTools, 'You are a weather bot.'); + expect(result).not.toContain('get_weather'); + expect(result).toContain('You are a weather bot.'); + }); }); // ─── sessionNameFromKey ────────────────────────────────────────────────────── diff --git a/src/openai-compat.ts b/src/openai-compat.ts index 746fbf8..fb7b5bd 100644 --- a/src/openai-compat.ts +++ b/src/openai-compat.ts @@ -105,6 +105,66 @@ export interface OpenAIChatCompletionChunk { * responses from the wrong model. Originally diagnosed in PR #40 by * @megayounus786. */ +/** + * When set (to '1', 'true', 'yes'), the proxy preserves the pre-fix behavior: + * - tools injected into every user message + * - session key NOT fingerprinted by tools (same session across tool changes) + * Default (unset) is the new behavior: tools embedded in session system prompt + * at create time + session key fingerprinted by tools. The new behavior + * eliminates periodic latency spikes but does not support mutating the tool + * list within a single session (a new session is created when tools change). + */ +export function isToolsPerMessageModeEnabled(): boolean { + const v = process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE; + if (!v) return false; + const t = v.trim().toLowerCase(); + return t === '1' || t === 'true' || t === 'yes'; +} + +/** + * Generate the "no built-in tools" system prompt preamble. + * The `toolLocation` parameter controls how the model is told where to find + * tool definitions — 'system' means "in the block below" + * (tools baked into system prompt), 'user' means "in tags + * in the user message" (legacy per-turn injection). + */ +export function noToolsSystemPrompt(toolLocation: 'system' | 'user'): string { + const locationHint = + toolLocation === 'system' + ? 'in the block below' + : 'in tags in the user message'; + return ( + 'You are a helpful AI assistant acting as a pure LLM behind an API proxy.\n' + + 'You do NOT have access to any tools such as Bash, Read, Write, Edit, Glob, Grep, or any other built-in tools.\n' + + 'Do NOT attempt to call any tools or execute any commands.\n' + + `When you need to perform an action, use ONLY the tools defined ${locationHint}, ` + + 'and respond with tags as instructed there.\n' + + 'If no are provided, respond with text only.' + ); +} + +/** + * Build the full session system prompt for a Claude Code session with tools. + * Exported for testability — called from `handleChatCompletion`. + * + * - Default mode: tools are embedded in the system prompt (cacheable by Anthropic). + * - Legacy mode (OPENAI_COMPAT_TOOLS_PER_MESSAGE=1): tools are NOT embedded; + * they'll be injected per-turn in the user message instead. + */ +export function buildSessionSystemPrompt( + tools: OpenAIChatCompletionRequest['tools'], + callerSystemPrompt: string | undefined, +): string { + if (isToolsPerMessageModeEnabled()) { + const preamble = noToolsSystemPrompt('user'); + return callerSystemPrompt ? `${preamble}\n\n${callerSystemPrompt}` : preamble; + } + const preamble = noToolsSystemPrompt('system'); + const toolBlock = buildToolPromptBlock(tools); + const systemWithTools = `${preamble}\n\n${toolBlock}`; + return callerSystemPrompt ? `${systemWithTools}\n\n${callerSystemPrompt}` : systemWithTools; +} + export function resolveSessionKey(body: OpenAIChatCompletionRequest, headers: http.IncomingHttpHeaders): string { const headerKey = headers['x-session-id']; if (typeof headerKey === 'string' && headerKey.trim()) return headerKey.trim(); @@ -114,11 +174,33 @@ export function resolveSessionKey(body: OpenAIChatCompletionRequest, headers: ht .map((m) => (typeof m.content === 'string' ? m.content : JSON.stringify(m.content))) .join('\n'); const modelTag = (body.model || '').toString(); - if (sys || modelTag) { + // Include a fingerprint of the tool list so that two requests with the same + // system prompt but different tool definitions land in different sessions. + // The tool schemas are baked into the session system prompt on create; if + // tools change we need a new session rather than re-using a stale one. + // Hash only tool names + a short description prefix to keep the fingerprint + // small and stable against schema formatting differences. + // + // Opt-out: OPENAI_COMPAT_TOOLS_PER_MESSAGE=1 restores the pre-fix behavior + // of keying sessions only by system prompt + model. Enable this if you have + // callers that mutate their tool list within one conversation and rely on + // continuing history across tool changes. + const toolsFingerprint = isToolsPerMessageModeEnabled() + ? '' + : (body.tools || []) + .map((t) => { + const fn = t?.function; + if (!fn?.name) return ''; + const descPrefix = (typeof fn.description === 'string' ? fn.description : '').slice(0, 64); + return `${fn.name}:${descPrefix}`; + }) + .filter(Boolean) + .join('|'); + if (sys || modelTag || toolsFingerprint) { return ( 'sys-' + createHash('sha1') - .update(modelTag + '\n' + sys) + .update(modelTag + '\n' + sys + '\n' + toolsFingerprint) .digest('hex') .slice(0, 12) ); @@ -513,21 +595,11 @@ export async function handleChatCompletion( } // Claude Code CLI supports --system-prompt (replace) and --append-system-prompt (append). // When the caller provides tools, use --system-prompt to REPLACE the CLI's entire - // system prompt. This removes the CLI's built-in tool definitions (Bash, Read, Edit, etc.) - // so the model only sees the caller's tools via in the user message. - // --append-system-prompt doesn't work because the CLI's own tool instructions take priority. + // system prompt via buildSessionSystemPrompt(). See that function's doc for details + // on default vs legacy (OPENAI_COMPAT_TOOLS_PER_MESSAGE=1) behavior. if (engine === 'claude') { if (request.tools?.length) { - const noToolsPrompt = - 'You are a helpful AI assistant acting as a pure LLM behind an API proxy.\n' + - 'You do NOT have access to any tools such as Bash, Read, Write, Edit, Glob, Grep, or any other built-in tools.\n' + - 'Do NOT attempt to call any tools or execute any commands.\n' + - 'When you need to perform an action, use ONLY the tools defined in tags in the user message, ' + - 'and respond with tags as instructed there.\n' + - 'If no are provided, respond with text only.'; - sessionConfig.systemPrompt = extracted.systemPrompt - ? `${noToolsPrompt}\n\n${extracted.systemPrompt}` - : noToolsPrompt; + sessionConfig.systemPrompt = buildSessionSystemPrompt(request.tools, extracted.systemPrompt); } else if (extracted.systemPrompt) { sessionConfig.appendSystemPrompt = extracted.systemPrompt; } @@ -568,9 +640,23 @@ export async function handleChatCompletion( userMessage = `\n${extracted.systemPrompt}\n\n\n${userMessage}`; } - // Inject tool definitions into the user message + // Inject tool definitions into the user message. + // + // Default path for Claude Code: tools are already embedded in the session + // system prompt (see session create block above) — do NOT re-inject them + // per turn. Repeatedly prepending a large block to every + // user message bloats each turn's input, defeats Anthropic prompt caching, + // and was the cause of periodic 30-50s latency spikes. + // + // Opt-out path for Claude Code (OPENAI_COMPAT_TOOLS_PER_MESSAGE=1): fall + // back to the legacy behavior of injecting the tool block into each user + // message. Enables dynamic tool list updates within a single session. + // + // Non-claude engines: the CLI is spawned fresh per turn with no persistent + // system prompt, so tools must always be injected per message. const hasTools = !!request.tools?.length; - if (hasTools) { + const injectToolsPerTurn = hasTools && (engine !== 'claude' || isToolsPerMessageModeEnabled()); + if (injectToolsPerTurn) { const toolBlock = buildToolPromptBlock(request.tools); userMessage = `${toolBlock}\n\n${userMessage}`; }