Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 201 additions & 0 deletions src/__tests__/openai-compat.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import {
buildToolPromptBlock,
parseToolCallsFromText,
serializeToolResults,
isToolsPerMessageModeEnabled,
noToolsSystemPrompt,
buildSessionSystemPrompt,
} from '../openai-compat.js';
import { resolveEngineAndModel, getModelList } from '../models.js';
import type { OpenAIChatMessage } from '../openai-compat.js';
Expand Down Expand Up @@ -67,6 +70,16 @@ describe('resolveEngineAndModel', () => {
// ─── resolveSessionKey ───────────────────────────────────────────────────────

describe('resolveSessionKey', () => {
let savedToolsPerMessage: string | undefined;
beforeEach(() => {
savedToolsPerMessage = process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE;
delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE;
});
afterEach(() => {
if (savedToolsPerMessage === undefined) delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE;
else process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = savedToolsPerMessage;
});

it('prefers X-Session-Id header', () => {
const key = resolveSessionKey({ messages: [], user: 'user-1' }, { 'x-session-id': 'my-session' });
expect(key).toBe('my-session');
Expand Down Expand Up @@ -159,6 +172,194 @@ describe('resolveSessionKey', () => {
const key = resolveSessionKey({ model: 'claude-opus-4-6', messages: [{ role: 'user', content: 'hi' }] }, {});
expect(key).toMatch(/^sys-[0-9a-f]{12}$/);
});

it('produces distinct keys when system prompt is identical but tools differ', () => {
  // Same model + system prompt on every request; only the tool list varies.
  const shared = {
    model: 'claude-sonnet-4-6',
    messages: [
      { role: 'system' as const, content: 'SAME' },
      { role: 'user' as const, content: 'hi' },
    ],
  };
  // Helper to build a one-entry OpenAI-style tool list.
  const singleTool = (name: string, description: string) => [
    { type: 'function', function: { name, description, parameters: {} } },
  ];
  const keyFoo = resolveSessionKey({ ...shared, tools: singleTool('foo', 'does foo') }, {});
  const keyBar = resolveSessionKey({ ...shared, tools: singleTool('bar', 'does bar') }, {});
  const keyNone = resolveSessionKey(shared, {});
  // Each tool configuration must land in its own session.
  expect(keyFoo).not.toBe(keyBar);
  expect(keyFoo).not.toBe(keyNone);
  expect(keyBar).not.toBe(keyNone);
});

it('produces the same key for identical tool lists across calls', () => {
  // Determinism check: resolving the identical body twice yields one key.
  const request = {
    model: 'claude-sonnet-4-6',
    messages: [
      { role: 'system' as const, content: 'SAME' },
      { role: 'user' as const, content: 'hi' },
    ],
    tools: [
      { type: 'function', function: { name: 'foo', description: 'does foo', parameters: {} } },
      { type: 'function', function: { name: 'bar', description: 'does bar', parameters: {} } },
    ],
  };
  const firstKey = resolveSessionKey(request, {});
  const secondKey = resolveSessionKey(request, {});
  expect(firstKey).toBe(secondKey);
});

it('ignores tools in the key when OPENAI_COMPAT_TOOLS_PER_MESSAGE=1 (legacy opt-out)', () => {
  process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = '1';
  const shared = {
    model: 'claude-sonnet-4-6',
    messages: [
      { role: 'system' as const, content: 'SAME' },
      { role: 'user' as const, content: 'hi' },
    ],
  };
  const singleTool = (name: string, description: string) => [
    { type: 'function', function: { name, description, parameters: {} } },
  ];
  const keyFoo = resolveSessionKey({ ...shared, tools: singleTool('foo', 'does foo') }, {});
  const keyBar = resolveSessionKey({ ...shared, tools: singleTool('bar', 'does bar') }, {});
  const keyNone = resolveSessionKey(shared, {});
  // With the opt-out enabled, the tool list is NOT part of the session key,
  // so all three bodies collapse to the same key (the pre-fix behavior).
  expect(keyFoo).toBe(keyBar);
  expect(keyFoo).toBe(keyNone);
});
});

// ─── isToolsPerMessageModeEnabled ────────────────────────────────────────────

describe('isToolsPerMessageModeEnabled', () => {
  // Save/restore the env var so these tests cannot leak state into others.
  let previousValue: string | undefined;
  beforeEach(() => {
    previousValue = process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE;
    delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE;
  });
  afterEach(() => {
    if (previousValue !== undefined) {
      process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = previousValue;
    } else {
      delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE;
    }
  });

  it('defaults to false when the env var is unset', () => {
    expect(isToolsPerMessageModeEnabled()).toBe(false);
  });

  it('returns true for "1", "true", "yes" (case-insensitive, trimmed)', () => {
    const enablingValues = ['1', 'true', 'yes', 'TRUE', ' Yes ', 'YES'];
    for (const value of enablingValues) {
      process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = value;
      expect(isToolsPerMessageModeEnabled()).toBe(true);
    }
  });

  it('returns false for "0", "false", "no", unknown values', () => {
    const disablingValues = ['0', 'false', 'no', 'off', 'maybe', ''];
    for (const value of disablingValues) {
      process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = value;
      expect(isToolsPerMessageModeEnabled()).toBe(false);
    }
  });
});

// ─── noToolsSystemPrompt ─────────────────────────────────────────────────────

describe('noToolsSystemPrompt', () => {
  it('references "block below" for system location', () => {
    const text = noToolsSystemPrompt('system');
    expect(text).toContain('block below');
    expect(text).not.toContain('in the user message');
  });

  it('references "user message" for user location', () => {
    const text = noToolsSystemPrompt('user');
    expect(text).not.toContain('block below');
    expect(text).toContain('in the user message');
  });

  it('always contains the no-tools preamble', () => {
    // Both variants must carry the same preamble and tool-call instructions.
    (['system', 'user'] as const).forEach((location) => {
      const text = noToolsSystemPrompt(location);
      expect(text).toContain('You do NOT have access to any tools');
      expect(text).toContain('<tool_calls>');
    });
  });
});

// ─── buildSessionSystemPrompt ────────────────────────────────────────────────

describe('buildSessionSystemPrompt', () => {
  // Save/restore the env var so mode changes here cannot leak into other suites.
  let previousValue: string | undefined;
  beforeEach(() => {
    previousValue = process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE;
    delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE;
  });
  afterEach(() => {
    if (previousValue !== undefined) {
      process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = previousValue;
    } else {
      delete process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE;
    }
  });

  // One representative OpenAI-style function tool used by every test below.
  const sampleTools = [
    {
      type: 'function' as const,
      function: {
        name: 'get_weather',
        description: 'Get weather for a city',
        parameters: { type: 'object', properties: { city: { type: 'string' } } },
      },
    },
  ];

  it('default mode: embeds <available_tools> in the system prompt', () => {
    const prompt = buildSessionSystemPrompt(sampleTools, undefined);
    expect(prompt).toContain('<available_tools>');
    expect(prompt).toContain('get_weather');
    expect(prompt).toContain('block below');
  });

  it('default mode: appends caller system prompt after tools', () => {
    const prompt = buildSessionSystemPrompt(sampleTools, 'You are a weather bot.');
    expect(prompt).toContain('<available_tools>');
    expect(prompt).toContain('You are a weather bot.');
    // Caller prompt comes after tool block
    expect(prompt.indexOf('You are a weather bot.')).toBeGreaterThan(
      prompt.indexOf('<available_tools>'),
    );
  });

  it('legacy mode: does NOT embed tool definitions in system prompt', () => {
    process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = '1';
    const prompt = buildSessionSystemPrompt(sampleTools, undefined);
    // The tool block (with actual tool definitions) should NOT be present
    expect(prompt).not.toContain('get_weather');
    expect(prompt).not.toContain('## Available Tools');
    // But the preamble still references user message location
    expect(prompt).toContain('in the user message');
  });

  it('legacy mode: still includes caller system prompt', () => {
    process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE = '1';
    const prompt = buildSessionSystemPrompt(sampleTools, 'You are a weather bot.');
    expect(prompt).not.toContain('get_weather');
    expect(prompt).toContain('You are a weather bot.');
  });
});

// ─── sessionNameFromKey ──────────────────────────────────────────────────────
Expand Down
120 changes: 103 additions & 17 deletions src/openai-compat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,66 @@ export interface OpenAIChatCompletionChunk {
* responses from the wrong model. Originally diagnosed in PR #40 by
* @megayounus786.
*/
/**
 * Legacy-behavior opt-out switch, read from OPENAI_COMPAT_TOOLS_PER_MESSAGE.
 *
 * When enabled ('1', 'true', or 'yes' — case-insensitive, surrounding
 * whitespace ignored) the proxy preserves the pre-fix behavior:
 * - tools injected into every user message
 * - session key NOT fingerprinted by tools (same session across tool changes)
 *
 * Default (unset or any other value) is the new behavior: tools are embedded
 * in the session system prompt at create time and the session key is
 * fingerprinted by tools. The new behavior eliminates periodic latency spikes
 * but does not support mutating the tool list within a single session (a new
 * session is created when tools change).
 *
 * @returns true only when the env var opts into the legacy per-message mode.
 */
export function isToolsPerMessageModeEnabled(): boolean {
  const raw = process.env.OPENAI_COMPAT_TOOLS_PER_MESSAGE;
  if (!raw) return false;
  switch (raw.trim().toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
      return true;
    default:
      return false;
  }
}

/**
 * Generate the "no built-in tools" system prompt preamble.
 *
 * @param toolLocation Controls how the model is told where tool definitions
 *   live: 'system' means "in the <available_tools> block below" (tools baked
 *   into the system prompt); 'user' means "in <available_tools> tags in the
 *   user message" (legacy per-turn injection).
 * @returns The five-line preamble instructing the model to ignore built-in
 *   tools and use only caller-supplied <available_tools>.
 */
export function noToolsSystemPrompt(toolLocation: 'system' | 'user'): string {
  const locationHint =
    toolLocation === 'user'
      ? 'in <available_tools> tags in the user message'
      : 'in the <available_tools> block below';
  const lines = [
    'You are a helpful AI assistant acting as a pure LLM behind an API proxy.',
    'You do NOT have access to any tools such as Bash, Read, Write, Edit, Glob, Grep, or any other built-in tools.',
    'Do NOT attempt to call any tools or execute any commands.',
    `When you need to perform an action, use ONLY the tools defined ${locationHint}, and respond with <tool_calls> tags as instructed there.`,
    'If no <available_tools> are provided, respond with text only.',
  ];
  return lines.join('\n');
}

/**
 * Build the full session system prompt for a Claude Code session with tools.
 * Exported for testability — called from `handleChatCompletion`.
 *
 * - Default mode: tools are embedded in the system prompt (cacheable by Anthropic).
 * - Legacy mode (OPENAI_COMPAT_TOOLS_PER_MESSAGE=1): tools are NOT embedded;
 *   they'll be injected per-turn in the user message instead.
 *
 * @param tools Caller-supplied OpenAI-style tool list (unused in legacy mode).
 * @param callerSystemPrompt Optional caller system prompt, appended after the
 *   preamble/tool block when present.
 * @returns The complete system prompt string for session creation.
 */
export function buildSessionSystemPrompt(
  tools: OpenAIChatCompletionRequest['tools'],
  callerSystemPrompt: string | undefined,
): string {
  // Legacy mode points the model at per-turn user-message injection and skips
  // embedding the tool schemas; default mode bakes them in after the preamble.
  const base = isToolsPerMessageModeEnabled()
    ? noToolsSystemPrompt('user')
    : `${noToolsSystemPrompt('system')}\n\n${buildToolPromptBlock(tools)}`;
  return callerSystemPrompt ? `${base}\n\n${callerSystemPrompt}` : base;
}

export function resolveSessionKey(body: OpenAIChatCompletionRequest, headers: http.IncomingHttpHeaders): string {
const headerKey = headers['x-session-id'];
if (typeof headerKey === 'string' && headerKey.trim()) return headerKey.trim();
Expand All @@ -114,11 +174,33 @@ export function resolveSessionKey(body: OpenAIChatCompletionRequest, headers: ht
.map((m) => (typeof m.content === 'string' ? m.content : JSON.stringify(m.content)))
.join('\n');
const modelTag = (body.model || '').toString();
if (sys || modelTag) {
// Include a fingerprint of the tool list so that two requests with the same
// system prompt but different tool definitions land in different sessions.
// The tool schemas are baked into the session system prompt on create; if
// tools change we need a new session rather than re-using a stale one.
// Hash only tool names + a short description prefix to keep the fingerprint
// small and stable against schema formatting differences.
//
// Opt-out: OPENAI_COMPAT_TOOLS_PER_MESSAGE=1 restores the pre-fix behavior
// of keying sessions only by system prompt + model. Enable this if you have
// callers that mutate their tool list within one conversation and rely on
// continuing history across tool changes.
const toolsFingerprint = isToolsPerMessageModeEnabled()
? ''
: (body.tools || [])
.map((t) => {
const fn = t?.function;
if (!fn?.name) return '';
const descPrefix = (typeof fn.description === 'string' ? fn.description : '').slice(0, 64);
return `${fn.name}:${descPrefix}`;
})
.filter(Boolean)
.join('|');
if (sys || modelTag || toolsFingerprint) {
return (
'sys-' +
createHash('sha1')
.update(modelTag + '\n' + sys)
.update(modelTag + '\n' + sys + '\n' + toolsFingerprint)
.digest('hex')
.slice(0, 12)
);
Expand Down Expand Up @@ -513,21 +595,11 @@ export async function handleChatCompletion(
}
// Claude Code CLI supports --system-prompt (replace) and --append-system-prompt (append).
// When the caller provides tools, use --system-prompt to REPLACE the CLI's entire
// system prompt. This removes the CLI's built-in tool definitions (Bash, Read, Edit, etc.)
// so the model only sees the caller's tools via <available_tools> in the user message.
// --append-system-prompt doesn't work because the CLI's own tool instructions take priority.
// system prompt via buildSessionSystemPrompt(). See that function's doc for details
// on default vs legacy (OPENAI_COMPAT_TOOLS_PER_MESSAGE=1) behavior.
if (engine === 'claude') {
if (request.tools?.length) {
const noToolsPrompt =
'You are a helpful AI assistant acting as a pure LLM behind an API proxy.\n' +
'You do NOT have access to any tools such as Bash, Read, Write, Edit, Glob, Grep, or any other built-in tools.\n' +
'Do NOT attempt to call any tools or execute any commands.\n' +
'When you need to perform an action, use ONLY the tools defined in <available_tools> tags in the user message, ' +
'and respond with <tool_calls> tags as instructed there.\n' +
'If no <available_tools> are provided, respond with text only.';
sessionConfig.systemPrompt = extracted.systemPrompt
? `${noToolsPrompt}\n\n${extracted.systemPrompt}`
: noToolsPrompt;
sessionConfig.systemPrompt = buildSessionSystemPrompt(request.tools, extracted.systemPrompt);
} else if (extracted.systemPrompt) {
sessionConfig.appendSystemPrompt = extracted.systemPrompt;
}
Expand Down Expand Up @@ -568,9 +640,23 @@ export async function handleChatCompletion(
userMessage = `<system>\n${extracted.systemPrompt}\n</system>\n\n${userMessage}`;
}

// Inject tool definitions into the user message
// Inject tool definitions into the user message.
//
// Default path for Claude Code: tools are already embedded in the session
// system prompt (see session create block above) — do NOT re-inject them
// per turn. Repeatedly prepending a large <available_tools> block to every
// user message bloats each turn's input, defeats Anthropic prompt caching,
// and was the cause of periodic 30-50s latency spikes.
//
// Opt-out path for Claude Code (OPENAI_COMPAT_TOOLS_PER_MESSAGE=1): fall
// back to the legacy behavior of injecting the tool block into each user
// message. Enables dynamic tool list updates within a single session.
//
// Non-claude engines: the CLI is spawned fresh per turn with no persistent
// system prompt, so tools must always be injected per message.
const hasTools = !!request.tools?.length;
if (hasTools) {
const injectToolsPerTurn = hasTools && (engine !== 'claude' || isToolsPerMessageModeEnabled());
if (injectToolsPerTurn) {
const toolBlock = buildToolPromptBlock(request.tools);
userMessage = `${toolBlock}\n\n${userMessage}`;
}
Expand Down