LanternOps · ToddHebebrand · Jun 11, 2026 · May 18, 2026 · May 18, 2026 · May 18, 2026
diff --git a/.gitignore b/.gitignore
@@ -62,6 +62,9 @@ target/
 # Playwright MCP screenshots
 .playwright-mcp/
 
+# impeccable design-critique plugin artifacts
+.impeccable/
+
 # Claude Code
 CLAUDE.md
 .claude/plans/

diff --git a/apps/api/src/config/validate.test.ts b/apps/api/src/config/validate.test.ts
@@ -670,6 +670,43 @@ describe('validateConfig', () => {
     });
   });
 
+  // Baseline environment for the openai-compatible provider cases: validEnv
+  // plus the provider selector, base URL, model, and API key. Each test below
+  // removes or blanks a single field from this object.
+  const openAiCompatibleEnv = {
+    ...validEnv,
+    MCP_LLM_PROVIDER: 'openai-compatible',
+    MCP_LLM_BASE_URL: 'http://localhost:8000/v1',
+    MCP_LLM_MODEL: 'test-model',
+    MCP_LLM_API_KEY: 'sk-test',
+  };
+
+  // MCP_LLM_MODEL is dropped from the env object entirely; validateConfig()
+  // is expected to throw with 'MCP_LLM_MODEL' in the error.
+  // NOTE(review): assumes withEnv leaves keys that are absent from the object
+  // unset in the environment — confirm against the withEnv helper.
+  it('requires MCP_LLM_MODEL when MCP_LLM_PROVIDER is openai-compatible', () => {
+    const { MCP_LLM_MODEL: _, ...rest } = openAiCompatibleEnv;
+    withEnv(rest as Record<string, string>, () => {
+      expect(() => validateConfig()).toThrow('MCP_LLM_MODEL');
+    });
+  });
+
+  // A whitespace-only model name is expected to be rejected the same way as a
+  // missing one.
+  it('requires non-empty MCP_LLM_MODEL when MCP_LLM_PROVIDER is openai-compatible', () => {
+    withEnv({ ...openAiCompatibleEnv, MCP_LLM_MODEL: '   ' }, () => {
+      expect(() => validateConfig()).toThrow('MCP_LLM_MODEL');
+    });
+  });
+
+  // MCP_LLM_API_KEY is dropped from the env object entirely; validateConfig()
+  // is expected to throw with 'MCP_LLM_API_KEY' in the error.
+  it('requires MCP_LLM_API_KEY when MCP_LLM_PROVIDER is openai-compatible', () => {
+    const { MCP_LLM_API_KEY: _, ...rest } = openAiCompatibleEnv;
+    withEnv(rest as Record<string, string>, () => {
+      expect(() => validateConfig()).toThrow('MCP_LLM_API_KEY');
+    });
+  });
+
+  // Happy path: with every field of the baseline present, validateConfig()
+  // returns a config that echoes the provider, model, and API key.
+  it('accepts openai-compatible when base URL, model, and API key are set', () => {
+    withEnv(openAiCompatibleEnv, () => {
+      const config = validateConfig();
+      expect(config.MCP_LLM_PROVIDER).toBe('openai-compatible');
+      expect(config.MCP_LLM_MODEL).toBe('test-model');
+      expect(config.MCP_LLM_API_KEY).toBe('sk-test');
+    });
+  });
+
   // ---- OAuth DCR (Dynamic Client Registration) hardening (Task 21) -------
   // When DCR is on in production, every client registration is anonymous and
   // self-asserting. Without an initial-access-token gate, the registration

diff --git a/apps/api/src/routes/ai.ts b/apps/api/src/routes/ai.ts
@@ -39,6 +39,52 @@ import {
 } from '@breeze/shared/validators/ai';
 import { aiActionPlans } from '../db/schema';
 import { captureException } from '../services/sentry';
+import { getConfig } from '../config/validate';
+import { OpenAICompatibleProvider } from '../services/llm/openaiCompatibleProvider';
+import { OpenAISessionManager } from '../services/llm/openaiSessionManager';
+
+// Reports whether the configured LLM provider is 'openai-compatible'.
+// Tolerates an unvalidated config: route unit tests never call
+// validateConfig(), and getConfig() throws in that state. Without a
+// validated config, behave as the default anthropic path. Production always
+// validates at boot, so this never masks a misconfiguration there.
+function isOpenAICompatibleProvider(): boolean {
+  try {
+    // Read the provider from the config. Calling this function from inside
+    // itself would recurse until the stack overflows, and the catch below
+    // would turn that into a permanent `false`.
+    return getConfig().MCP_LLM_PROVIDER === 'openai-compatible';
+  } catch {
+    // getConfig() threw (config not validated): fall back to the default path.
+    return false;
+  }
+}
+
+// Lazy singleton for the openai-compatible path.
+// Only constructed on first use when MCP_LLM_PROVIDER=openai-compatible.
+let _openaiSessionManager: OpenAISessionManager | null = null;
+/**
+ * Returns the module-level OpenAISessionManager, building it together with
+ * its OpenAICompatibleProvider from getConfig() on the first call and reusing
+ * the same instance afterwards.
+ *
+ * @returns The shared OpenAISessionManager instance.
+ * @throws If getConfig() throws, or if MCP_LLM_BASE_URL or MCP_LLM_API_KEY is
+ *   not set in the config.
+ */
+function getOpenAISessionManager(): OpenAISessionManager {
+  if (!_openaiSessionManager) {
+    const cfg = getConfig();
+    if (!cfg.MCP_LLM_BASE_URL) {
+      // Should be caught at startup by the superRefine cross-field validation;
+      // guard here so the provider is never constructed without a base URL.
+      throw new Error('MCP_LLM_BASE_URL is required when MCP_LLM_PROVIDER is openai-compatible');
+    }
+    if (cfg.MCP_LLM_API_KEY == null) {
+      // Explicit guard instead of a non-null assertion, so a missing key fails
+      // here rather than being handed to the provider as undefined.
+      throw new Error('MCP_LLM_API_KEY is required when MCP_LLM_PROVIDER is openai-compatible');
+    }
+    if (
+      cfg.MCP_LLM_PROVIDER === 'openai-compatible' &&
+      cfg.MCP_LLM_PRICE_INPUT_PER_M_USD === 0 &&
+      cfg.MCP_LLM_PRICE_OUTPUT_PER_M_USD === 0
+    ) {
+      // Emitted once, when the singleton is first built.
+      console.warn(
+        'MCP_LLM_PROVIDER=openai-compatible but both MCP_LLM_PRICE_*_PER_M_USD are 0: cost tracking and budget enforcement are no-ops on this path.'
+      );
+    }
+    const provider = new OpenAICompatibleProvider({
+      baseUrl: cfg.MCP_LLM_BASE_URL,
+      apiKey: cfg.MCP_LLM_API_KEY,
+      priceInputPerMUsd: cfg.MCP_LLM_PRICE_INPUT_PER_M_USD,
+      priceOutputPerMUsd: cfg.MCP_LLM_PRICE_OUTPUT_PER_M_USD,
+    });
+    _openaiSessionManager = new OpenAISessionManager(provider);
+  }
+  return _openaiSessionManager;
+}
 
 const createAiSessionSchema = sharedCreateAiSessionSchema.extend({
   orgId: z.string().uuid().optional(),
@@ -187,7 +233,11 @@ aiRoutes.delete(
       return c.json({ error: 'Session not found' }, 404);
     }
 
-    streamingSessionManager.remove(sessionId);
+    const manager =
+      isOpenAICompatibleProvider()
+        ? getOpenAISessionManager()
+        : streamingSessionManager;
+    manager.remove(sessionId);
 
     writeRouteAudit(c, {
       orgId: closed.orgId,
@@ -327,6 +377,68 @@ aiRoutes.post(
 
     const { session: dbSession, sanitizedContent, systemPrompt, maxBudgetUsd } = preflight;
 
+    // ---- OpenAI-compatible path (chat-only, no tool-calling) ----
+    if (isOpenAICompatibleProvider()) {
+      const openaiManager = getOpenAISessionManager();
+      const openaiSession = openaiManager.getOrCreate(sessionId, dbSession.orgId, auth, c);
+
+      if (!openaiManager.tryTransitionToProcessing(openaiSession)) {
+        return c.json({ error: 'A message is already being processed for this session' }, 409);
+      }
+
+      writeRouteAudit(c, {
+        orgId: dbSession.orgId,
+        action: 'ai.message.send',
+        resourceType: 'ai_session',
+        resourceId: sessionId,
+        details: { contentLength: body.content.length },
+      });
+
+      try {
+        await db.insert(aiMessages).values({
+          sessionId,
+          role: 'user',
+          content: sanitizedContent,
+        });
+      } catch (err) {
+        console.error('[AI/OpenAI] Failed to save user message to DB:', err);
+        openaiSession.state = 'idle';
+        return c.json({ error: 'Failed to save message' }, 500);
+      }
+
+      if (!dbSession.title) {
+        const title = generateSessionTitle(sanitizedContent);
+        try {
+          await db.update(aiSessions).set({ title }).where(eq(aiSessions.id, sessionId));
+          openaiSession.eventBus.publish({ type: 'title_updated', title });
+        } catch (err) {
+          console.error('[AI/OpenAI] Failed to auto-set session title:', err);
+        }
+      }
+
+      openaiManager.startTurn(openaiSession, dbSession.model, systemPrompt, sanitizedContent);
+
+      const subscriptionId = crypto.randomUUID();
+      return streamSSE(c, async (stream) => {
+        const events = openaiSession.eventBus.subscribe(subscriptionId);
+        try {
+          for await (const event of events) {
+            await stream.writeSSE({ event: event.type, data: JSON.stringify(event) });
+            if (event.type === 'done') break;
+          }
+        } catch (err) {
+          console.error('[AI/OpenAI] Stream error:', err);
+          await stream.writeSSE({
+            event: 'error',
+            data: JSON.stringify({ type: 'error', message: err instanceof Error ? err.message : 'Stream failed' }),
+          });
+        } finally {
+          openaiSession.eventBus.unsubscribe(subscriptionId);
+        }
+      });
+    }
+    // ---- End OpenAI-compatible path ----
+
     // Get or create streaming session
     const activeSession = await streamingSessionManager.getOrCreate(
       sessionId,
@@ -436,7 +548,11 @@ aiRoutes.post(
 
     let result: { interrupted: boolean; reason?: string };
     try {
-      result = await streamingSessionManager.interrupt(sessionId);
+      const manager =
+        isOpenAICompatibleProvider()
+          ? getOpenAISessionManager()
+          : streamingSessionManager;
+      result = await manager.interrupt(sessionId);
     } catch (err) {
       console.error('[AI] Interrupt failed:', err);
       return c.json({ error: 'Failed to interrupt session' }, 500);
@@ -518,6 +634,16 @@ aiRoutes.post(
       return c.json({ error: 'Session not found' }, 404);
     }
 
+    if (isOpenAICompatibleProvider()) {
+      return c.json(
+        {
+          error: 'This operation is not supported when using the OpenAI-compatible provider.',
+          code: 'NOT_SUPPORTED_ON_PROVIDER',
+        },
+        501,
+      );
+    }
+
     const activeSession = streamingSessionManager.get(sessionId);
     if (!activeSession) {
       return c.json({ error: 'Session not active in memory' }, 404);
@@ -565,6 +691,16 @@ aiRoutes.post(
       return c.json({ error: 'Session not found' }, 404);
     }
 
+    if (isOpenAICompatibleProvider()) {
+      return c.json(
+        {
+          error: 'This operation is not supported when using the OpenAI-compatible provider.',
+          code: 'NOT_SUPPORTED_ON_PROVIDER',
+        },
+        501,
+      );
+    }
+
     const activeSession = streamingSessionManager.get(sessionId);
     if (!activeSession) {
       return c.json({ error: 'Session not active in memory' }, 404);
@@ -628,6 +764,16 @@ aiRoutes.post(
       return c.json({ error: 'Session not found' }, 404);
     }
 
+    if (isOpenAICompatibleProvider()) {
+      return c.json(
+        {
+          error: 'This operation is not supported when using the OpenAI-compatible provider.',
+          code: 'NOT_SUPPORTED_ON_PROVIDER',
+        },
+        501,
+      );
+    }
+
     const activeSession = streamingSessionManager.get(sessionId);
     if (!activeSession) {
       return c.json({ error: 'Session not active in memory' }, 404);

diff --git a/apps/api/src/services/aiCostTracker.ts b/apps/api/src/services/aiCostTracker.ts
@@ -332,6 +332,80 @@ export async function recordUsageFromSdkResult(
   await deductBillingCredits(orgId, costCents);
 }
 
+/**
+ * Persist token and cost usage for one openai-compatible turn.
+ *
+ * The cost arrives in USD (calculated from declared pricing, best-effort) and
+ * is stored in cents rounded to two decimal places.
+ * No prompt caching equivalent exists on vLLM; the full context is re-sent each turn.
+ *
+ * Order of operations:
+ *  1. Add the tokens and cost to the session row (failure is logged and rethrown).
+ *  2. Upsert the daily and monthly per-org aggregates, keyed by UTC date
+ *     (a failure is logged and the remaining steps still run).
+ *  3. Start a cost-anomaly check without awaiting it.
+ *  4. Await the billing-credit deduction.
+ *
+ * @param sessionId    Session whose totals are incremented.
+ * @param orgId        Owning org; when empty the call warns and does nothing.
+ * @param inputTokens  Input tokens consumed by this turn.
+ * @param outputTokens Output tokens produced by this turn.
+ * @param costUsd      Cost of this turn in USD.
+ */
+export async function recordOpenAIUsage(
+  sessionId: string,
+  orgId: string,
+  inputTokens: number,
+  outputTokens: number,
+  costUsd: number,
+): Promise<void> {
+  if (!orgId) {
+    console.warn(`[AI] Skipping recordOpenAIUsage — empty orgId for session=${sessionId}`);
+    return;
+  }
+
+  const costCents = Math.round(costUsd * 100 * 100) / 100;
+
+  // Period keys are derived from the UTC calendar: YYYY-MM and YYYY-MM-DD.
+  const now = new Date();
+  const pad2 = (value: number): string => String(value).padStart(2, '0');
+  const monthlyKey = `${now.getUTCFullYear()}-${pad2(now.getUTCMonth() + 1)}`;
+  const dailyKey = `${monthlyKey}-${pad2(now.getUTCDate())}`;
+
+  // 1. Session totals — the only step whose failure is propagated to the caller.
+  try {
+    await db
+      .update(aiSessions)
+      .set({
+        totalInputTokens: sql`${aiSessions.totalInputTokens} + ${inputTokens}`,
+        totalOutputTokens: sql`${aiSessions.totalOutputTokens} + ${outputTokens}`,
+        totalCostCents: sql`${aiSessions.totalCostCents} + ${costCents}`,
+        lastActivityAt: now,
+        updatedAt: now,
+      })
+      .where(eq(aiSessions.id, sessionId));
+  } catch (error) {
+    console.error(`[AI] Failed to update session totals (OpenAI) for session=${sessionId}:`, error);
+    throw error;
+  }
+
+  // 2. Org aggregates — daily first, then monthly, one at a time.
+  const aggregates = [
+    { period: 'daily', periodKey: dailyKey },
+    { period: 'monthly', periodKey: monthlyKey },
+  ] as const;
+
+  for (const { period, periodKey } of aggregates) {
+    try {
+      await db
+        .insert(aiCostUsage)
+        .values({
+          orgId,
+          period,
+          periodKey,
+          inputTokens,
+          outputTokens,
+          totalCostCents: costCents,
+          sessionCount: 0,
+          messageCount: 1,
+          toolExecutionCount: 0,
+        })
+        .onConflictDoUpdate({
+          target: [aiCostUsage.orgId, aiCostUsage.period, aiCostUsage.periodKey],
+          set: {
+            inputTokens: sql`${aiCostUsage.inputTokens} + ${inputTokens}`,
+            outputTokens: sql`${aiCostUsage.outputTokens} + ${outputTokens}`,
+            totalCostCents: sql`${aiCostUsage.totalCostCents} + ${costCents}`,
+            messageCount: sql`${aiCostUsage.messageCount} + 1`,
+            updatedAt: now,
+          },
+        });
+    } catch (error) {
+      // Logged only: an aggregate failure does not stop the remaining steps.
+      console.error(`[AI] Failed to update ${period} aggregate (OpenAI) for org=${orgId}:`, error);
+    }
+  }
+
+  // 3. Anomaly check is not awaited; its rejection is logged here.
+  checkCostAnomalies(sessionId, orgId, costCents, dailyKey).catch((error) => {
+    console.error('[AI] Cost anomaly check failed (OpenAI):', error);
+  });
+
+  // 4. Billing deduction is awaited, so its rejection reaches the caller.
+  await deductBillingCredits(orgId, costCents);
+}
+
 /**
  * Get the remaining monthly budget for an org in USD.
  * Returns null if no budget is configured (unlimited).