Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c07df5c
feat(llm): palier 1.1 - types et interface LLMProvider (chat-only PoC)
claude May 18, 2026
3d712de
feat(llm): palier 1.2 - lecture du flag MCP_LLM_PROVIDER dans la config
claude May 18, 2026
077d97c
feat(llm): palier 1.3 - OpenAICompatibleProvider et historyBuilder
claude May 18, 2026
8f8566d
feat(llm): palier 1.4 - OpenAISessionManager + branchement dans la route
claude May 18, 2026
12e79a0
fix(llm): corrections issues review Opus (B2, B3, M1-M5)
claude May 18, 2026
8db2021
test(llm): palier 1.8 - tests unitaires historyBuilder
claude May 18, 2026
821431b
chore: ajoute .devscripts/ au gitignore
claude May 18, 2026
ad3b41a
fix(llm): router les tours openai-compatible vers MCP_LLM_MODEL
May 18, 2026
d267c7d
fix(llm): route interrupt/stop to openai-compatible session manager
May 23, 2026
24198c8
fix(llm): return 501 for plan/pause flows on openai-compatible
May 23, 2026
a9f534c
fix(llm): drop redundant provider message_start yield
May 23, 2026
dd39576
fix(llm): always increment OpenAI-compat turnCount after chatStream s…
May 23, 2026
8c1fd79
fix(llm): treat openai-compat user abort as clean stream end
May 23, 2026
d3db110
fix(llm): surface openai-compat timeout distinct from user Stop
May 23, 2026
d95ba17
fix(llm): assert MCP_LLM_API_KEY presence instead of empty fallback
May 23, 2026
765ce64
chore(llm): note multipart content is dropped on openai-compat path
May 23, 2026
568f929
chore(llm): commit openai-compat smoke script under versioned path
May 23, 2026
5d1e643
fix(llm): run openai-compatible turns outside HTTP request ALS.
May 23, 2026
c0a4273
fix(llm): route session delete to openai-compatible manager
May 24, 2026
b695423
fix(llm): log unhandled errors from background runTurn
May 24, 2026
4be5b46
fix(llm): append current OpenAI-compat user message in-memory for vLLM
May 24, 2026
c4d0735
chore(llm): warn once when openai-compatible has zero MCP prices
May 24, 2026
bfcf3aa
docs: process-level resource drill-down design + e2e coverage index
ToddHebebrand Jun 11, 2026
19bcf50
chore: gitignore .impeccable/ plugin artifacts
ToddHebebrand Jun 11, 2026
8311931
Merge main into claude/poc-alternative-llm-YrlE8
ToddHebebrand Jun 11, 2026
22784d1
fix(llm): tolerate unvalidated config in provider check
ToddHebebrand Jun 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ target/
# Playwright MCP screenshots
.playwright-mcp/

# impeccable design-critique plugin artifacts
.impeccable/

# Claude Code
CLAUDE.md
.claude/plans/
Expand Down
37 changes: 37 additions & 0 deletions apps/api/src/config/validate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,43 @@ describe('validateConfig', () => {
});
});

// Baseline environment for the openai-compatible provider cases below:
// the shared valid env plus the provider flag, base URL, model and API key.
const openAiCompatibleEnv = {
  ...validEnv,
  MCP_LLM_PROVIDER: 'openai-compatible',
  MCP_LLM_BASE_URL: 'http://localhost:8000/v1',
  MCP_LLM_MODEL: 'test-model',
  MCP_LLM_API_KEY: 'sk-test',
};

it('requires MCP_LLM_MODEL when MCP_LLM_PROVIDER is openai-compatible', () => {
  // Remove the model key entirely; the thrown error must name the variable.
  const { MCP_LLM_MODEL: _droppedModel, ...envWithoutModel } = openAiCompatibleEnv;
  const assertRejected = () => {
    expect(() => validateConfig()).toThrow('MCP_LLM_MODEL');
  };
  withEnv(envWithoutModel as Record<string, string>, assertRejected);
});

it('requires non-empty MCP_LLM_MODEL when MCP_LLM_PROVIDER is openai-compatible', () => {
  // A whitespace-only model is expected to be rejected like a missing one.
  const envWithBlankModel = { ...openAiCompatibleEnv, MCP_LLM_MODEL: ' ' };
  const assertRejected = () => {
    expect(() => validateConfig()).toThrow('MCP_LLM_MODEL');
  };
  withEnv(envWithBlankModel, assertRejected);
});

it('requires MCP_LLM_API_KEY when MCP_LLM_PROVIDER is openai-compatible', () => {
  // Remove the API key entirely; the thrown error must name the variable.
  const { MCP_LLM_API_KEY: _droppedKey, ...envWithoutKey } = openAiCompatibleEnv;
  const assertRejected = () => {
    expect(() => validateConfig()).toThrow('MCP_LLM_API_KEY');
  };
  withEnv(envWithoutKey as Record<string, string>, assertRejected);
});

it('accepts openai-compatible when base URL, model, and API key are set', () => {
  // Happy path: the validated config echoes back the values from the env.
  const assertAccepted = () => {
    const validated = validateConfig();
    expect(validated.MCP_LLM_PROVIDER).toBe('openai-compatible');
    expect(validated.MCP_LLM_MODEL).toBe('test-model');
    expect(validated.MCP_LLM_API_KEY).toBe('sk-test');
  };
  withEnv(openAiCompatibleEnv, assertAccepted);
});

// ---- OAuth DCR (Dynamic Client Registration) hardening (Task 21) -------
// When DCR is on in production, every client registration is anonymous and
// self-asserting. Without an initial-access-token gate, the registration
Expand Down
150 changes: 148 additions & 2 deletions apps/api/src/routes/ai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,52 @@ import {
} from '@breeze/shared/validators/ai';
import { aiActionPlans } from '../db/schema';
import { captureException } from '../services/sentry';
import { getConfig } from '../config/validate';
import { OpenAICompatibleProvider } from '../services/llm/openaiCompatibleProvider';
import { OpenAISessionManager } from '../services/llm/openaiSessionManager';

/**
 * Reports whether the configured LLM provider is `openai-compatible`.
 *
 * Tolerates an unvalidated config: route unit tests never call
 * validateConfig(), and getConfig() throws in that state. Without a validated
 * config this behaves as the default anthropic path. Production always
 * validates at boot, so this never masks a misconfiguration there.
 *
 * @returns `true` only when `getConfig()` succeeds and its `MCP_LLM_PROVIDER`
 *   is exactly `'openai-compatible'`; `false` otherwise, including when
 *   `getConfig()` throws.
 */
function isOpenAICompatibleProvider(): boolean {
  try {
    // Read the flag from the config. This must not call
    // isOpenAICompatibleProvider() itself: doing so recurses until the stack
    // overflows, and the catch below would then turn that into a constant
    // `false`, silently disabling the openai-compatible path.
    return getConfig().MCP_LLM_PROVIDER === 'openai-compatible';
  } catch {
    return false;
  }
}

// Module-level cache for the openai-compatible session manager.
// Stays null until getOpenAISessionManager() is first called successfully.
let _openaiSessionManager: OpenAISessionManager | null = null;

/**
 * Returns the shared OpenAISessionManager, building it (and its
 * OpenAICompatibleProvider) from the config on first use.
 *
 * Logs a warning when both per-million-token prices are 0, since cost
 * tracking and budget enforcement are then no-ops on this path.
 *
 * @returns the cached manager; every call after the first successful one
 *   returns the same instance without re-reading the config.
 * @throws Error when `MCP_LLM_BASE_URL` is not set in the config. Nothing is
 *   cached in that case, so a later call retries.
 */
function getOpenAISessionManager(): OpenAISessionManager {
  if (_openaiSessionManager) {
    return _openaiSessionManager;
  }

  const config = getConfig();

  // Should be caught at startup by the superRefine cross-field validation,
  // but guard here in case getConfig() is called before validateConfig().
  const baseUrl = config.MCP_LLM_BASE_URL;
  if (!baseUrl) {
    throw new Error('MCP_LLM_BASE_URL is required when MCP_LLM_PROVIDER is openai-compatible');
  }

  const inputPrice = config.MCP_LLM_PRICE_INPUT_PER_M_USD;
  const outputPrice = config.MCP_LLM_PRICE_OUTPUT_PER_M_USD;
  const usingOpenAICompatible = config.MCP_LLM_PROVIDER === 'openai-compatible';
  if (usingOpenAICompatible && inputPrice === 0 && outputPrice === 0) {
    console.warn(
      'MCP_LLM_PROVIDER=openai-compatible but both MCP_LLM_PRICE_*_PER_M_USD are 0: cost tracking and budget enforcement are no-ops on this path.'
    );
  }

  _openaiSessionManager = new OpenAISessionManager(
    new OpenAICompatibleProvider({
      baseUrl,
      // Asserted rather than defaulted: the key is expected to have been
      // required by config validation for this provider.
      apiKey: config.MCP_LLM_API_KEY!,
      priceInputPerMUsd: inputPrice,
      priceOutputPerMUsd: outputPrice,
    }),
  );
  return _openaiSessionManager;
}

const createAiSessionSchema = sharedCreateAiSessionSchema.extend({
orgId: z.string().uuid().optional(),
Expand Down Expand Up @@ -187,7 +233,11 @@ aiRoutes.delete(
return c.json({ error: 'Session not found' }, 404);
}

streamingSessionManager.remove(sessionId);
const manager =
isOpenAICompatibleProvider()
? getOpenAISessionManager()
: streamingSessionManager;
manager.remove(sessionId);

writeRouteAudit(c, {
orgId: closed.orgId,
Expand Down Expand Up @@ -327,6 +377,68 @@ aiRoutes.post(

const { session: dbSession, sanitizedContent, systemPrompt, maxBudgetUsd } = preflight;

// ---- OpenAI-compatible path (chat-only, no tool-calling) ----
// Taken when isOpenAICompatibleProvider() is true. Every exit from this block
// is a return (409, 500, or the SSE stream), so the streaming-session code
// that follows the block is not reached on this path.
if (isOpenAICompatibleProvider()) {
const openaiManager = getOpenAISessionManager();
const openaiSession = openaiManager.getOrCreate(sessionId, dbSession.orgId, auth, c);

// Refuse with 409 when the manager does not allow the transition to
// processing for this session.
if (!openaiManager.tryTransitionToProcessing(openaiSession)) {
return c.json({ error: 'A message is already being processed for this session' }, 409);
}

// Audit the send; only the length of the raw body content is recorded.
writeRouteAudit(c, {
orgId: dbSession.orgId,
action: 'ai.message.send',
resourceType: 'ai_session',
resourceId: sessionId,
details: { contentLength: body.content.length },
});

// Persist the sanitized user message before starting the turn.
try {
await db.insert(aiMessages).values({
sessionId,
role: 'user',
content: sanitizedContent,
});
} catch (err) {
console.error('[AI/OpenAI] Failed to save user message to DB:', err);
// Put the session back to 'idle' before returning 500 — presumably this
// undoes the processing transition above so a retry is not rejected with 409.
openaiSession.state = 'idle';
return c.json({ error: 'Failed to save message' }, 500);
}

// Sessions without a title get one generated from the sanitized content.
// A failure here is logged and does not abort the request.
if (!dbSession.title) {
const title = generateSessionTitle(sanitizedContent);
try {
await db.update(aiSessions).set({ title }).where(eq(aiSessions.id, sessionId));
openaiSession.eventBus.publish({ type: 'title_updated', title });
} catch (err) {
console.error('[AI/OpenAI] Failed to auto-set session title:', err);
}
}

// startTurn's return value is not awaited here.
// NOTE(review): the turn is started (and 'title_updated' may already have been
// published) before the SSE subscription below is created — confirm the event
// bus does not drop events published before subscribe().
openaiManager.startTurn(openaiSession, dbSession.model, systemPrompt, sanitizedContent);

const subscriptionId = crypto.randomUUID();
return streamSSE(c, async (stream) => {
const events = openaiSession.eventBus.subscribe(subscriptionId);
try {
// Forward each bus event as an SSE frame named after event.type; stop once
// a 'done' event has been written.
for await (const event of events) {
await stream.writeSSE({ event: event.type, data: JSON.stringify(event) });
if (event.type === 'done') break;
}
} catch (err) {
console.error('[AI/OpenAI] Stream error:', err);
// Report the failure to the client as an 'error' SSE frame.
await stream.writeSSE({
event: 'error',
data: JSON.stringify({ type: 'error', message: err instanceof Error ? err.message : 'Stream failed' }),
});
} finally {
// Always release this request's subscription, on success or failure.
openaiSession.eventBus.unsubscribe(subscriptionId);
}
});
}
// ---- End OpenAI-compatible path ----

// Get or create streaming session
const activeSession = await streamingSessionManager.getOrCreate(
sessionId,
Expand Down Expand Up @@ -436,7 +548,11 @@ aiRoutes.post(

let result: { interrupted: boolean; reason?: string };
try {
result = await streamingSessionManager.interrupt(sessionId);
const manager =
isOpenAICompatibleProvider()
? getOpenAISessionManager()
: streamingSessionManager;
result = await manager.interrupt(sessionId);
} catch (err) {
console.error('[AI] Interrupt failed:', err);
return c.json({ error: 'Failed to interrupt session' }, 500);
Expand Down Expand Up @@ -518,6 +634,16 @@ aiRoutes.post(
return c.json({ error: 'Session not found' }, 404);
}

if (isOpenAICompatibleProvider()) {
return c.json(
{
error: 'This operation is not supported when using the OpenAI-compatible provider.',
code: 'NOT_SUPPORTED_ON_PROVIDER',
},
501,
);
}

const activeSession = streamingSessionManager.get(sessionId);
if (!activeSession) {
return c.json({ error: 'Session not active in memory' }, 404);
Expand Down Expand Up @@ -565,6 +691,16 @@ aiRoutes.post(
return c.json({ error: 'Session not found' }, 404);
}

if (isOpenAICompatibleProvider()) {
return c.json(
{
error: 'This operation is not supported when using the OpenAI-compatible provider.',
code: 'NOT_SUPPORTED_ON_PROVIDER',
},
501,
);
}

const activeSession = streamingSessionManager.get(sessionId);
if (!activeSession) {
return c.json({ error: 'Session not active in memory' }, 404);
Expand Down Expand Up @@ -628,6 +764,16 @@ aiRoutes.post(
return c.json({ error: 'Session not found' }, 404);
}

if (isOpenAICompatibleProvider()) {
return c.json(
{
error: 'This operation is not supported when using the OpenAI-compatible provider.',
code: 'NOT_SUPPORTED_ON_PROVIDER',
},
501,
);
}

const activeSession = streamingSessionManager.get(sessionId);
if (!activeSession) {
return c.json({ error: 'Session not active in memory' }, 404);
Expand Down
74 changes: 74 additions & 0 deletions apps/api/src/services/aiCostTracker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,80 @@ export async function recordUsageFromSdkResult(
await deductBillingCredits(orgId, costCents);
}

/**
 * Persist token usage and cost for one turn on the openai-compatible path.
 *
 * Steps, in order:
 *  1. add the turn's tokens and cost to the session row (failure is rethrown);
 *  2. upsert the org's daily and monthly aggregates (failures are logged only);
 *  3. start the cost-anomaly check without awaiting it (failure is logged);
 *  4. deduct billing credits for the turn's cost (awaited).
 *
 * Cost is calculated from declared per-token pricing (best-effort). No prompt
 * caching equivalent exists on vLLM; the full context is re-sent each turn.
 *
 * @param sessionId    session whose running totals are incremented
 * @param orgId        owning org; when empty the call only logs a warning
 * @param inputTokens  prompt tokens consumed by this turn
 * @param outputTokens completion tokens produced by this turn
 * @param costUsd      cost of this turn in USD
 * @throws whatever the session-totals update throws, after logging it
 */
export async function recordOpenAIUsage(
  sessionId: string,
  orgId: string,
  inputTokens: number,
  outputTokens: number,
  costUsd: number,
): Promise<void> {
  if (!orgId) {
    console.warn(`[AI] Skipping recordOpenAIUsage — empty orgId for session=${sessionId}`);
    return;
  }

  // USD -> cents, rounded to two decimal places of a cent.
  const costCents = Math.round(costUsd * 100 * 100) / 100;

  // Aggregate bucket keys are derived from the current UTC date.
  const now = new Date();
  const year = now.getUTCFullYear();
  const month = String(now.getUTCMonth() + 1).padStart(2, '0');
  const day = String(now.getUTCDate()).padStart(2, '0');
  const monthlyKey = `${year}-${month}`;
  const dailyKey = `${monthlyKey}-${day}`;

  // 1. Session totals: the only write whose failure is propagated.
  try {
    const sessionUpdate = db.update(aiSessions).set({
      totalInputTokens: sql`${aiSessions.totalInputTokens} + ${inputTokens}`,
      totalOutputTokens: sql`${aiSessions.totalOutputTokens} + ${outputTokens}`,
      totalCostCents: sql`${aiSessions.totalCostCents} + ${costCents}`,
      lastActivityAt: now,
      updatedAt: now,
    });
    await sessionUpdate.where(eq(aiSessions.id, sessionId));
  } catch (err) {
    console.error(`[AI] Failed to update session totals (OpenAI) for session=${sessionId}:`, err);
    throw err;
  }

  // 2. Org aggregates: one row per (org, period, periodKey), upserted
  //    sequentially. Each bucket is best-effort and fails independently.
  const buckets = [
    { period: 'daily', periodKey: dailyKey },
    { period: 'monthly', periodKey: monthlyKey },
  ] as const;
  for (const { period, periodKey } of buckets) {
    try {
      const freshRow = {
        orgId,
        period,
        periodKey,
        inputTokens,
        outputTokens,
        totalCostCents: costCents,
        sessionCount: 0,
        messageCount: 1,
        toolExecutionCount: 0,
      };
      await db
        .insert(aiCostUsage)
        .values(freshRow)
        .onConflictDoUpdate({
          target: [aiCostUsage.orgId, aiCostUsage.period, aiCostUsage.periodKey],
          set: {
            inputTokens: sql`${aiCostUsage.inputTokens} + ${inputTokens}`,
            outputTokens: sql`${aiCostUsage.outputTokens} + ${outputTokens}`,
            totalCostCents: sql`${aiCostUsage.totalCostCents} + ${costCents}`,
            messageCount: sql`${aiCostUsage.messageCount} + 1`,
            updatedAt: now,
          },
        });
    } catch (err) {
      console.error(`[AI] Failed to update ${period} aggregate (OpenAI) for org=${orgId}:`, err);
    }
  }

  // 3. Anomaly detection is started but not awaited; a rejection is logged.
  checkCostAnomalies(sessionId, orgId, costCents, dailyKey).catch(err => {
    console.error('[AI] Cost anomaly check failed (OpenAI):', err);
  });

  // 4. Billing deduction is awaited, so its failure rejects this call.
  await deductBillingCredits(orgId, costCents);
}

/**
* Get the remaining monthly budget for an org in USD.
* Returns null if no budget is configured (unlimited).
Expand Down
Loading
Loading