diff --git a/packages/cognitive/package.json b/packages/cognitive/package.json index 7c15e527acc..f317370db83 100644 --- a/packages/cognitive/package.json +++ b/packages/cognitive/package.json @@ -1,6 +1,6 @@ { "name": "@botpress/cognitive", - "version": "0.5.3", + "version": "0.5.4", "description": "Wrapper around the Botpress Client to call LLMs", "main": "./dist/index.cjs", "module": "./dist/index.mjs", diff --git a/packages/cognitive/refresh-models.ts b/packages/cognitive/refresh-models.ts index 85496c82371..3ac0a4d0718 100644 --- a/packages/cognitive/refresh-models.ts +++ b/packages/cognitive/refresh-models.ts @@ -51,8 +51,12 @@ export const defaultModel: Model = ${JSON.stringify(defaultModel, undefined, 2)} fs.writeFileSync(modelsListPath, newFile, 'utf8') - const collectRefs = (list: Model[]) => - Array.from(new Set(list.map(toRef).filter(Boolean))).sort((a, b) => a.localeCompare(b)) + const collectRefs = (list: Model[]) => { + const refs = list.map(toRef).filter((x) => x !== null) + const uniqueRefs = Array.from(new Set(refs)) + return uniqueRefs.sort((a, b) => a.localeCompare(b)) + } + const collectAliases = (list: Model[]) => Array.from(new Set(list.flatMap((m) => (m.aliases || []).map((a) => `${m.id.split(':')[0]}:${a}`)))) diff --git a/packages/cognitive/src/client.ts b/packages/cognitive/src/client.ts index 590f8b01606..69c5de04439 100644 --- a/packages/cognitive/src/client.ts +++ b/packages/cognitive/src/client.ts @@ -18,7 +18,7 @@ import { RemoteModelProvider, } from './models' import { GenerateContentOutput } from './schemas.gen' -import { CognitiveProps, Events, InputProps, Request, Response } from './types' +import { CognitiveProps, Events, InputModel, InputProps, Request, Response } from './types' export class Cognitive { public ['$$IS_COGNITIVE'] = true @@ -136,6 +136,10 @@ export class Cognitive { }) } + private _getPrimaryModel(input: InputProps): InputModel | undefined { + return Array.isArray(input.model) ? 
input.model[0] : input.model + } + private async _selectModel(ref: string): Promise<{ integration: string; model: string }> { const parseRef = (ref: string) => { const parts = ref.split(':') @@ -150,7 +154,7 @@ export class Cognitive { const downtimes = [...preferences.downtimes, ...(this._downtimes ?? [])] - if (ref === 'best') { + if (ref === 'best' || ref === 'auto') { return parseRef(pickModel(preferences.best, downtimes)) } @@ -229,7 +233,9 @@ export class Cognitive { } public async generateContent(input: InputProps): Promise { - if (!this._useBeta || !isKnownV2Model(input.model)) { + const primaryInputModel = this._getPrimaryModel(input) + + if (!this._useBeta || !isKnownV2Model(primaryInputModel)) { return this._generateContent(input) } @@ -325,6 +331,8 @@ export class Cognitive { const client = this._client.abortable(signal) + const primaryInputModel = this._getPrimaryModel(input) + let props: Request = { input } let integration: string let model: string @@ -336,7 +344,7 @@ export class Cognitive { meta: any }>( async () => { - const selection = await this._selectModel(input.model ?? 'best') + const selection = await this._selectModel(primaryInputModel ?? 'best') integration = selection.integration model = selection.model diff --git a/packages/cognitive/src/types.ts b/packages/cognitive/src/types.ts index 25747bea108..3b542b4eb59 100644 --- a/packages/cognitive/src/types.ts +++ b/packages/cognitive/src/types.ts @@ -20,8 +20,25 @@ export type GenerationMetadata = { } } +/** + * Model selector accepted by `generateContent`. + * + * - `'best'` / `'auto'`: aliases. `'best'` is the original SDK selector; + * `'auto'` was added when cognitive-v2 landed (the v2 server uses that + * name). Both pick the first available entry from `preferences.best` on + * the legacy path, and are forwarded as-is on the v2 path. + * - `'fast'`: same shape — first available from `preferences.fast` on the + * legacy path, forwarded on the v2 path. 
+ * - `ModelRef`: any `provider:model` string. + */ +export type InputModel = 'auto' | 'best' | 'fast' | ModelRef + export type InputProps = Omit & { - model?: 'best' | 'fast' | ModelRef + /** + * Model to use, or an ordered list of fallback models. Ordered fallback is honored only on the cognitive-v2 path; + * the legacy integration path uses the first entry and falls back to server-side preferences instead. + */ + model?: InputModel | InputModel[] signal?: AbortSignal } diff --git a/packages/cognitive/tsconfig.json b/packages/cognitive/tsconfig.json index a1e4ba204cc..e489f598077 100644 --- a/packages/cognitive/tsconfig.json +++ b/packages/cognitive/tsconfig.json @@ -1,7 +1,5 @@ { "extends": "../../tsconfig.json", - "compilerOptions": { - "strict": false - }, + "compilerOptions": {}, "include": ["src/**/*", "e2e/**/*", "*.ts"] } diff --git a/packages/llmz/package.json b/packages/llmz/package.json index 8daec3f60c6..3f88d38fd11 100644 --- a/packages/llmz/package.json +++ b/packages/llmz/package.json @@ -72,7 +72,7 @@ }, "peerDependencies": { "@botpress/client": "1.45.0", - "@botpress/cognitive": "0.5.3", + "@botpress/cognitive": "0.5.4", "@bpinternal/thicktoken": "^2.0.0", "@bpinternal/zui": "^2.1.1" }, diff --git a/packages/llmz/src/context.ts b/packages/llmz/src/context.ts index ebe65859e3d..f8cf7ea8ea6 100644 --- a/packages/llmz/src/context.ts +++ b/packages/llmz/src/context.ts @@ -913,8 +913,17 @@ export class Context implements Serializable { throw new Error('Invalid temperature. Expected a number between 0 and 2.') } - if (typeof model !== 'string' || model.length === 0) { - throw new Error('Invalid model. Expected a non-empty string.') + const isValidModel = (m: unknown): m is string => + typeof m === 'string' && (m === 'best' || m === 'fast' || m === 'auto' || m.includes(':')) + + if (Array.isArray(model)) { + if (model.length === 0 || !model.every(isValidModel)) { + throw new Error( + "Invalid model. 
Expected a non-empty array of model strings ('best'/'fast'/'auto' or 'provider:model')." + ) + } + } else if (!isValidModel(model)) { + throw new Error("Invalid model. Expected 'best'/'fast'/'auto' or 'provider:model'.") } return { diff --git a/packages/llmz/src/llmz.ts b/packages/llmz/src/llmz.ts index b8854bc50fb..032fd2b7e92 100644 --- a/packages/llmz/src/llmz.ts +++ b/packages/llmz/src/llmz.ts @@ -431,6 +431,9 @@ const executeIteration = async ({ let startedAt = Date.now() const traces = iteration.traces + // When an array is provided, additional entries act as ordered fallbacks at the + // cognitive layer. Token budget and stop-token computation only need the primary + // model's details, so we resolve those off the first entry. const modelRef = Array.isArray(iteration.model) ? iteration.model[0]! : iteration.model const model = await cognitive.getModelDetails(modelRef).catch((thrown) => { throw new CognitiveError(`Failed to fetch model details for model "${modelRef}": ${getErrorMessage(thrown)}`) @@ -461,7 +464,9 @@ const executeIteration = async ({ .generateContent({ signal: controller.signal, systemPrompt: messages.find((x) => x.role === 'system')?.content, - model: model.ref, + // Validated upstream in Context (see context.ts isValidModel). Cast narrows + // Models' `({} & string)` escape hatch down to cognitive's stricter InputModel. 
+ model: iteration.model as Required[0]>['model'], temperature: iteration.temperature, responseFormat: 'text', reasoningEffort: iteration.reasoningEffort, diff --git a/packages/zai/package.json b/packages/zai/package.json index 60e4d28034c..833db27e6f7 100644 --- a/packages/zai/package.json +++ b/packages/zai/package.json @@ -1,7 +1,7 @@ { "name": "@botpress/zai", "description": "Zui AI (zai) – An LLM utility library written on top of Zui and the Botpress API", - "version": "2.6.19", + "version": "2.6.20", "main": "./dist/index.js", "types": "./dist/index.d.ts", "exports": { @@ -32,7 +32,7 @@ "author": "", "license": "ISC", "dependencies": { - "@botpress/cognitive": "0.5.3", + "@botpress/cognitive": "0.5.4", "json5": "^2.2.3", "jsonrepair": "^3.10.0", "lodash-es": "^4.17.21", diff --git a/packages/zai/src/context.ts b/packages/zai/src/context.ts index 3a600481de7..777d189e514 100644 --- a/packages/zai/src/context.ts +++ b/packages/zai/src/context.ts @@ -15,7 +15,7 @@ export type ZaiContextProps = { client: Cognitive taskType: string taskId: string - modelId: string + modelId: GenerateContentInput['model'] adapter?: Adapter source?: GenerateContentInput['meta'] memoizer?: Memoizer @@ -141,7 +141,10 @@ export class ZaiContext { } public async getModel(): Promise { - return this._client.getModelDetails(this.modelId) + // getModelDetails resolves a single model; for a fallback array we report + // the primary entry's details. Fallbacks are honored at the request layer. + const primary = Array.isArray(this.modelId) ? 
this.modelId[0] : this.modelId + return this._client.getModelDetails(primary) } public on(type: K, listener: (event: ContextEvents[K]) => void) { diff --git a/packages/zai/src/zai.ts b/packages/zai/src/zai.ts index 77cec628e26..65b655fe37a 100644 --- a/packages/zai/src/zai.ts +++ b/packages/zai/src/zai.ts @@ -91,8 +91,13 @@ export type ZaiConfig = { client: BotpressClientLike | Cognitive /** Optional user ID for tracking and attribution */ userId?: string - /** Model to use: 'best' (default), 'fast', or specific model like 'openai:gpt-4' */ - modelId?: Models + /** + * Model to use: 'best' (default), 'auto', 'fast', or a specific model like 'openai:gpt-4'. + * An array can be provided to specify ordered fallback models if the primary + * model is unavailable. Server-side fallback is honored on the cognitive-v2 + * path; the legacy integration path uses the first entry only. + */ + modelId?: Models | Models[] /** Active learning configuration to improve operations over time */ activeLearning?: ActiveLearning /** Namespace for organizing tasks (default: 'zai') */ @@ -113,23 +118,22 @@ const _ZaiConfig = z.object({ client: z.custom(), userId: z.string().describe('The ID of the user consuming the API').optional(), modelId: z - .custom( + .custom( (value) => { - if (typeof value !== 'string') { - return false - } + const isValidSingle = (v: unknown): v is string => + typeof v === 'string' && (v === 'best' || v === 'fast' || v === 'auto' || v.includes(':')) - if (value !== 'best' && value !== 'fast' && !value.includes(':')) { - return false + if (Array.isArray(value)) { + return value.length > 0 && value.every(isValidSingle) } - return true + return isValidSingle(value) }, { - message: 'Invalid model ID', + message: 'At least one model ID is invalid. 
Expected a model string or an array of model strings.', } ) - .describe('The ID of the model you want to use') + .describe('The ID of the model you want to use, or an ordered list of fallback models') .default('best' satisfies Models), activeLearning: _ActiveLearning.default({ enable: false }), namespace: z @@ -211,7 +215,7 @@ export class Zai { private _userId: string | undefined - protected Model: Models + protected Model: Models | Models[] protected ModelDetails: Model protected namespace: string protected adapter: Adapter @@ -249,7 +253,7 @@ export class Zai { this.namespace = parsed.namespace this._userId = parsed.userId - this.Model = parsed.modelId as Models + this.Model = parsed.modelId as Models | Models[] this.activeLearning = parsed.activeLearning as ActiveLearning this.adapter = parsed.activeLearning?.enable @@ -295,7 +299,11 @@ export class Zai { protected async fetchModelDetails(): Promise { if (!this.ModelDetails) { - this.ModelDetails = await this.client.getModelDetails(this.Model) + // getModelDetails resolves a single model. When a fallback array is + // configured, we describe the primary model — fallbacks are only relevant + // at the request layer. + const primaryModel = Array.isArray(this.Model) ? 
this.Model[0] : this.Model + this.ModelDetails = await this.client.getModelDetails(primaryModel) } } diff --git a/plugins/conversation-insights/package.json b/plugins/conversation-insights/package.json index 37083ac8330..d48070ef299 100644 --- a/plugins/conversation-insights/package.json +++ b/plugins/conversation-insights/package.json @@ -7,7 +7,7 @@ }, "private": true, "dependencies": { - "@botpress/cognitive": "0.5.3", + "@botpress/cognitive": "0.5.4", "@botpress/sdk": "workspace:*", "browser-or-node": "^2.1.1", "jsonrepair": "^3.10.0" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c555841b02e..d3df6258cd0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -3028,7 +3028,7 @@ importers: specifier: 1.45.0 version: link:../client '@botpress/cognitive': - specifier: 0.5.3 + specifier: 0.5.4 version: link:../cognitive '@bpinternal/thicktoken': specifier: ^2.0.0 @@ -3214,7 +3214,7 @@ importers: packages/zai: dependencies: '@botpress/cognitive': - specifier: 0.5.3 + specifier: 0.5.4 version: link:../cognitive '@bpinternal/thicktoken': specifier: ^1.0.0 @@ -3327,7 +3327,7 @@ importers: plugins/conversation-insights: dependencies: '@botpress/cognitive': - specifier: 0.5.3 + specifier: 0.5.4 version: link:../../packages/cognitive '@botpress/sdk': specifier: workspace:*