code · pull · May 13, 2026 · May 13, 2026 · May 13, 2026
diff --git a/packages/cognitive/package.json b/packages/cognitive/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@botpress/cognitive",
-  "version": "0.5.3",
+  "version": "0.5.4",
   "description": "Wrapper around the Botpress Client to call LLMs",
   "main": "./dist/index.cjs",
   "module": "./dist/index.mjs",

diff --git a/packages/cognitive/refresh-models.ts b/packages/cognitive/refresh-models.ts
@@ -51,8 +51,12 @@ export const defaultModel: Model = ${JSON.stringify(defaultModel, undefined, 2)}
 
   fs.writeFileSync(modelsListPath, newFile, 'utf8')
 
-  const collectRefs = (list: Model[]) =>
-    Array.from(new Set(list.map(toRef).filter(Boolean))).sort((a, b) => a.localeCompare(b))
+  // Unique, locale-sorted refs. Every falsy toRef result (null, undefined, '') is dropped before de-duplication.
+  const collectRefs = (list: Model[]) => {
+    const refs = list.map(toRef).filter((ref): ref is NonNullable<typeof ref> => Boolean(ref))
+    return Array.from(new Set(refs)).sort((a, b) => a.localeCompare(b))
+  }
+
   const collectAliases = (list: Model[]) =>
     Array.from(new Set(list.flatMap((m) => (m.aliases || []).map((a) => `${m.id.split(':')[0]}:${a}`))))
 

diff --git a/packages/cognitive/src/client.ts b/packages/cognitive/src/client.ts
@@ -18,7 +18,7 @@ import {
   RemoteModelProvider,
 } from './models'
 import { GenerateContentOutput } from './schemas.gen'
-import { CognitiveProps, Events, InputProps, Request, Response } from './types'
+import { CognitiveProps, Events, InputModel, InputProps, Request, Response } from './types'
 
 export class Cognitive {
   public ['$$IS_COGNITIVE'] = true
@@ -136,6 +136,10 @@ export class Cognitive {
     })
   }
 
+  private _getPrimaryModel(input: InputProps): InputModel | undefined {
+    return Array.isArray(input.model) ? input.model[0] : input.model // head of a fallback list, or the lone selector
+  }
+
   private async _selectModel(ref: string): Promise<{ integration: string; model: string }> {
     const parseRef = (ref: string) => {
       const parts = ref.split(':')
@@ -150,7 +154,7 @@ export class Cognitive {
 
     const downtimes = [...preferences.downtimes, ...(this._downtimes ?? [])]
 
-    if (ref === 'best') {
+    if (ref === 'best' || ref === 'auto') {
       return parseRef(pickModel(preferences.best, downtimes))
     }
 
@@ -229,7 +233,9 @@ export class Cognitive {
   }
 
   public async generateContent(input: InputProps): Promise<Response> {
-    if (!this._useBeta || !isKnownV2Model(input.model)) {
+    const primaryInputModel = this._getPrimaryModel(input)
+
+    if (!this._useBeta || !isKnownV2Model(primaryInputModel)) {
       return this._generateContent(input)
     }
 
@@ -325,6 +331,8 @@ export class Cognitive {
 
     const client = this._client.abortable(signal)
 
+    const primaryInputModel = this._getPrimaryModel(input)
+
     let props: Request = { input }
     let integration: string
     let model: string
@@ -336,7 +344,7 @@ export class Cognitive {
       meta: any
     }>(
       async () => {
-        const selection = await this._selectModel(input.model ?? 'best')
+        const selection = await this._selectModel(primaryInputModel ?? 'best')
 
         integration = selection.integration
         model = selection.model

diff --git a/packages/cognitive/src/types.ts b/packages/cognitive/src/types.ts
@@ -20,8 +20,25 @@ export type GenerationMetadata = {
   }
 }
 
+/**
+ * Model selector accepted by `generateContent`.
+ *
+ * - `'best'` / `'auto'`: aliases. `'best'` is the original SDK selector;
+ *   `'auto'` was added when cognitive-v2 landed (the v2 server uses that
+ *   name). Both pick the first available entry from `preferences.best` on
+ *   the legacy path, and are forwarded as-is on the v2 path.
+ * - `'fast'`: resolved the same way — first available entry from
+ *   `preferences.fast` on the legacy path, forwarded on the v2 path.
+ * - `ModelRef`: any `provider:model` string.
+ */
+export type InputModel = 'auto' | 'best' | 'fast' | ModelRef
+
 export type InputProps = Omit<GenerateContentInput, 'model'> & {
-  model?: 'best' | 'fast' | ModelRef
+  /**
+   * Model to use, or an ordered list of fallback models. Ordered fallback is honored only on the cognitive-v2 path;
+   * the legacy integration path uses the first entry and falls back to server-side preferences instead.
+   */
+  model?: InputModel | InputModel[]
   signal?: AbortSignal
 }
 

diff --git a/packages/cognitive/tsconfig.json b/packages/cognitive/tsconfig.json
@@ -1,7 +1,5 @@
 {
   "extends": "../../tsconfig.json",
-  "compilerOptions": {
-    "strict": false
-  },
+  "compilerOptions": {},
   "include": ["src/**/*", "e2e/**/*", "*.ts"]
 }
diff --git a/packages/llmz/package.json b/packages/llmz/package.json
@@ -72,7 +72,7 @@
   },
   "peerDependencies": {
     "@botpress/client": "1.45.0",
-    "@botpress/cognitive": "0.5.3",
+    "@botpress/cognitive": "0.5.4",
     "@bpinternal/thicktoken": "^2.0.0",
     "@bpinternal/zui": "^2.1.1"
   },

diff --git a/packages/llmz/src/context.ts b/packages/llmz/src/context.ts
@@ -913,8 +913,17 @@ export class Context implements Serializable<Context.JSON> {
       throw new Error('Invalid temperature. Expected a number between 0 and 2.')
     }
 
-    if (typeof model !== 'string' || model.length === 0) {
-      throw new Error('Invalid model. Expected a non-empty string.')
+    const isValidModel = (m: unknown): m is string =>
+      typeof m === 'string' && (m === 'best' || m === 'fast' || m === 'auto' || m.includes(':'))
+
+    if (Array.isArray(model)) {
+      if (model.length === 0 || !model.every(isValidModel)) {
+        throw new Error(
+          "Invalid model. Expected a non-empty array of model strings ('best'/'fast'/'auto' or 'provider:model')."
+        )
+      }
+    } else if (!isValidModel(model)) {
+      throw new Error("Invalid model. Expected 'best'/'fast'/'auto' or 'provider:model'.")
     }
 
     return {

diff --git a/packages/llmz/src/llmz.ts b/packages/llmz/src/llmz.ts
@@ -431,6 +431,9 @@ const executeIteration = async ({
   let startedAt = Date.now()
   const traces = iteration.traces
 
+  // When an array is provided, additional entries act as ordered fallbacks at the
+  // cognitive layer. Token budget and stop-token computation only need the primary
+  // model's details, so we resolve those off the first entry.
   const modelRef = Array.isArray(iteration.model) ? iteration.model[0]! : iteration.model
   const model = await cognitive.getModelDetails(modelRef).catch((thrown) => {
     throw new CognitiveError(`Failed to fetch model details for model "${modelRef}": ${getErrorMessage(thrown)}`)
@@ -461,7 +464,9 @@ const executeIteration = async ({
     .generateContent({
       signal: controller.signal,
       systemPrompt: messages.find((x) => x.role === 'system')?.content,
-      model: model.ref,
+      // Validated upstream in Context (see context.ts isValidModel). Cast narrows
+      // Models' `({} & string)` escape hatch down to cognitive's stricter InputModel.
+      model: iteration.model as Required<Parameters<Cognitive['generateContent']>[0]>['model'],
       temperature: iteration.temperature,
       responseFormat: 'text',
       reasoningEffort: iteration.reasoningEffort,

diff --git a/packages/zai/package.json b/packages/zai/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@botpress/zai",
   "description": "Zui AI (zai) – An LLM utility library written on top of Zui and the Botpress API",
-  "version": "2.6.19",
+  "version": "2.6.20",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
   "exports": {
@@ -32,7 +32,7 @@
   "author": "",
   "license": "ISC",
   "dependencies": {
-    "@botpress/cognitive": "0.5.3",
+    "@botpress/cognitive": "0.5.4",
     "json5": "^2.2.3",
     "jsonrepair": "^3.10.0",
     "lodash-es": "^4.17.21",

diff --git a/packages/zai/src/context.ts b/packages/zai/src/context.ts
@@ -15,7 +15,7 @@ export type ZaiContextProps = {
   client: Cognitive
   taskType: string
   taskId: string
-  modelId: string
+  modelId: GenerateContentInput['model']
   adapter?: Adapter
   source?: GenerateContentInput['meta']
   memoizer?: Memoizer
@@ -141,7 +141,10 @@ export class ZaiContext {
   }
 
   public async getModel(): Promise<Model> {
-    return this._client.getModelDetails(this.modelId)
+    // Details are looked up for one model only, so a fallback list is represented
+    // by its first (primary) entry; a single selector is passed through unchanged.
+    const { modelId } = this
+    return this._client.getModelDetails(Array.isArray(modelId) ? modelId[0] : modelId)
   }
 
   public on<K extends keyof ContextEvents>(type: K, listener: (event: ContextEvents[K]) => void) {

diff --git a/packages/zai/src/zai.ts b/packages/zai/src/zai.ts
@@ -91,8 +91,13 @@ export type ZaiConfig = {
   client: BotpressClientLike | Cognitive
   /** Optional user ID for tracking and attribution */
   userId?: string
-  /** Model to use: 'best' (default), 'fast', or specific model like 'openai:gpt-4' */
-  modelId?: Models
+  /**
+   * Model to use: 'best' (default), 'fast', or specific model like 'openai:gpt-4'.
+   * An array can be provided to specify ordered fallback models if the primary
+   * model is unavailable. Server-side fallback is honored on the cognitive-v2
+   * path; the legacy integration path uses the first entry only.
+   */
+  modelId?: Models | Models[]
   /** Active learning configuration to improve operations over time */
   activeLearning?: ActiveLearning
   /** Namespace for organizing tasks (default: 'zai') */
@@ -113,23 +118,22 @@ const _ZaiConfig = z.object({
   client: z.custom<BotpressClientLike | Cognitive>(),
   userId: z.string().describe('The ID of the user consuming the API').optional(),
   modelId: z
-    .custom<Models>(
+    .custom<Models | Models[]>(
       (value) => {
-        if (typeof value !== 'string') {
-          return false
-        }
+        const isValidSingle = (v: unknown): v is string =>
+          typeof v === 'string' && (v === 'best' || v === 'fast' || v === 'auto' || v.includes(':'))
 
-        if (value !== 'best' && value !== 'fast' && !value.includes(':')) {
-          return false
+        if (Array.isArray(value)) {
+          return value.length > 0 && value.every(isValidSingle)
         }
 
-        return true
+        return isValidSingle(value)
       },
       {
-        message: 'Invalid model ID',
+        message: 'At least one model ID is invalid. Expected a model string or an array of model strings.',
       }
     )
-    .describe('The ID of the model you want to use')
+    .describe('The ID of the model you want to use, or an ordered list of fallback models')
     .default('best' satisfies Models),
   activeLearning: _ActiveLearning.default({ enable: false }),
   namespace: z
@@ -211,7 +215,7 @@ export class Zai {
 
   private _userId: string | undefined
 
-  protected Model: Models
+  protected Model: Models | Models[]
   protected ModelDetails: Model
   protected namespace: string
   protected adapter: Adapter
@@ -249,7 +253,7 @@ export class Zai {
 
     this.namespace = parsed.namespace
     this._userId = parsed.userId
-    this.Model = parsed.modelId as Models
+    this.Model = parsed.modelId as Models | Models[]
     this.activeLearning = parsed.activeLearning as ActiveLearning
 
     this.adapter = parsed.activeLearning?.enable
@@ -295,7 +299,11 @@ export class Zai {
 
   protected async fetchModelDetails(): Promise<void> {
     if (!this.ModelDetails) {
-      this.ModelDetails = await this.client.getModelDetails(this.Model)
+      // Only one model can be described at a time: for a fallback list that is
+      // the first (primary) entry, otherwise the configured selector itself.
+      // Fallback entries only come into play at the request layer.
+      const configured = this.Model
+      this.ModelDetails = await this.client.getModelDetails(Array.isArray(configured) ? configured[0] : configured)
     }
   }
 

diff --git a/plugins/conversation-insights/package.json b/plugins/conversation-insights/package.json
@@ -7,7 +7,7 @@
   },
   "private": true,
   "dependencies": {
-    "@botpress/cognitive": "0.5.3",
+    "@botpress/cognitive": "0.5.4",
     "@botpress/sdk": "workspace:*",
     "browser-or-node": "^2.1.1",
     "jsonrepair": "^3.10.0"

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml