code-yeongyu · youming-ai · Feb 3, 2026 · Feb 3, 2026 · Feb 3, 2026 · Feb 3, 2026
diff --git a/docs/configurations.md b/docs/configurations.md
@@ -163,19 +163,20 @@ Override built-in agent settings:
 }
 ```
 
-Each agent supports: `model`, `temperature`, `top_p`, `prompt`, `prompt_append`, `tools`, `disable`, `description`, `mode`, `color`, `permission`, `category`, `variant`, `maxTokens`, `thinking`, `reasoningEffort`, `textVerbosity`, `providerOptions`.
+Each agent supports: `model`, `fallback_models`, `temperature`, `top_p`, `prompt`, `prompt_append`, `tools`, `disable`, `description`, `mode`, `color`, `permission`, `category`, `variant`, `maxTokens`, `thinking`, `reasoningEffort`, `textVerbosity`, `providerOptions`.
 
 ### Additional Agent Options
 
-| Option              | Type    | Description                                                                                     |
-| ------------------- | ------- | ----------------------------------------------------------------------------------------------- |
-| `category`          | string  | Category name to inherit model and other settings from category defaults                             |
-| `variant`           | string  | Model variant (e.g., `max`, `high`, `medium`, `low`, `xhigh`)                                 |
-| `maxTokens`         | number  | Maximum tokens for response. Passed directly to OpenCode SDK.                                      |
-| `thinking`          | object  | Extended thinking configuration for Anthropic models. See [Thinking Options](#thinking-options) below. |
-| `reasoningEffort`   | string  | OpenAI reasoning effort level. Values: `low`, `medium`, `high`, `xhigh`.                         |
-| `textVerbosity`      | string  | Text verbosity level. Values: `low`, `medium`, `high`.                                        |
-| `providerOptions`    | object  | Provider-specific options passed directly to OpenCode SDK.                                      |
+| Option              | Type           | Description                                                                                     |
+| ------------------- | -------------- | ----------------------------------------------------------------------------------------------- |
+| `fallback_models`   | string/array   | Fallback models for runtime switching on API errors. Single string or array of model strings.  |
+| `category`          | string         | Category name to inherit model and other settings from category defaults                        |
+| `variant`           | string         | Model variant (e.g., `max`, `high`, `medium`, `low`, `xhigh`)                                   |
+| `maxTokens`         | number         | Maximum tokens for response. Passed directly to OpenCode SDK.                                   |
+| `thinking`          | object         | Extended thinking configuration for Anthropic models. See [Thinking Options](#thinking-options) below. |
+| `reasoningEffort`   | string         | OpenAI reasoning effort level. Values: `low`, `medium`, `high`, `xhigh`.                        |
+| `textVerbosity`     | string         | Text verbosity level. Values: `low`, `medium`, `high`.                                          |
+| `providerOptions`   | object         | Provider-specific options passed directly to OpenCode SDK.                                      |
 
 #### Thinking Options (Anthropic)
 
@@ -714,6 +715,63 @@ Configure concurrency limits for background agent tasks. This controls how many
 - Allow more concurrent tasks for fast/cheap models (e.g., Gemini Flash)
 - Respect provider rate limits by setting provider-level caps
 
+## Runtime Fallback
+
+Automatically switch to backup models when the primary model encounters transient API errors (rate limits, overload, etc.). This keeps conversations running without manual intervention.
+
+```json
+{
+  "runtime_fallback": {
+    "enabled": true,
+    "retry_on_errors": [429, 503, 529],
+    "max_fallback_attempts": 3,
+    "cooldown_seconds": 60,
+    "notify_on_fallback": true
+  }
+}
+```
+
+| Option                  | Default           | Description                                                                 |
+| ----------------------- | ----------------- | --------------------------------------------------------------------------- |
+| `enabled`               | `true`            | Enable runtime fallback                                                     |
+| `retry_on_errors`       | `[429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, unavailable, overload) |
+| `max_fallback_attempts` | `3`               | Maximum fallback attempts per session (1-10)                                |
+| `cooldown_seconds`      | `60`              | Cooldown in seconds before retrying a failed model                          |
+| `notify_on_fallback`    | `true`            | Show toast notification when switching to a fallback model                  |
+
+### How It Works
+
+1. When an API error matching `retry_on_errors` occurs, the `runtime-fallback` hook intercepts it
+2. The next request automatically uses the next available model from `fallback_models`
+3. Failed models enter a cooldown period before being retried
+4. An optional toast notification (`notify_on_fallback`) informs you of the model switch
+
+### Configuring Fallback Models
+
+Define `fallback_models` at the agent or category level:
+
+```json
+{
+  "agents": {
+    "sisyphus": {
+      "model": "anthropic/claude-opus-4-5",
+      "fallback_models": ["openai/gpt-5.2", "google/gemini-3-pro"]
+    }
+  },
+  "categories": {
+    "ultrabrain": {
+      "model": "openai/gpt-5.2-codex",
+      "fallback_models": ["anthropic/claude-opus-4-5", "google/gemini-3-pro"]
+    }
+  }
+}
+```
+
+For the `sisyphus` agent above, when the primary model fails:
+1. First fallback: `openai/gpt-5.2`
+2. Second fallback: `google/gemini-3-pro`
+3. After `max_fallback_attempts`, returns to primary model
+
 ## Categories
 
 Categories enable domain-specific task delegation via the `delegate_task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.
@@ -825,14 +883,15 @@ Add your own categories or override built-in ones:
 }
 ```
 
-Each category supports: `model`, `temperature`, `top_p`, `maxTokens`, `thinking`, `reasoningEffort`, `textVerbosity`, `tools`, `prompt_append`, `variant`, `description`, `is_unstable_agent`.
+Each category supports: `model`, `fallback_models`, `temperature`, `top_p`, `maxTokens`, `thinking`, `reasoningEffort`, `textVerbosity`, `tools`, `prompt_append`, `variant`, `description`, `is_unstable_agent`.
 
 ### Additional Category Options
 
-| Option             | Type    | Default | Description                                                                                         |
-| ------------------ | ------- | ------- | --------------------------------------------------------------------------------------------------- |
-| `description`       | string  | -       | Human-readable description of the category's purpose. Shown in delegate_task prompt.                     |
-| `is_unstable_agent`| boolean | `false`  | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. |
+| Option              | Type         | Default | Description                                                                                         |
+| ------------------- | ------------ | ------- | --------------------------------------------------------------------------------------------------- |
+| `fallback_models`   | string/array | -       | Fallback models for runtime switching on API errors. Single string or array of model strings.      |
+| `description`       | string       | -       | Human-readable description of the category's purpose. Shown in delegate_task prompt.                |
+| `is_unstable_agent` | boolean      | `false` | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models.    |
 
 ## Model Resolution System
 
@@ -967,7 +1026,7 @@ Disable specific built-in hooks via `disabled_hooks` in `~/.config/opencode/oh-m
 }
 ```
 
-Available hooks: `todo-continuation-enforcer`, `context-window-monitor`, `session-recovery`, `session-notification`, `comment-checker`, `grep-output-truncator`, `tool-output-truncator`, `directory-agents-injector`, `directory-readme-injector`, `empty-task-response-detector`, `think-mode`, `anthropic-context-window-limit-recovery`, `rules-injector`, `background-notification`, `auto-update-checker`, `startup-toast`, `keyword-detector`, `agent-usage-reminder`, `non-interactive-env`, `interactive-bash-session`, `compaction-context-injector`, `thinking-block-validator`, `claude-code-hooks`, `ralph-loop`, `preemptive-compaction`, `auto-slash-command`, `sisyphus-junior-notepad`, `start-work`
+Available hooks: `todo-continuation-enforcer`, `context-window-monitor`, `session-recovery`, `session-notification`, `comment-checker`, `grep-output-truncator`, `tool-output-truncator`, `directory-agents-injector`, `directory-readme-injector`, `empty-task-response-detector`, `think-mode`, `anthropic-context-window-limit-recovery`, `rules-injector`, `background-notification`, `auto-update-checker`, `startup-toast`, `keyword-detector`, `agent-usage-reminder`, `non-interactive-env`, `interactive-bash-session`, `compaction-context-injector`, `thinking-block-validator`, `claude-code-hooks`, `ralph-loop`, `preemptive-compaction`, `auto-slash-command`, `sisyphus-junior-notepad`, `start-work`, `runtime-fallback`
 
 **Note on `directory-agents-injector`**: This hook is **automatically disabled** when running on OpenCode 1.1.37+ because OpenCode now has native support for dynamically resolving AGENTS.md files from subdirectories (PR #10678). This prevents duplicate AGENTS.md injection. For older OpenCode versions, the hook remains active to provide the same functionality.
 

diff --git a/docs/features.md b/docs/features.md
@@ -352,6 +352,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle.
 | **session-recovery** | Stop | Recovers from session errors - missing tool results, thinking block issues, empty messages. |
 | **anthropic-context-window-limit-recovery** | Stop | Handles Claude context window limits gracefully. |
 | **background-compaction** | Stop | Auto-compacts sessions hitting token limits. |
+| **runtime-fallback** | Stop | Automatically switches to fallback models on retryable API errors (by default 429, 503, 529). Configurable via `runtime_fallback` and `fallback_models`. |
 
 #### Truncation & Context Management
 

diff --git a/src/agents/types.ts b/src/agents/types.ts
@@ -90,6 +90,7 @@ export type AgentName = BuiltinAgentName
 export type AgentOverrideConfig = Partial<AgentConfig> & {
   prompt_append?: string
   variant?: string
+  fallback_models?: string | string[]
 }
 
 export type AgentOverrides = Partial<Record<OverridableAgentName, AgentOverrideConfig>>
diff --git a/src/agents/utils.test.ts b/src/agents/utils.test.ts
@@ -49,7 +49,7 @@ describe("createBuiltinAgents with model overrides", () => {
     expect(agents.sisyphus.thinking).toBeUndefined()
   })
 
-  test("Atlas uses uiSelectedModel when provided", async () => {
+  test("Atlas does not use uiSelectedModel (respects its own fallback chain)", async () => {
     // #given
     const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
       new Set(["openai/gpt-5.2", "anthropic/claude-sonnet-4-5"])
@@ -73,7 +73,7 @@ describe("createBuiltinAgents with model overrides", () => {
 
       // #then
       expect(agents.atlas).toBeDefined()
-      expect(agents.atlas.model).toBe("openai/gpt-5.2")
+      expect(agents.atlas.model).toBe("anthropic/claude-sonnet-4-5")
     } finally {
       fetchSpy.mockRestore()
     }

diff --git a/src/agents/utils.ts b/src/agents/utils.ts
@@ -11,7 +11,7 @@ import { createAtlasAgent, atlasPromptMetadata } from "./atlas"
 import { createMomusAgent, momusPromptMetadata } from "./momus"
 import { createHephaestusAgent } from "./hephaestus"
 import type { AvailableAgent, AvailableCategory, AvailableSkill } from "./dynamic-agent-prompt-builder"
-import { deepMerge, fetchAvailableModels, resolveModelPipeline, AGENT_MODEL_REQUIREMENTS, readConnectedProvidersCache, isModelAvailable, isAnyFallbackModelAvailable, migrateAgentConfig } from "../shared"
+import { deepMerge, fetchAvailableModels, resolveModelPipeline, AGENT_MODEL_REQUIREMENTS, readConnectedProvidersCache, isModelAvailable, isAnyFallbackModelAvailable, migrateAgentConfig, normalizeFallbackModels } from "../shared"
 import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../tools/delegate-task/constants"
 import { resolveMultipleSkills } from "../features/opencode-skill-loader/skill-content"
 import { createBuiltinSkills } from "../features/builtin-skills"
@@ -156,13 +156,14 @@ function applyCategoryOverride(
 function applyModelResolution(input: {
   uiSelectedModel?: string
   userModel?: string
+  userFallbackModels?: string[]
   requirement?: { fallbackChain?: { providers: string[]; model: string; variant?: string }[] }
   availableModels: Set<string>
   systemDefaultModel?: string
 }) {
-  const { uiSelectedModel, userModel, requirement, availableModels, systemDefaultModel } = input
+  const { uiSelectedModel, userModel, userFallbackModels, requirement, availableModels, systemDefaultModel } = input
   return resolveModelPipeline({
-    intent: { uiSelectedModel, userModel },
+    intent: { uiSelectedModel, userModel, userFallbackModels },
     constraints: { availableModels },
     policy: { fallbackChain: requirement?.fallbackChain, systemDefaultModel },
   })
@@ -290,8 +291,8 @@ export async function createBuiltinAgents(
      if (agentName === "atlas") continue
      if (disabledAgents.some((name) => name.toLowerCase() === agentName.toLowerCase())) continue
 
-     const override = agentOverrides[agentName]
-       ?? Object.entries(agentOverrides).find(([key]) => key.toLowerCase() === agentName.toLowerCase())?.[1]
+      const overrideEntry = Object.entries(agentOverrides).find(([key]) => key.toLowerCase() === agentName.toLowerCase())
+      const override = (agentOverrides[agentName] ?? overrideEntry?.[1]) as AgentOverrideConfig | undefined
      const requirement = AGENT_MODEL_REQUIREMENTS[agentName]
 
      // Check if agent requires a specific model
@@ -300,16 +301,16 @@ export async function createBuiltinAgents(
          continue
        }
      }
-
      const isPrimaryAgent = isFactory(source) && source.mode === "primary"
 
-    const resolution = applyModelResolution({
-      uiSelectedModel: isPrimaryAgent ? uiSelectedModel : undefined,
-      userModel: override?.model,
-      requirement,
-      availableModels,
-      systemDefaultModel,
-    })
+     const resolution = applyModelResolution({
+       uiSelectedModel: isPrimaryAgent ? uiSelectedModel : undefined,
+       userModel: override?.model,
+        userFallbackModels: normalizeFallbackModels(override?.fallback_models),
+       requirement,
+       availableModels,
+       systemDefaultModel,
+     })
     if (!resolution) continue
     const { model, variant: resolvedVariant } = resolution
 
@@ -355,13 +356,14 @@ export async function createBuiltinAgents(
      isAnyFallbackModelAvailable(sisyphusRequirement.fallbackChain, availableModels)
 
    if (!disabledAgents.includes("sisyphus") && meetsSisyphusAnyModelRequirement) {
-    let sisyphusResolution = applyModelResolution({
-      uiSelectedModel,
-      userModel: sisyphusOverride?.model,
-      requirement: sisyphusRequirement,
-      availableModels,
-      systemDefaultModel,
-    })
+     let sisyphusResolution = applyModelResolution({
+       uiSelectedModel,
+       userModel: sisyphusOverride?.model,
+        userFallbackModels: normalizeFallbackModels(sisyphusOverride?.fallback_models),
+       requirement: sisyphusRequirement,
+       availableModels,
+       systemDefaultModel,
+     })
 
     if (isFirstRunNoCache && !sisyphusOverride?.model && !uiSelectedModel) {
       sisyphusResolution = getFirstFallbackModel(sisyphusRequirement)
@@ -403,6 +405,7 @@ export async function createBuiltinAgents(
     if (hasRequiredModel) {
       let hephaestusResolution = applyModelResolution({
         userModel: hephaestusOverride?.model,
+        userFallbackModels: normalizeFallbackModels(hephaestusOverride?.fallback_models),
         requirement: hephaestusRequirement,
         availableModels,
         systemDefaultModel,
@@ -449,17 +452,18 @@ export async function createBuiltinAgents(
      result[name] = config
    }
 
-    if (!disabledAgents.includes("atlas")) {
-      const orchestratorOverride = agentOverrides["atlas"]
-      const atlasRequirement = AGENT_MODEL_REQUIREMENTS["atlas"]
-
-      const atlasResolution = applyModelResolution({
-        uiSelectedModel,
-        userModel: orchestratorOverride?.model,
-        requirement: atlasRequirement,
-        availableModels,
-        systemDefaultModel,
-      })
+   if (!disabledAgents.includes("atlas")) {
+     const orchestratorOverride = agentOverrides["atlas"]
+     const atlasRequirement = AGENT_MODEL_REQUIREMENTS["atlas"]
+
+     const atlasResolution = applyModelResolution({
+       // NOTE: Atlas does NOT use uiSelectedModel - respects its own fallbackChain (k2p5 primary)
+       userModel: orchestratorOverride?.model,
+        userFallbackModels: normalizeFallbackModels(orchestratorOverride?.fallback_models),
+       requirement: atlasRequirement,
+       availableModels,
+       systemDefaultModel,
+     })
 
     if (atlasResolution) {
       const { model: atlasModel, variant: atlasResolvedVariant } = atlasResolution

diff --git a/src/config/index.ts b/src/config/index.ts
@@ -11,6 +11,8 @@ export {
   RalphLoopConfigSchema,
   TmuxConfigSchema,
   TmuxLayoutSchema,
+  RuntimeFallbackConfigSchema,
+  FallbackModelsSchema,
 } from "./schema"
 
 export type {
@@ -29,4 +31,5 @@ export type {
   TmuxLayout,
   SisyphusConfig,
   SisyphusTasksConfig,
+  RuntimeFallbackConfig,
 } from "./schema"