diff --git a/.env.example b/.env.example
index 0fb9ad8aa6e..d928be01f9d 100644
--- a/.env.example
+++ b/.env.example
@@ -169,6 +169,9 @@ OPENAI_API_KEY=sk-xxxxxxxxx
# FAL_API_KEY=fal-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+### Nebius ###
+
+# NEBIUS_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
########################################
############ Market Service ############
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a20eabef9d..d441f6642ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,56 @@
# Changelog
+### [Version 1.120.4](https://github.com/lobehub/lobe-chat/compare/v1.120.3...v1.120.4)
+
+Released on **2025-09-01**
+
+#### 💄 Styles
+
+- **misc**: Adjust ControlsForm component to adapt to mobile phone display.
+
+
+
+
+Improvements and Fixes
+
+#### Styles
+
+- **misc**: Adjust ControlsForm component to adapt to mobile phone display, closes [#9013](https://github.com/lobehub/lobe-chat/issues/9013) ([c6038c0](https://github.com/lobehub/lobe-chat/commit/c6038c0))
+
+
+
+
+
+[](#readme-top)
+
+
+
+### [Version 1.120.3](https://github.com/lobehub/lobe-chat/compare/v1.120.2...v1.120.3)
+
+Released on **2025-09-01**
+
+#### 💄 Styles
+
+- **misc**: Support new provider Nebius.
+
+
+
+
+Improvements and Fixes
+
+#### Styles
+
+- **misc**: Support new provider Nebius, closes [#8903](https://github.com/lobehub/lobe-chat/issues/8903) ([c15791d](https://github.com/lobehub/lobe-chat/commit/c15791d))
+
+
+
+
+
+[](#readme-top)
+
+
+
### [Version 1.120.2](https://github.com/lobehub/lobe-chat/compare/v1.120.1...v1.120.2)
Released on **2025-08-31**
diff --git a/Dockerfile b/Dockerfile
index 8dbcbc5aa0e..17842e3c82b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -194,6 +194,8 @@ ENV \
MODELSCOPE_API_KEY="" MODELSCOPE_MODEL_LIST="" MODELSCOPE_PROXY_URL="" \
# Moonshot
MOONSHOT_API_KEY="" MOONSHOT_MODEL_LIST="" MOONSHOT_PROXY_URL="" \
+ # Nebius
+ NEBIUS_API_KEY="" NEBIUS_MODEL_LIST="" NEBIUS_PROXY_URL="" \
# Novita
NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \
# Nvidia NIM
diff --git a/Dockerfile.database b/Dockerfile.database
index ce39459d6da..e69e4ffc9a3 100644
--- a/Dockerfile.database
+++ b/Dockerfile.database
@@ -236,6 +236,8 @@ ENV \
MODELSCOPE_API_KEY="" MODELSCOPE_MODEL_LIST="" MODELSCOPE_PROXY_URL="" \
# Moonshot
MOONSHOT_API_KEY="" MOONSHOT_MODEL_LIST="" MOONSHOT_PROXY_URL="" \
+ # Nebius
+ NEBIUS_API_KEY="" NEBIUS_MODEL_LIST="" NEBIUS_PROXY_URL="" \
# Novita
NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \
# Nvidia NIM
diff --git a/Dockerfile.pglite b/Dockerfile.pglite
index 1c3f4e43456..b3195609eea 100644
--- a/Dockerfile.pglite
+++ b/Dockerfile.pglite
@@ -196,6 +196,8 @@ ENV \
MODELSCOPE_API_KEY="" MODELSCOPE_MODEL_LIST="" MODELSCOPE_PROXY_URL="" \
# Moonshot
MOONSHOT_API_KEY="" MOONSHOT_MODEL_LIST="" MOONSHOT_PROXY_URL="" \
+ # Nebius
+ NEBIUS_API_KEY="" NEBIUS_MODEL_LIST="" NEBIUS_PROXY_URL="" \
# Novita
NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \
# Nvidia NIM
diff --git a/changelog/v1.json b/changelog/v1.json
index 8d80aac21be..4c01b444162 100644
--- a/changelog/v1.json
+++ b/changelog/v1.json
@@ -1,4 +1,18 @@
[
+ {
+ "children": {
+ "improvements": ["Adjust ControlsForm component to adapt to mobile phone display."]
+ },
+ "date": "2025-09-01",
+ "version": "1.120.4"
+ },
+ {
+ "children": {
+ "improvements": ["Support new provider Nebius."]
+ },
+ "date": "2025-09-01",
+ "version": "1.120.3"
+ },
{
"children": {
"improvements": ["Remove base path."]
diff --git a/package.json b/package.json
index 8f9ec567007..d1ecf05da66 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@lobehub/chat",
- "version": "1.120.2",
+ "version": "1.120.4",
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
"keywords": [
"framework",
@@ -192,7 +192,7 @@
"fast-deep-equal": "^3.1.3",
"file-type": "^21.0.0",
"framer-motion": "^12.23.12",
- "gpt-tokenizer": "^2.9.0",
+ "gpt-tokenizer": "^3.0.0",
"gray-matter": "^4.0.3",
"html-to-text": "^9.0.5",
"i18next": "^25.3.2",
diff --git a/packages/model-bank/package.json b/packages/model-bank/package.json
index 002a5496378..87f3dfbeca2 100644
--- a/packages/model-bank/package.json
+++ b/packages/model-bank/package.json
@@ -36,6 +36,7 @@
"./mistral": "./src/aiModels/mistral.ts",
"./modelscope": "./src/aiModels/modelscope.ts",
"./moonshot": "./src/aiModels/moonshot.ts",
+ "./nebius": "./src/aiModels/nebius.ts",
"./novita": "./src/aiModels/novita.ts",
"./nvidia": "./src/aiModels/nvidia.ts",
"./ollama": "./src/aiModels/ollama.ts",
diff --git a/packages/model-bank/src/aiModels/index.ts b/packages/model-bank/src/aiModels/index.ts
index 7372f246b63..acfb8be8aeb 100644
--- a/packages/model-bank/src/aiModels/index.ts
+++ b/packages/model-bank/src/aiModels/index.ts
@@ -31,6 +31,7 @@ import { default as minimax } from './minimax';
import { default as mistral } from './mistral';
import { default as modelscope } from './modelscope';
import { default as moonshot } from './moonshot';
+import { default as nebius } from './nebius';
import { default as novita } from './novita';
import { default as nvidia } from './nvidia';
import { default as ollama } from './ollama';
@@ -111,6 +112,7 @@ export const LOBE_DEFAULT_MODEL_LIST = buildDefaultModelList({
mistral,
modelscope,
moonshot,
+ nebius,
novita,
nvidia,
ollama,
@@ -173,6 +175,7 @@ export { default as minimax } from './minimax';
export { default as mistral } from './mistral';
export { default as modelscope } from './modelscope';
export { default as moonshot } from './moonshot';
+export { default as nebius } from './nebius';
export { default as novita } from './novita';
export { default as nvidia } from './nvidia';
export { default as ollama } from './ollama';
diff --git a/packages/model-bank/src/aiModels/nebius.ts b/packages/model-bank/src/aiModels/nebius.ts
new file mode 100644
index 00000000000..2e73f4ead7c
--- /dev/null
+++ b/packages/model-bank/src/aiModels/nebius.ts
@@ -0,0 +1,1046 @@
+import { AIChatModelCard } from '../types/aiModel';
+
+// https://studio.nebius.com/
+
+const nebiusChatModels: AIChatModelCard[] = [
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Kimi-K2-Instruct',
+ id: 'moonshotai/Kimi-K2-Instruct',
+ organization: 'moonshotai',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 2.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+  {
+    abilities: {
+      functionCall: true,
+    },
+    contextWindowTokens: 262_144,
+    displayName: 'Qwen3-Coder-480B-A35B-Instruct',
+    id: 'Qwen/Qwen3-Coder-480B-A35B-Instruct',
+    organization: 'Qwen',
+    pricing: {
+      units: [
+        { name: 'textInput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textOutput', rate: 1.8, strategy: 'fixed', unit: 'millionTokens' },
+      ],
+    },
+    type: 'chat',
+  },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'gpt-oss-120b',
+ enabled: true,
+ id: 'openai/gpt-oss-120b',
+ organization: 'openai',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'gpt-oss-20b',
+ id: 'openai/gpt-oss-20b',
+ organization: 'openai',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.05, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'GLM-4.5',
+ id: 'zai-org/GLM-4.5',
+ organization: 'zai-org',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 2.2, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'GLM-4.5-Air',
+ id: 'zai-org/GLM-4.5-Air',
+ organization: 'zai-org',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 1.2, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 163_840,
+ displayName: 'DeepSeek-R1-0528',
+ id: 'deepseek-ai/DeepSeek-R1-0528',
+ organization: 'deepseek',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.8, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 2.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 32_768,
+ displayName: 'DeepSeek-R1-0528 (fast)',
+ id: 'deepseek-ai/DeepSeek-R1-0528-fast',
+ organization: 'deepseek',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 6, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 262_144,
+ displayName: 'Qwen3-235B-A22B-Instruct-2507',
+ id: 'Qwen/Qwen3-235B-A22B-Instruct-2507',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 40_960,
+ displayName: 'Qwen3-235B-A22B',
+ id: 'Qwen/Qwen3-235B-A22B',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 40_960,
+ displayName: 'Qwen3-30B-A3B',
+ id: 'Qwen/Qwen3-30B-A3B',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 40_960,
+ displayName: 'Qwen3-30B-A3B (fast)',
+ id: 'Qwen/Qwen3-30B-A3B-fast',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.9, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 40_960,
+ displayName: 'Qwen3-32B',
+ id: 'Qwen/Qwen3-32B',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 40_960,
+ displayName: 'Qwen3-32B (fast)',
+ id: 'Qwen/Qwen3-32B-fast',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 40_960,
+ displayName: 'Qwen3-14B',
+ id: 'Qwen/Qwen3-14B',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.24, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 40_960,
+ displayName: 'Qwen3-4B (fast)',
+ id: 'Qwen/Qwen3-4B-fast',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.24, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Llama-3_1-Nemotron-Ultra-253B-v1',
+ id: 'nvidia/Llama-3_1-Nemotron-Ultra-253B-v1',
+ organization: 'nvidia',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 1.8, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 163_840,
+ displayName: 'DeepSeek-V3-0324',
+ id: 'deepseek-ai/DeepSeek-V3-0324',
+ organization: 'deepseek',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 32_768,
+ displayName: 'DeepSeek-V3-0324 (fast)',
+ id: 'deepseek-ai/DeepSeek-V3-0324-fast',
+ organization: 'deepseek',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 2.25, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 163_840,
+ displayName: 'DeepSeek-V3',
+ id: 'deepseek-ai/DeepSeek-V3',
+ organization: 'deepseek',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 163_840,
+ displayName: 'DeepSeek-R1',
+ id: 'deepseek-ai/DeepSeek-R1',
+ organization: 'deepseek',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.8, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 2.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 163_840,
+ displayName: 'DeepSeek-R1 (fast)',
+ id: 'deepseek-ai/DeepSeek-R1-fast',
+ organization: 'deepseek',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 6, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Llama-3.3-70B-Instruct',
+ id: 'meta-llama/Llama-3.3-70B-Instruct',
+ organization: 'meta',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Llama-3.3-70B-Instruct (fast)',
+ id: 'meta-llama/Llama-3.3-70B-Instruct-fast',
+ organization: 'meta',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Meta-Llama-3.1-70B-Instruct',
+ id: 'meta-llama/Meta-Llama-3.1-70B-Instruct',
+ organization: 'meta',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Meta-Llama-3.1-8B-Instruct',
+ id: 'meta-llama/Meta-Llama-3.1-8B-Instruct',
+ organization: 'meta',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.02, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Meta-Llama-3.1-8B-Instruct (fast)',
+ id: 'meta-llama/Meta-Llama-3.1-8B-Instruct-fast',
+ organization: 'meta',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.03, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.09, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Meta-Llama-3.1-405B-Instruct',
+ id: 'meta-llama/Meta-Llama-3.1-405B-Instruct',
+ organization: 'meta',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ contextWindowTokens: 128_000,
+ displayName: 'Mistral-Nemo-Instruct-2407',
+ id: 'mistralai/Mistral-Nemo-Instruct-2407',
+ organization: 'mistralai',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.04, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.12, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 32_768,
+ displayName: 'Qwen2.5-Coder-7B',
+ id: 'Qwen/Qwen2.5-Coder-7B',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.01, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.03, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 32_768,
+ displayName: 'Qwen2.5-Coder-7B (fast)',
+ id: 'Qwen/Qwen2.5-Coder-7B-fast',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.03, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.09, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Qwen2.5-Coder-32B-Instruct',
+ id: 'Qwen/Qwen2.5-Coder-32B-Instruct',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.18, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Qwen2.5-Coder-32B-Instruct (fast)',
+ id: 'Qwen/Qwen2.5-Coder-32B-Instruct-fast',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ contextWindowTokens: 8192,
+ displayName: 'Gemma-2-2b-it',
+ id: 'google/gemma-2-2b-it',
+ organization: 'google',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.02, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 8192,
+ displayName: 'Gemma-2-9b-it (fast)',
+ id: 'google/gemma-2-9b-it-fast',
+ organization: 'google',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.03, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.09, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Qwen2.5-32B-Instruct',
+ id: 'Qwen/Qwen2.5-32B-Instruct',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Qwen2.5-32B-Instruct (fast)',
+ id: 'Qwen/Qwen2.5-32B-Instruct-fast',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Qwen2.5-72B-Instruct',
+ id: 'Qwen/Qwen2.5-72B-Instruct',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Qwen2.5-72B-Instruct (fast)',
+ id: 'Qwen/Qwen2.5-72B-Instruct-fast',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 8192,
+ displayName: 'Llama3-OpenBioLLM-70B',
+ id: 'aaditya/Llama3-OpenBioLLM-70B',
+ organization: 'aaditya',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'QwQ-32B',
+ id: 'Qwen/QwQ-32B',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.45, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'QwQ-32B (fast)',
+ id: 'Qwen/QwQ-32B-fast',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 16_384,
+ displayName: 'phi-4',
+ id: 'microsoft/phi-4',
+ organization: 'microsoft',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Hermes-3-Llama-3.1-405B',
+ id: 'NousResearch/Hermes-3-Llama-405B',
+ organization: 'NousResearch',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Hermes-4-70B',
+ id: 'NousResearch/Hermes-4-70B',
+ organization: 'NousResearch',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Hermes-4-405B',
+ id: 'NousResearch/Hermes-4-405B',
+ organization: 'NousResearch',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'DeepSeek-R1-Distill-Llama-70B',
+ id: 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
+ organization: 'deepseek',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 128_000,
+ displayName: 'Devstral-Small-2505',
+ id: 'mistralai/Devstral-Small-2505',
+ organization: 'mistralai',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.24, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Llama-3_3-Nemotron-Super-49B-v1',
+ id: 'nvidia/Llama-3_3-Nemotron-Super-49B-v1',
+ organization: 'nvidia',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ reasoning: true,
+ },
+ contextWindowTokens: 262_144,
+ displayName: 'Qwen3-30B-A3B-Thinking-2507',
+ id: 'Qwen/Qwen3-30B-A3B-Thinking-2507',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 262_144,
+ displayName: 'Qwen3-30B-A3B-Instruct-2507',
+ id: 'Qwen/Qwen3-30B-A3B-Instruct-2507',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 262_144,
+ displayName: 'Qwen3-Coder-30B-A3B-Instruct',
+ id: 'Qwen/Qwen3-Coder-30B-A3B-Instruct',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Meta-Llama-Guard-3-8B',
+ id: 'meta-llama/Llama-Guard-3-8B',
+ organization: 'meta',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.02, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ vision: true,
+ },
+ contextWindowTokens: 32_768,
+ displayName: 'Qwen2-VL-72B-Instruct',
+ id: 'Qwen/Qwen2-VL-72B-Instruct',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ vision: true,
+ },
+ contextWindowTokens: 131_072,
+ displayName: 'Mistral-Small-3.1-24B-Instruct-2503',
+ id: 'mistralai/Mistral-Small-3.1-24B-Instruct-2503',
+ organization: 'mistralai',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.05, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ vision: true,
+ },
+ contextWindowTokens: 110_000,
+ displayName: 'Gemma-3-27b-it',
+ id: 'google/gemma-3-27b-it',
+ organization: 'google',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ vision: true,
+ },
+ contextWindowTokens: 110_000,
+ displayName: 'Gemma-3-27b-it (fast)',
+ id: 'google/gemma-3-27b-it-fast',
+ organization: 'google',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+ {
+ abilities: {
+ functionCall: true,
+ vision: true,
+ },
+ contextWindowTokens: 32_000,
+ displayName: 'Qwen2.5-VL-72B-Instruct',
+ id: 'Qwen/Qwen2.5-VL-72B-Instruct',
+ organization: 'Qwen',
+ pricing: {
+ units: [
+ { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' },
+ { name: 'textOutput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' },
+ ],
+ },
+ type: 'chat',
+ },
+];
+
+// The models below are pending verification.
+
+// export const nebiusImageModels: AIImageModelCard[] = [
+// {
+// contextWindowTokens: 0,
+// displayName: 'FLUX.1-dev',
+// id: 'black-forest-labs/flux-dev',
+// pricing: {
+// units: [
+// { name: 'imageGeneration', rate: 0.007, strategy: 'fixed', unit: 'image' },
+// ],
+// },
+// type: 'image',
+// },
+// {
+// contextWindowTokens: 0,
+// displayName: 'FLUX.1-schnell',
+// id: 'black-forest-labs/flux-schnell',
+// pricing: {
+// units: [
+// { name: 'imageGeneration', rate: 0.0013, strategy: 'fixed', unit: 'image' },
+// ],
+// },
+// type: 'image',
+// },
+// {
+// contextWindowTokens: 0,
+// displayName: 'Stable Diffusion XL 1.0',
+// id: 'stability-ai/sdxl',
+// pricing: {
+// units: [
+// { name: 'imageGeneration', rate: 0.003, strategy: 'fixed', unit: 'image' },
+// ],
+// },
+// type: 'image',
+// },
+// ];
+
+// export const nebiusEmbeddingModels: AIEmbeddingModelCard[] = [
+// {
+// contextWindowTokens: 32_768,
+// displayName: 'BGE-ICL',
+// id: 'BAAI/bge-en-icl',
+// maxDimension: 3072,
+// pricing: {
+// units: [
+// { name: 'textInput', rate: 0.01, strategy: 'fixed', unit: 'millionTokens' },
+// ],
+// },
+// type: 'embedding',
+// },
+// {
+// contextWindowTokens: 8192,
+// displayName: 'bge-multilingual-gemma2',
+// id: 'BAAI/bge-multilingual-gemma2',
+// maxDimension: 3072,
+// pricing: {
+// units: [
+// { name: 'textInput', rate: 0.01, strategy: 'fixed', unit: 'millionTokens' },
+// ],
+// },
+// type: 'embedding',
+// },
+// {
+// contextWindowTokens: 32_768,
+// displayName: 'e5-mistral-7b-instruct',
+// id: 'intfloat/e5-mistral-7b-instruct',
+// maxDimension: 1536,
+// pricing: {
+// units: [
+// { name: 'textInput', rate: 0.01, strategy: 'fixed', unit: 'millionTokens' },
+// ],
+// },
+// type: 'embedding',
+// },
+// {
+// contextWindowTokens: 40_960,
+// displayName: 'Qwen3-Embedding-8B',
+// id: 'Qwen/Qwen3-Embedding-8B',
+// maxDimension: 3072,
+// pricing: {
+// units: [
+// { name: 'textInput', rate: 0.01, strategy: 'fixed', unit: 'millionTokens' },
+// ],
+// },
+// type: 'embedding',
+// },
+// ];
+
+export const allModels: AIChatModelCard[] = [...nebiusChatModels];
+
+export default allModels;
diff --git a/packages/model-bank/src/aiModels/openrouter.ts b/packages/model-bank/src/aiModels/openrouter.ts
index 6ffd708b1bf..2b74d6c07c2 100644
--- a/packages/model-bank/src/aiModels/openrouter.ts
+++ b/packages/model-bank/src/aiModels/openrouter.ts
@@ -3,7 +3,7 @@ import { AIChatModelCard } from '../types/aiModel';
// https://openrouter.ai/docs/api-reference/list-available-models
const openrouterChatModels: AIChatModelCard[] = [
{
- contextWindowTokens: 128_000,
+ contextWindowTokens: 2_000_000,
description:
'根据上下文长度、主题和复杂性,你的请求将发送到 Llama 3 70B Instruct、Claude 3.5 Sonnet(自我调节)或 GPT-4o。',
displayName: 'Auto (best for prompt)',
@@ -184,7 +184,7 @@ const openrouterChatModels: AIChatModelCard[] = [
abilities: {
reasoning: true,
},
- contextWindowTokens: 40_960,
+ contextWindowTokens: 131_072,
description:
'Qwen3-235B-A22B 是由 Qwen 开发的 235B 参数专家混合 (MoE) 模型,每次前向传递激活 22B 参数。它支持在用于复杂推理、数学和代码任务的“思考”模式与用于一般对话效率的“非思考”模式之间无缝切换。该模型展示了强大的推理能力、多语言支持(100 多种语言和方言)、高级指令遵循和代理工具调用能力。它原生处理 32K 令牌上下文窗口,并使用基于 YaRN 的扩展扩展到 131K 令牌。',
displayName: 'Qwen3 235B A22B (Free)',
@@ -237,41 +237,11 @@ const openrouterChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
- {
- abilities: {
- reasoning: true,
- },
- contextWindowTokens: 32_000,
- description:
- 'GLM-Z1-9B-0414 是由 THUDM 开发的 GLM-4 系列中的 9B 参数语言模型。它采用了最初应用于更大 GLM-Z1 模型的技术,包括扩展强化学习、成对排名对齐以及对数学、代码和逻辑等推理密集型任务的训练。尽管其规模较小,但它在通用推理任务上表现出强大的性能,并在其权重级别中优于许多开源模型。',
- displayName: 'GLM Z1 9B (Free)',
- id: 'thudm/glm-z1-9b:free',
- type: 'chat',
- },
- {
- contextWindowTokens: 32_000,
- description:
- 'GLM-4-9B-0414 是 THUDM 开发的 GLM-4 系列中的 90 亿参数语言模型。GLM-4-9B-0414 使用与其较大的 32B 对应模型相同的强化学习和对齐策略进行训练,相对于其规模实现了高性能,使其适用于仍需要强大语言理解和生成能力的资源受限部署。',
- displayName: 'GLM 4 9B (Free)',
- id: 'thudm/glm-4-9b:free',
- type: 'chat',
- },
{
abilities: {
reasoning: true,
},
contextWindowTokens: 32_768,
- description:
- 'GLM-Z1-32B-0414 是 GLM-4-32B 的增强推理变体,专为深度数学、逻辑和面向代码的问题解决而构建。它应用扩展强化学习(任务特定和基于通用成对偏好)来提高复杂多步骤任务的性能。与基础 GLM-4-32B 模型相比,Z1 显著提升了结构化推理和形式化领域的能力。\n\n该模型支持通过提示工程强制执行“思考”步骤,并为长格式输出提供改进的连贯性。它针对代理工作流进行了优化,并支持长上下文(通过 YaRN)、JSON 工具调用和用于稳定推理的细粒度采样配置。非常适合需要深思熟虑、多步骤推理或形式化推导的用例。',
- displayName: 'GLM Z1 32B (Free)',
- id: 'thudm/glm-z1-32b:free',
- type: 'chat',
- },
- {
- abilities: {
- reasoning: true,
- },
- contextWindowTokens: 32_000,
description:
'GLM-Z1-32B-0414 是 GLM-4-32B 的增强推理变体,专为深度数学、逻辑和面向代码的问题解决而构建。它应用扩展强化学习(任务特定和基于通用成对偏好)来提高复杂多步骤任务的性能。与基础 GLM-4-32B 模型相比,Z1 显著提升了结构化推理和形式化领域的能力。\n\n该模型支持通过提示工程强制执行“思考”步骤,并为长格式输出提供改进的连贯性。它针对代理工作流进行了优化,并支持长上下文(通过 YaRN)、JSON 工具调用和用于稳定推理的细粒度采样配置。非常适合需要深思熟虑、多步骤推理或形式化推导的用例。',
displayName: 'GLM Z1 32B',
@@ -288,7 +258,7 @@ const openrouterChatModels: AIChatModelCard[] = [
abilities: {
reasoning: true,
},
- contextWindowTokens: 32_768,
+ contextWindowTokens: 32_000,
description:
'GLM-4-32B-0414 是一个 32B 双语(中英)开放权重语言模型,针对代码生成、函数调用和代理式任务进行了优化。它在 15T 高质量和重推理数据上进行了预训练,并使用人类偏好对齐、拒绝采样和强化学习进一步完善。该模型在复杂推理、工件生成和结构化输出任务方面表现出色,在多个基准测试中达到了与 GPT-4o 和 DeepSeek-V3-0324 相当的性能。',
displayName: 'GLM 4 32B (Free)',
@@ -715,7 +685,7 @@ const openrouterChatModels: AIChatModelCard[] = [
type: 'chat',
},
{
- contextWindowTokens: 64_000,
+ contextWindowTokens: 163_840,
description:
'DeepSeek V3 是一个 685B 参数的专家混合模型,是 DeepSeek 团队旗舰聊天模型系列的最新迭代。\n\n它继承了 [DeepSeek V3](/deepseek/deepseek-chat-v3) 模型,并在各种任务上表现出色。',
displayName: 'DeepSeek V3 0324',
@@ -1018,7 +988,7 @@ const openrouterChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
},
- contextWindowTokens: 32_768,
+ contextWindowTokens: 131_072,
description:
'Llama 3.3 是 Llama 系列最先进的多语言开源大型语言模型,以极低成本体验媲美 405B 模型的性能。基于 Transformer 结构,并通过监督微调(SFT)和人类反馈强化学习(RLHF)提升有用性和安全性。其指令调优版本专为多语言对话优化,在多项行业基准上表现优于众多开源和封闭聊天模型。知识截止日期为 2023 年 12 月',
displayName: 'Llama 3.3 70B Instruct',
@@ -1035,7 +1005,7 @@ const openrouterChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
},
- contextWindowTokens: 32_768,
+ contextWindowTokens: 65_536,
description:
'Llama 3.3 是 Llama 系列最先进的多语言开源大型语言模型,以极低成本体验媲美 405B 模型的性能。基于 Transformer 结构,并通过监督微调(SFT)和人类反馈强化学习(RLHF)提升有用性和安全性。其指令调优版本专为多语言对话优化,在多项行业基准上表现优于众多开源和封闭聊天模型。知识截止日期为 2023 年 12 月',
displayName: 'Llama 3.3 70B Instruct (Free)',
@@ -1050,7 +1020,7 @@ const openrouterChatModels: AIChatModelCard[] = [
type: 'chat',
},
{
- contextWindowTokens: 32_768,
+ contextWindowTokens: 131_072,
description: 'LLaMA 3.1 提供多语言支持,是业界领先的生成模型之一。',
displayName: 'Llama 3.1 8B (Free)',
id: 'meta-llama/llama-3.1-8b-instruct:free',
diff --git a/packages/model-runtime/src/index.ts b/packages/model-runtime/src/index.ts
index e948027cf0a..413154896b9 100644
--- a/packages/model-runtime/src/index.ts
+++ b/packages/model-runtime/src/index.ts
@@ -13,6 +13,7 @@ export { LobeMinimaxAI } from './minimax';
export { LobeMistralAI } from './mistral';
export { ModelRuntime } from './ModelRuntime';
export { LobeMoonshotAI } from './moonshot';
+export { LobeNebiusAI } from './nebius';
export { LobeOllamaAI } from './ollama';
export { LobeOpenAI } from './openai';
export { LobeOpenRouterAI } from './openrouter';
diff --git a/packages/model-runtime/src/nebius/index.ts b/packages/model-runtime/src/nebius/index.ts
new file mode 100644
index 00000000000..cdf0b6746b7
--- /dev/null
+++ b/packages/model-runtime/src/nebius/index.ts
@@ -0,0 +1,78 @@
+import { ModelProvider } from '../types';
+import { processMultiProviderModelList } from '../utils/modelParse';
+import { createOpenAICompatibleRuntime } from '../utils/openaiCompatibleFactory';
+
+export interface NebiusModelCard { // Minimal Nebius model entry; only `id` is declared here.
+ id: string;
+}
+
+/**
+ * Nebius AI Studio runtime built on the OpenAI-compatible factory.
+ *
+ * - Chat completions are always requested with `stream: true`.
+ * - `models` calls `GET {baseURL}/models?verbose=true` and maps each entry to the
+ *   flat card shape passed to `processMultiProviderModelList`.
+ */
+export const LobeNebiusAI = createOpenAICompatibleRuntime({
+  baseURL: 'https://api.studio.nebius.com/v1',
+  chatCompletion: {
+    // Forward the payload unchanged apart from forcing streaming on.
+    handlePayload: (payload) => ({ ...payload, stream: true }) as any,
+  },
+  debug: {
+    chatCompletion: () => process.env.DEBUG_NEBIUS_CHAT_COMPLETION === '1',
+  },
+  models: async ({ client }) => {
+    // Use the client's base URL when it has one; strip trailing slashes before appending.
+    const base = (client as any).baseURL || 'https://api.studio.nebius.com/v1';
+    const url = `${base.replace(/\/+$/, '')}/models?verbose=true`;
+
+    const res = await fetch(url, {
+      headers: {
+        Accept: 'application/json',
+        Authorization: `Bearer ${client.apiKey}`,
+      },
+      method: 'GET',
+    });
+
+    if (!res.ok) {
+      throw new Error(`Failed to fetch Nebius models: ${res.status} ${res.statusText}`);
+    }
+
+    const body = (await res.json()) as any;
+    // Anything other than an array under `data` is treated as an empty list.
+    const rawList: any[] = Array.isArray(body?.data) ? body.data : [];
+
+    const standardList = rawList.map((m: any) => {
+      // `architecture.modality` is read as "<input>-><output>"; only the output side
+      // decides the type, and only 'image' / 'embedding' are recognised.
+      const modality = m.architecture?.modality;
+      const output =
+        typeof modality === 'string' && modality.includes('->')
+          ? modality.split('->')[1]?.trim().toLowerCase()
+          : undefined;
+      const inferredType = output === 'image' || output === 'embedding' ? output : undefined;
+
+      return {
+        contextWindowTokens: m.context_length ?? undefined,
+        description: m.description ?? '',
+        displayName: m.name ?? m.id,
+        functionCall: m.features?.includes('function-calling'),
+        id: m.id,
+        // Guard: an entry without `pricing` must not throw and abort the whole listing.
+        pricing: m.pricing
+          ? {
+              input: m.pricing.prompt * 1_000_000,
+              output: m.pricing.completion * 1_000_000,
+            }
+          : undefined,
+        reasoning: m.features?.includes('reasoning'),
+        type: inferredType,
+        vision: m.features?.includes('vision'),
+      };
+    });
+
+    return processMultiProviderModelList(standardList, 'nebius');
+  },
+  provider: ModelProvider.Nebius,
+});
diff --git a/packages/model-runtime/src/runtimeMap.ts b/packages/model-runtime/src/runtimeMap.ts
index ca7882a1fdc..6dbb7ef9374 100644
--- a/packages/model-runtime/src/runtimeMap.ts
+++ b/packages/model-runtime/src/runtimeMap.ts
@@ -29,6 +29,7 @@ import { LobeMinimaxAI } from './minimax';
import { LobeMistralAI } from './mistral';
import { LobeModelScopeAI } from './modelscope';
import { LobeMoonshotAI } from './moonshot';
+import { LobeNebiusAI } from './nebius';
import { LobeNovitaAI } from './novita';
import { LobeNvidiaAI } from './nvidia';
import { LobeOllamaAI } from './ollama';
@@ -89,6 +90,7 @@ export const providerRuntimeMap = {
mistral: LobeMistralAI,
modelscope: LobeModelScopeAI,
moonshot: LobeMoonshotAI,
+ nebius: LobeNebiusAI,
novita: LobeNovitaAI,
nvidia: LobeNvidiaAI,
ollama: LobeOllamaAI,
diff --git a/packages/model-runtime/src/types/type.ts b/packages/model-runtime/src/types/type.ts
index 9aee729cddc..d8df11b5733 100644
--- a/packages/model-runtime/src/types/type.ts
+++ b/packages/model-runtime/src/types/type.ts
@@ -59,6 +59,7 @@ export enum ModelProvider {
Mistral = 'mistral',
ModelScope = 'modelscope',
Moonshot = 'moonshot',
+ Nebius = 'nebius',
Novita = 'novita',
Nvidia = 'nvidia',
Ollama = 'ollama',
diff --git a/packages/types/src/user/settings/keyVaults.ts b/packages/types/src/user/settings/keyVaults.ts
index d7cb8fb6b3b..9f15ed431ed 100644
--- a/packages/types/src/user/settings/keyVaults.ts
+++ b/packages/types/src/user/settings/keyVaults.ts
@@ -68,6 +68,7 @@ export interface UserKeyVaults extends SearchEngineKeyVaults {
mistral?: OpenAICompatibleKeyVault;
modelscope?: OpenAICompatibleKeyVault;
moonshot?: OpenAICompatibleKeyVault;
+ nebius?: OpenAICompatibleKeyVault;
novita?: OpenAICompatibleKeyVault;
nvidia?: OpenAICompatibleKeyVault;
ollama?: OpenAICompatibleKeyVault;
diff --git a/src/config/llm.ts b/src/config/llm.ts
index 09ad0af26dd..441ac8a8ce7 100644
--- a/src/config/llm.ts
+++ b/src/config/llm.ts
@@ -87,6 +87,9 @@ export const getLLMConfig = () => {
ENABLED_STEPFUN: z.boolean(),
STEPFUN_API_KEY: z.string().optional(),
+ ENABLED_NEBIUS: z.boolean(),
+ NEBIUS_API_KEY: z.string().optional(),
+
ENABLED_NOVITA: z.boolean(),
NOVITA_API_KEY: z.string().optional(),
@@ -364,6 +367,9 @@ export const getLLMConfig = () => {
ENABLED_AIHUBMIX: !!process.env.AIHUBMIX_API_KEY,
AIHUBMIX_API_KEY: process.env.AIHUBMIX_API_KEY,
+
+ ENABLED_NEBIUS: !!process.env.NEBIUS_API_KEY,
+ NEBIUS_API_KEY: process.env.NEBIUS_API_KEY,
},
});
};
diff --git a/src/config/modelProviders/index.ts b/src/config/modelProviders/index.ts
index 44a68642cf8..f2f60f5e1b1 100644
--- a/src/config/modelProviders/index.ts
+++ b/src/config/modelProviders/index.ts
@@ -31,6 +31,7 @@ import MinimaxProvider from './minimax';
import MistralProvider from './mistral';
import ModelScopeProvider from './modelscope';
import MoonshotProvider from './moonshot';
+import NebiusProvider from './nebius';
import NovitaProvider from './novita';
import NvidiaProvider from './nvidia';
import OllamaProvider from './ollama';
@@ -175,6 +176,7 @@ export const DEFAULT_MODEL_PROVIDER_LIST = [
InfiniAIProvider,
AkashChatProvider,
QiniuProvider,
+ NebiusProvider,
];
export const filterEnabledModels = (provider: ModelProviderCard) => {
@@ -218,6 +220,7 @@ export { default as MinimaxProviderCard } from './minimax';
export { default as MistralProviderCard } from './mistral';
export { default as ModelScopeProviderCard } from './modelscope';
export { default as MoonshotProviderCard } from './moonshot';
+export { default as NebiusProviderCard } from './nebius';
export { default as NovitaProviderCard } from './novita';
export { default as NvidiaProviderCard } from './nvidia';
export { default as OllamaProviderCard } from './ollama';
diff --git a/src/config/modelProviders/nebius.ts b/src/config/modelProviders/nebius.ts
new file mode 100644
index 00000000000..68e8a52649e
--- /dev/null
+++ b/src/config/modelProviders/nebius.ts
@@ -0,0 +1,20 @@
+import { ModelProviderCard } from '@/types/llm';
+
+const Nebius: ModelProviderCard = { // Provider card for Nebius (id 'nebius').
+ chatModels: [], // no chat models are listed statically on this card
+ checkModel: 'Qwen/Qwen2.5-Coder-7B', // NOTE(review): presumably the model used for the connectivity check — confirm it is still served
+ description: 'Nebius 通过构建大规模GPU集群和垂直整合的云平台,为全球AI创新者提供高性能基础设施。',
+ id: 'nebius',
+ modelsUrl: 'https://studio.nebius.com/',
+ name: 'Nebius',
+ settings: {
+ proxyUrl: {
+ placeholder: 'https://api.studio.nebius.com/v1', // same URL as the runtime's default baseURL
+ },
+ sdkType: 'openai',
+ showModelFetcher: true,
+ },
+ url: 'https://nebius.com/',
+};
+
+export default Nebius;
diff --git a/src/features/ChatInput/ActionBar/Model/ControlsForm.tsx b/src/features/ChatInput/ActionBar/Model/ControlsForm.tsx
index 00905954f91..113316e8e0e 100644
--- a/src/features/ChatInput/ActionBar/Model/ControlsForm.tsx
+++ b/src/features/ChatInput/ActionBar/Model/ControlsForm.tsx
@@ -1,6 +1,6 @@
import { Form } from '@lobehub/ui';
import type { FormItemProps } from '@lobehub/ui';
-import { Form as AntdForm, Switch } from 'antd';
+import { Form as AntdForm, Switch, Grid } from 'antd';
import isEqual from 'fast-deep-equal';
import Link from 'next/link';
import { memo } from 'react';
@@ -32,11 +32,21 @@ const ControlsForm = memo(() => {
const modelExtendParams = useAiInfraStore(aiModelSelectors.modelExtendParams(model, provider));
+ const screens = Grid.useBreakpoint();
+ const isNarrow = !screens.sm;
+
+ const descWide = { display: 'inline-block', width: 300 } as const;
+ const descNarrow = {
+ display: 'block',
+ maxWidth: '100%',
+ whiteSpace: 'normal',
+ } as const;
+
const items = [
{
children: ,
desc: (
-
+
单条对话生成成本最高可降低 90%,响应速度提升 4 倍(
{
),
label: t('extendParams.disableContextCaching.title'),
+ layout: isNarrow ? 'vertical' : 'horizontal',
minWidth: undefined,
name: 'disableContextCaching',
},
{
children: ,
desc: (
-
+
基于 Claude Thinking 机制限制(
{
),
label: t('extendParams.enableReasoning.title'),
- layout: 'horizontal',
+ layout: isNarrow ? 'vertical' : 'horizontal',
minWidth: undefined,
name: 'enableReasoning',
},
@@ -132,14 +143,16 @@ const ControlsForm = memo(() => {
},
{
children: ,
- desc: t('extendParams.urlContext.desc'),
+ desc: isNarrow ? (
+ {t('extendParams.urlContext.desc')}
+ ) : (
+ t('extendParams.urlContext.desc')
+ ),
label: t('extendParams.urlContext.title'),
- layout: 'horizontal',
+ layout: isNarrow ? 'vertical' : 'horizontal',
minWidth: undefined,
name: 'urlContext',
- style: {
- width: 445,
- },
+ style: isNarrow ? undefined : { width: 445 },
tag: 'urlContext',
},
{