diff --git a/.env.example b/.env.example index 0fb9ad8aa6e..d928be01f9d 100644 --- a/.env.example +++ b/.env.example @@ -169,6 +169,9 @@ OPENAI_API_KEY=sk-xxxxxxxxx # FAL_API_KEY=fal-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +### Nebius ### + +# NEBIUS_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ######################################## ############ Market Service ############ diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a20eabef9d..d441f6642ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,56 @@ # Changelog +### [Version 1.120.4](https://github.com/lobehub/lobe-chat/compare/v1.120.3...v1.120.4) + +Released on **2025-09-01** + +#### 💄 Styles + +- **misc**: Adjust ControlsForm component to adapt to mobile phone display. + +
+ +<details>
+<summary><kbd>Improvements and Fixes</kbd></summary> + +#### Styles + +- **misc**: Adjust ControlsForm component to adapt to mobile phone display, closes [#9013](https://github.com/lobehub/lobe-chat/issues/9013) ([c6038c0](https://github.com/lobehub/lobe-chat/commit/c6038c0)) + +</details>
+ +<div align="right">
+ +[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top) + +</div>
+ +### [Version 1.120.3](https://github.com/lobehub/lobe-chat/compare/v1.120.2...v1.120.3) + +Released on **2025-09-01** + +#### 💄 Styles + +- **misc**: Support new provider Nebius. + +
+ +<details>
+<summary><kbd>Improvements and Fixes</kbd></summary> + +#### Styles + +- **misc**: Support new provider Nebius, closes [#8903](https://github.com/lobehub/lobe-chat/issues/8903) ([c15791d](https://github.com/lobehub/lobe-chat/commit/c15791d)) + +</details>
+ +<div align="right">
+ +[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top) + +</div>
+ ### [Version 1.120.2](https://github.com/lobehub/lobe-chat/compare/v1.120.1...v1.120.2) Released on **2025-08-31** diff --git a/Dockerfile b/Dockerfile index 8dbcbc5aa0e..17842e3c82b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -194,6 +194,8 @@ ENV \ MODELSCOPE_API_KEY="" MODELSCOPE_MODEL_LIST="" MODELSCOPE_PROXY_URL="" \ # Moonshot MOONSHOT_API_KEY="" MOONSHOT_MODEL_LIST="" MOONSHOT_PROXY_URL="" \ + # Nebius + NEBIUS_API_KEY="" NEBIUS_MODEL_LIST="" NEBIUS_PROXY_URL="" \ # Novita NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \ # Nvidia NIM diff --git a/Dockerfile.database b/Dockerfile.database index ce39459d6da..e69e4ffc9a3 100644 --- a/Dockerfile.database +++ b/Dockerfile.database @@ -236,6 +236,8 @@ ENV \ MODELSCOPE_API_KEY="" MODELSCOPE_MODEL_LIST="" MODELSCOPE_PROXY_URL="" \ # Moonshot MOONSHOT_API_KEY="" MOONSHOT_MODEL_LIST="" MOONSHOT_PROXY_URL="" \ + # Nebius + NEBIUS_API_KEY="" NEBIUS_MODEL_LIST="" NEBIUS_PROXY_URL="" \ # Novita NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \ # Nvidia NIM diff --git a/Dockerfile.pglite b/Dockerfile.pglite index 1c3f4e43456..b3195609eea 100644 --- a/Dockerfile.pglite +++ b/Dockerfile.pglite @@ -196,6 +196,8 @@ ENV \ MODELSCOPE_API_KEY="" MODELSCOPE_MODEL_LIST="" MODELSCOPE_PROXY_URL="" \ # Moonshot MOONSHOT_API_KEY="" MOONSHOT_MODEL_LIST="" MOONSHOT_PROXY_URL="" \ + # Nebius + NEBIUS_API_KEY="" NEBIUS_MODEL_LIST="" NEBIUS_PROXY_URL="" \ # Novita NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \ # Nvidia NIM diff --git a/changelog/v1.json b/changelog/v1.json index 8d80aac21be..4c01b444162 100644 --- a/changelog/v1.json +++ b/changelog/v1.json @@ -1,4 +1,18 @@ [ + { + "children": { + "improvements": ["Adjust ControlsForm component to adapt to mobile phone display."] + }, + "date": "2025-09-01", + "version": "1.120.4" + }, + { + "children": { + "improvements": ["Support new provider Nebius."] + }, + "date": "2025-09-01", + "version": "1.120.3" + }, { "children": { "improvements": ["Remove base path."] diff --git a/package.json b/package.json index 
8f9ec567007..d1ecf05da66 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lobehub/chat", - "version": "1.120.2", + "version": "1.120.4", "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.", "keywords": [ "framework", @@ -192,7 +192,7 @@ "fast-deep-equal": "^3.1.3", "file-type": "^21.0.0", "framer-motion": "^12.23.12", - "gpt-tokenizer": "^2.9.0", + "gpt-tokenizer": "^3.0.0", "gray-matter": "^4.0.3", "html-to-text": "^9.0.5", "i18next": "^25.3.2", diff --git a/packages/model-bank/package.json b/packages/model-bank/package.json index 002a5496378..87f3dfbeca2 100644 --- a/packages/model-bank/package.json +++ b/packages/model-bank/package.json @@ -36,6 +36,7 @@ "./mistral": "./src/aiModels/mistral.ts", "./modelscope": "./src/aiModels/modelscope.ts", "./moonshot": "./src/aiModels/moonshot.ts", + "./nebius": "./src/aiModels/nebius.ts", "./novita": "./src/aiModels/novita.ts", "./nvidia": "./src/aiModels/nvidia.ts", "./ollama": "./src/aiModels/ollama.ts", diff --git a/packages/model-bank/src/aiModels/index.ts b/packages/model-bank/src/aiModels/index.ts index 7372f246b63..acfb8be8aeb 100644 --- a/packages/model-bank/src/aiModels/index.ts +++ b/packages/model-bank/src/aiModels/index.ts @@ -31,6 +31,7 @@ import { default as minimax } from './minimax'; import { default as mistral } from './mistral'; import { default as modelscope } from './modelscope'; import { default as moonshot } from './moonshot'; +import { default as nebius } from './nebius'; import { default as novita } from './novita'; import { default as nvidia } from './nvidia'; import { default as ollama } from './ollama'; @@ -111,6 +112,7 @@ export const LOBE_DEFAULT_MODEL_LIST = buildDefaultModelList({ mistral, modelscope, moonshot, + nebius, novita, nvidia, ollama, @@ -173,6 +175,7 @@ export { 
default as minimax } from './minimax'; export { default as mistral } from './mistral'; export { default as modelscope } from './modelscope'; export { default as moonshot } from './moonshot'; +export { default as nebius } from './nebius'; export { default as novita } from './novita'; export { default as nvidia } from './nvidia'; export { default as ollama } from './ollama'; diff --git a/packages/model-bank/src/aiModels/nebius.ts b/packages/model-bank/src/aiModels/nebius.ts new file mode 100644 index 00000000000..2e73f4ead7c --- /dev/null +++ b/packages/model-bank/src/aiModels/nebius.ts @@ -0,0 +1,1046 @@ +import { AIChatModelCard } from '../types/aiModel'; + +// https://studio.nebius.com/ + +const nebiusChatModels: AIChatModelCard[] = [ + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Kimi-K2-Instruct', + id: 'moonshotai/Kimi-K2-Instruct', + organization: 'moonshotai', + pricing: { + units: [ + { name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 2.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 262_144, + displayName: 'Qwen/Qwen3-Coder-480B-A35B-Instruct', + id: 'Qwen/Qwen3-Coder-480B-A35B-Instruct', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 1.8, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 131_072, + displayName: 'gpt-oss-120b', + enabled: true, + id: 'openai/gpt-oss-120b', + organization: 'openai', + pricing: { + units: [ + { name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: 
true, + reasoning: true, + }, + contextWindowTokens: 131_072, + displayName: 'gpt-oss-20b', + id: 'openai/gpt-oss-20b', + organization: 'openai', + pricing: { + units: [ + { name: 'textInput', rate: 0.05, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 131_072, + displayName: 'GLM-4.5', + id: 'zai-org/GLM-4.5', + organization: 'zai-org', + pricing: { + units: [ + { name: 'textInput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 2.2, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 131_072, + displayName: 'GLM-4.5-Air', + id: 'zai-org/GLM-4.5-Air', + organization: 'zai-org', + pricing: { + units: [ + { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 1.2, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 163_840, + displayName: 'DeepSeek-R1-0528', + id: 'deepseek-ai/DeepSeek-R1-0528', + organization: 'deepseek', + pricing: { + units: [ + { name: 'textInput', rate: 0.8, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 2.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 32_768, + displayName: 'DeepSeek-R1-0528 (fast)', + id: 'deepseek-ai/DeepSeek-R1-0528-fast', + organization: 'deepseek', + pricing: { + units: [ + { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 6, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + 
functionCall: true, + }, + contextWindowTokens: 262_144, + displayName: 'Qwen3-235B-A22B-Instruct-2507', + id: 'Qwen/Qwen3-235B-A22B-Instruct-2507', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 40_960, + displayName: 'Qwen3-235B-A22B', + id: 'Qwen/Qwen3-235B-A22B', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 40_960, + displayName: 'Qwen3-30B-A3B', + id: 'Qwen/Qwen3-30B-A3B', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 40_960, + displayName: 'Qwen3-30B-A3B (fast)', + id: 'Qwen/Qwen3-30B-A3B-fast', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.9, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 40_960, + displayName: 'Qwen3-32B', + id: 'Qwen/Qwen3-32B', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: 
true, + reasoning: true, + }, + contextWindowTokens: 40_960, + displayName: 'Qwen3-32B (fast)', + id: 'Qwen/Qwen3-32B-fast', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 40_960, + displayName: 'Qwen3-14B', + id: 'Qwen/Qwen3-14B', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.24, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 40_960, + displayName: 'Qwen3-4B (fast)', + id: 'Qwen/Qwen3-4B-fast', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.24, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 131_072, + displayName: 'Llama-3_1-Nemotron-Ultra-253B-v1', + id: 'nvidia/Llama-3_1-Nemotron-Ultra-253B-v1', + organization: 'nvidia', + pricing: { + units: [ + { name: 'textInput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 1.8, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 163_840, + displayName: 'DeepSeek-V3-0324', + id: 'deepseek-ai/DeepSeek-V3-0324', + organization: 'deepseek', + pricing: { + units: [ + { name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + 
functionCall: true, + }, + contextWindowTokens: 32_768, + displayName: 'DeepSeek-V3-0324 (fast)', + id: 'deepseek-ai/DeepSeek-V3-0324-fast', + organization: 'deepseek', + pricing: { + units: [ + { name: 'textInput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 2.25, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 163_840, + displayName: 'DeepSeek-V3', + id: 'deepseek-ai/DeepSeek-V3', + organization: 'deepseek', + pricing: { + units: [ + { name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 163_840, + displayName: 'DeepSeek-R1', + id: 'deepseek-ai/DeepSeek-R1', + organization: 'deepseek', + pricing: { + units: [ + { name: 'textInput', rate: 0.8, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 2.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 163_840, + displayName: 'DeepSeek-R1 (fast)', + id: 'deepseek-ai/DeepSeek-R1-fast', + organization: 'deepseek', + pricing: { + units: [ + { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 6, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Llama-3.3-70B-Instruct', + id: 'meta-llama/Llama-3.3-70B-Instruct', + organization: 'meta', + pricing: { + units: [ + { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + 
contextWindowTokens: 131_072, + displayName: 'Llama-3.3-70B-Instruct (fast)', + id: 'meta-llama/Llama-3.3-70B-Instruct-fast', + organization: 'meta', + pricing: { + units: [ + { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Meta-Llama-3.1-70B-Instruct', + id: 'meta-llama/Meta-Llama-3.1-70B-Instruct', + organization: 'meta', + pricing: { + units: [ + { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Meta-Llama-3.1-8B-Instruct', + id: 'meta-llama/Meta-Llama-3.1-8B-Instruct', + organization: 'meta', + pricing: { + units: [ + { name: 'textInput', rate: 0.02, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Meta-Llama-3.1-8B-Instruct (fast)', + id: 'meta-llama/Meta-Llama-3.1-8B-Instruct-fast', + organization: 'meta', + pricing: { + units: [ + { name: 'textInput', rate: 0.03, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.09, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Meta-Llama-3.1-405B-Instruct', + id: 'meta-llama/Meta-Llama-3.1-405B-Instruct', + organization: 'meta', + pricing: { + units: [ + { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 
'chat', + }, + { + contextWindowTokens: 128_000, + displayName: 'Mistral-Nemo-Instruct-2407', + id: 'mistralai/Mistral-Nemo-Instruct-2407', + organization: 'mistralai', + pricing: { + units: [ + { name: 'textInput', rate: 0.04, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.12, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 32_768, + displayName: 'Qwen2.5-Coder-7B', + id: 'Qwen/Qwen2.5-Coder-7B', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.01, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.03, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 32_768, + displayName: 'Qwen2.5-Coder-7B (fast)', + id: 'Qwen/Qwen2.5-Coder-7B-fast', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.03, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.09, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Qwen2.5-Coder-32B-Instruct', + id: 'Qwen/Qwen2.5-Coder-32B-Instruct', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.18, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Qwen2.5-Coder-32B-Instruct (fast)', + id: 'Qwen/Qwen2.5-Coder-32B-Instruct-fast', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + contextWindowTokens: 
8192, + displayName: 'Gemma-2-2b-it', + id: 'google/gemma-2-2b-it', + organization: 'google', + pricing: { + units: [ + { name: 'textInput', rate: 0.02, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 8192, + displayName: 'Gemma-2-9b-it (fast)', + id: 'google/gemma-2-9b-it-fast', + organization: 'google', + pricing: { + units: [ + { name: 'textInput', rate: 0.03, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.09, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Qwen2.5-32B-Instruct', + id: 'Qwen/Qwen2.5-32B-Instruct', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Qwen2.5-32B-Instruct (fast)', + id: 'Qwen/Qwen2.5-32B-Instruct-fast', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Qwen2.5-72B-Instruct', + id: 'Qwen/Qwen2.5-72B-Instruct', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Qwen2.5-72B-Instruct (fast)', 
+ id: 'Qwen/Qwen2.5-72B-Instruct-fast', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 8192, + displayName: 'Llama3-OpenBioLLM-70B', + id: 'aaditya/Llama3-OpenBioLLM-70B', + organization: 'aaditya', + pricing: { + units: [ + { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 131_072, + displayName: 'QwQ-32B', + id: 'Qwen/QwQ-32B', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.45, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 131_072, + displayName: 'QwQ-32B (fast)', + id: 'Qwen/QwQ-32B-fast', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 16_384, + displayName: 'phi-4', + id: 'microsoft/phi-4', + organization: 'microsoft', + pricing: { + units: [ + { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Hermes-3-Llama-3.1-405B', + id: 'NousResearch/Hermes-3-Llama-405B', + organization: 
'NousResearch', + pricing: { + units: [ + { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Hermes-4-70B', + id: 'NousResearch/Hermes-4-70B', + organization: 'NousResearch', + pricing: { + units: [ + { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Hermes-4-405B', + id: 'NousResearch/Hermes-4-405B', + organization: 'NousResearch', + pricing: { + units: [ + { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 131_072, + displayName: 'DeepSeek-R1-Distill-Llama-70B', + id: 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B', + organization: 'deepseek', + pricing: { + units: [ + { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 128_000, + displayName: 'Devstral-Small-2505', + id: 'mistralai/Devstral-Small-2505', + organization: 'mistralai', + pricing: { + units: [ + { name: 'textInput', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.24, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Llama-3_3-Nemotron-Super-49B-v1', + id: 
'nvidia/Llama-3_3-Nemotron-Super-49B-v1', + organization: 'nvidia', + pricing: { + units: [ + { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 262_144, + displayName: 'Qwen3-30B-A3B-Thinking-2507', + id: 'Qwen/Qwen3-30B-A3B-Thinking-2507', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 262_144, + displayName: 'Qwen3-30B-A3B-Instruct-2507', + id: 'Qwen/Qwen3-30B-A3B-Instruct-2507', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 262_144, + displayName: 'Qwen3-Coder-30B-A3B-Instruct', + id: 'Qwen/Qwen3-Coder-30B-A3B-Instruct', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 131_072, + displayName: 'Meta-Llama-Guard-3-8B', + id: 'meta-llama/Llama-Guard-3-8B', + organization: 'meta', + pricing: { + units: [ + { name: 'textInput', rate: 0.02, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + vision: true, + }, + contextWindowTokens: 32_768, + 
displayName: 'Qwen2-VL-72B-Instruct', + id: 'Qwen/Qwen2-VL-72B-Instruct', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + vision: true, + }, + contextWindowTokens: 131_072, + displayName: 'Mistral-Small-3.1-24B-Instruct-2503', + id: 'mistralai/Mistral-Small-3.1-24B-Instruct-2503', + organization: 'mistralai', + pricing: { + units: [ + { name: 'textInput', rate: 0.05, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + vision: true, + }, + contextWindowTokens: 110_000, + displayName: 'Gemma-3-27b-it', + id: 'google/gemma-3-27b-it', + organization: 'google', + pricing: { + units: [ + { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + vision: true, + }, + contextWindowTokens: 110_000, + displayName: 'Gemma-3-27b-it (fast)', + id: 'google/gemma-3-27b-it-fast', + organization: 'google', + pricing: { + units: [ + { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + vision: true, + }, + contextWindowTokens: 32_000, + displayName: 'Qwen2.5-VL-72B-Instruct', + id: 'Qwen/Qwen2.5-VL-72B-Instruct', + organization: 'Qwen', + pricing: { + units: [ + { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + type: 'chat', + }, +]; + +// 下述模型待验证 + +// 
export const nebiusImageModels: AIImageModelCard[] = [ +// { +// contextWindowTokens: 0, +// displayName: 'FLUX.1-dev', +// id: 'black-forest-labs/flux-dev', +// pricing: { +// units: [ +// { name: 'imageGeneration', rate: 0.007, strategy: 'fixed', unit: 'image' }, +// ], +// }, +// type: 'image', +// }, +// { +// contextWindowTokens: 0, +// displayName: 'FLUX.1-schnell', +// id: 'black-forest-labs/flux-schnell', +// pricing: { +// units: [ +// { name: 'imageGeneration', rate: 0.0013, strategy: 'fixed', unit: 'image' }, +// ], +// }, +// type: 'image', +// }, +// { +// contextWindowTokens: 0, +// displayName: 'Stable Diffusion XL 1.0', +// id: 'stability-ai/sdxl', +// pricing: { +// units: [ +// { name: 'imageGeneration', rate: 0.003, strategy: 'fixed', unit: 'image' }, +// ], +// }, +// type: 'image', +// }, +// ]; + +// export const nebiusEmbeddingModels: AIEmbeddingModelCard[] = [ +// { +// contextWindowTokens: 32_768, +// displayName: 'BGE-ICL', +// id: 'BAAI/bge-en-icl', +// maxDimension: 3072, +// pricing: { +// units: [ +// { name: 'textInput', rate: 0.01, strategy: 'fixed', unit: 'millionTokens' }, +// ], +// }, +// type: 'embedding', +// }, +// { +// contextWindowTokens: 8192, +// displayName: 'bge-multilingual-gemma2', +// id: 'BAAI/bge-multilingual-gemma2', +// maxDimension: 3072, +// pricing: { +// units: [ +// { name: 'textInput', rate: 0.01, strategy: 'fixed', unit: 'millionTokens' }, +// ], +// }, +// type: 'embedding', +// }, +// { +// contextWindowTokens: 32_768, +// displayName: 'e5-mistral-7b-instruct', +// id: 'intfloat/e5-mistral-7b-instruct', +// maxDimension: 1536, +// pricing: { +// units: [ +// { name: 'textInput', rate: 0.01, strategy: 'fixed', unit: 'millionTokens' }, +// ], +// }, +// type: 'embedding', +// }, +// { +// contextWindowTokens: 40_960, +// displayName: 'Qwen3-Embedding-8B', +// id: 'Qwen/Qwen3-Embedding-8B', +// maxDimension: 3072, +// pricing: { +// units: [ +// { name: 'textInput', rate: 0.01, strategy: 'fixed', unit: 
'millionTokens' }, +// ], +// }, +// type: 'embedding', +// }, +// ]; + +export const allModels = [...nebiusChatModels]; + +export default allModels; diff --git a/packages/model-bank/src/aiModels/openrouter.ts b/packages/model-bank/src/aiModels/openrouter.ts index 6ffd708b1bf..2b74d6c07c2 100644 --- a/packages/model-bank/src/aiModels/openrouter.ts +++ b/packages/model-bank/src/aiModels/openrouter.ts @@ -3,7 +3,7 @@ import { AIChatModelCard } from '../types/aiModel'; // https://openrouter.ai/docs/api-reference/list-available-models const openrouterChatModels: AIChatModelCard[] = [ { - contextWindowTokens: 128_000, + contextWindowTokens: 2_000_000, description: '根据上下文长度、主题和复杂性,你的请求将发送到 Llama 3 70B Instruct、Claude 3.5 Sonnet(自我调节)或 GPT-4o。', displayName: 'Auto (best for prompt)', @@ -184,7 +184,7 @@ const openrouterChatModels: AIChatModelCard[] = [ abilities: { reasoning: true, }, - contextWindowTokens: 40_960, + contextWindowTokens: 131_072, description: 'Qwen3-235B-A22B 是由 Qwen 开发的 235B 参数专家混合 (MoE) 模型,每次前向传递激活 22B 参数。它支持在用于复杂推理、数学和代码任务的“思考”模式与用于一般对话效率的“非思考”模式之间无缝切换。该模型展示了强大的推理能力、多语言支持(100 多种语言和方言)、高级指令遵循和代理工具调用能力。它原生处理 32K 令牌上下文窗口,并使用基于 YaRN 的扩展扩展到 131K 令牌。', displayName: 'Qwen3 235B A22B (Free)', @@ -237,41 +237,11 @@ const openrouterChatModels: AIChatModelCard[] = [ }, type: 'chat', }, - { - abilities: { - reasoning: true, - }, - contextWindowTokens: 32_000, - description: - 'GLM-Z1-9B-0414 是由 THUDM 开发的 GLM-4 系列中的 9B 参数语言模型。它采用了最初应用于更大 GLM-Z1 模型的技术,包括扩展强化学习、成对排名对齐以及对数学、代码和逻辑等推理密集型任务的训练。尽管其规模较小,但它在通用推理任务上表现出强大的性能,并在其权重级别中优于许多开源模型。', - displayName: 'GLM Z1 9B (Free)', - id: 'thudm/glm-z1-9b:free', - type: 'chat', - }, - { - contextWindowTokens: 32_000, - description: - 'GLM-4-9B-0414 是 THUDM 开发的 GLM-4 系列中的 90 亿参数语言模型。GLM-4-9B-0414 使用与其较大的 32B 对应模型相同的强化学习和对齐策略进行训练,相对于其规模实现了高性能,使其适用于仍需要强大语言理解和生成能力的资源受限部署。', - displayName: 'GLM 4 9B (Free)', - id: 'thudm/glm-4-9b:free', - type: 'chat', - }, { abilities: { reasoning: true, }, contextWindowTokens: 32_768, - description: 
- 'GLM-Z1-32B-0414 是 GLM-4-32B 的增强推理变体,专为深度数学、逻辑和面向代码的问题解决而构建。它应用扩展强化学习(任务特定和基于通用成对偏好)来提高复杂多步骤任务的性能。与基础 GLM-4-32B 模型相比,Z1 显著提升了结构化推理和形式化领域的能力。\n\n该模型支持通过提示工程强制执行“思考”步骤,并为长格式输出提供改进的连贯性。它针对代理工作流进行了优化,并支持长上下文(通过 YaRN)、JSON 工具调用和用于稳定推理的细粒度采样配置。非常适合需要深思熟虑、多步骤推理或形式化推导的用例。', - displayName: 'GLM Z1 32B (Free)', - id: 'thudm/glm-z1-32b:free', - type: 'chat', - }, - { - abilities: { - reasoning: true, - }, - contextWindowTokens: 32_000, description: 'GLM-Z1-32B-0414 是 GLM-4-32B 的增强推理变体,专为深度数学、逻辑和面向代码的问题解决而构建。它应用扩展强化学习(任务特定和基于通用成对偏好)来提高复杂多步骤任务的性能。与基础 GLM-4-32B 模型相比,Z1 显著提升了结构化推理和形式化领域的能力。\n\n该模型支持通过提示工程强制执行“思考”步骤,并为长格式输出提供改进的连贯性。它针对代理工作流进行了优化,并支持长上下文(通过 YaRN)、JSON 工具调用和用于稳定推理的细粒度采样配置。非常适合需要深思熟虑、多步骤推理或形式化推导的用例。', displayName: 'GLM Z1 32B', @@ -288,7 +258,7 @@ const openrouterChatModels: AIChatModelCard[] = [ abilities: { reasoning: true, }, - contextWindowTokens: 32_768, + contextWindowTokens: 32_000, description: 'GLM-4-32B-0414 是一个 32B 双语(中英)开放权重语言模型,针对代码生成、函数调用和代理式任务进行了优化。它在 15T 高质量和重推理数据上进行了预训练,并使用人类偏好对齐、拒绝采样和强化学习进一步完善。该模型在复杂推理、工件生成和结构化输出任务方面表现出色,在多个基准测试中达到了与 GPT-4o 和 DeepSeek-V3-0324 相当的性能。', displayName: 'GLM 4 32B (Free)', @@ -715,7 +685,7 @@ const openrouterChatModels: AIChatModelCard[] = [ type: 'chat', }, { - contextWindowTokens: 64_000, + contextWindowTokens: 163_840, description: 'DeepSeek V3 是一个 685B 参数的专家混合模型,是 DeepSeek 团队旗舰聊天模型系列的最新迭代。\n\n它继承了 [DeepSeek V3](/deepseek/deepseek-chat-v3) 模型,并在各种任务上表现出色。', displayName: 'DeepSeek V3 0324', @@ -1018,7 +988,7 @@ const openrouterChatModels: AIChatModelCard[] = [ abilities: { functionCall: true, }, - contextWindowTokens: 32_768, + contextWindowTokens: 131_072, description: 'Llama 3.3 是 Llama 系列最先进的多语言开源大型语言模型,以极低成本体验媲美 405B 模型的性能。基于 Transformer 结构,并通过监督微调(SFT)和人类反馈强化学习(RLHF)提升有用性和安全性。其指令调优版本专为多语言对话优化,在多项行业基准上表现优于众多开源和封闭聊天模型。知识截止日期为 2023 年 12 月', displayName: 'Llama 3.3 70B Instruct', @@ -1035,7 +1005,7 @@ const openrouterChatModels: AIChatModelCard[] = [ abilities: { functionCall: true, }, - contextWindowTokens: 32_768, 
+ contextWindowTokens: 65_536, description: 'Llama 3.3 是 Llama 系列最先进的多语言开源大型语言模型,以极低成本体验媲美 405B 模型的性能。基于 Transformer 结构,并通过监督微调(SFT)和人类反馈强化学习(RLHF)提升有用性和安全性。其指令调优版本专为多语言对话优化,在多项行业基准上表现优于众多开源和封闭聊天模型。知识截止日期为 2023 年 12 月', displayName: 'Llama 3.3 70B Instruct (Free)', @@ -1050,7 +1020,7 @@ const openrouterChatModels: AIChatModelCard[] = [ type: 'chat', }, { - contextWindowTokens: 32_768, + contextWindowTokens: 131_072, description: 'LLaMA 3.1 提供多语言支持,是业界领先的生成模型之一。', displayName: 'Llama 3.1 8B (Free)', id: 'meta-llama/llama-3.1-8b-instruct:free', diff --git a/packages/model-runtime/src/index.ts b/packages/model-runtime/src/index.ts index e948027cf0a..413154896b9 100644 --- a/packages/model-runtime/src/index.ts +++ b/packages/model-runtime/src/index.ts @@ -13,6 +13,7 @@ export { LobeMinimaxAI } from './minimax'; export { LobeMistralAI } from './mistral'; export { ModelRuntime } from './ModelRuntime'; export { LobeMoonshotAI } from './moonshot'; +export { LobeNebiusAI } from './nebius'; export { LobeOllamaAI } from './ollama'; export { LobeOpenAI } from './openai'; export { LobeOpenRouterAI } from './openrouter';
diff --git a/packages/model-runtime/src/nebius/index.ts b/packages/model-runtime/src/nebius/index.ts
new file mode 100644
index 00000000000..cdf0b6746b7
--- /dev/null
+++ b/packages/model-runtime/src/nebius/index.ts
@@ -0,0 +1,84 @@
+import { ModelProvider } from '../types';
+import { processMultiProviderModelList } from '../utils/modelParse';
+import { createOpenAICompatibleRuntime } from '../utils/openaiCompatibleFactory';
+
+export interface NebiusModelCard {
+  id: string;
+}
+
+const DEFAULT_BASE_URL = 'https://api.studio.nebius.com/v1';
+
+/**
+ * Infer the model type from a modality string of the form `input->output`.
+ * Only `image` and `embedding` outputs are recognised; any other value
+ * (or a missing / malformed modality) yields `undefined`.
+ */
+const inferModelType = (modality: unknown): string | undefined => {
+  if (typeof modality !== 'string' || !modality.includes('->')) return undefined;
+
+  const output = modality.split('->')[1]?.trim().toLowerCase();
+  return output === 'image' || output === 'embedding' ? output : undefined;
+};
+
+/**
+ * Nebius runtime (default endpoint `api.studio.nebius.com`), built via
+ * `createOpenAICompatibleRuntime`.
+ */
+export const LobeNebiusAI = createOpenAICompatibleRuntime({
+  baseURL: DEFAULT_BASE_URL,
+  chatCompletion: {
+    // Every request is sent with `stream: true`, overriding the caller's value.
+    handlePayload: (payload) => ({ ...payload, stream: true }) as any,
+  },
+  debug: {
+    chatCompletion: () => process.env.DEBUG_NEBIUS_CHAT_COMPLETION === '1',
+  },
+  /**
+   * Fetch the model list from `<baseURL>/models?verbose=true` and map each
+   * entry to the shape passed to `processMultiProviderModelList`.
+   *
+   * @throws Error when the HTTP response status is not OK.
+   */
+  models: async ({ client }) => {
+    const base = (client as any).baseURL || DEFAULT_BASE_URL;
+    // Strip trailing slashes so the joined URL never contains `//models`.
+    const url = `${base.replace(/\/+$/, '')}/models?verbose=true`;
+
+    const res = await fetch(url, {
+      headers: {
+        Accept: 'application/json',
+        Authorization: `Bearer ${client.apiKey}`,
+      },
+      method: 'GET',
+    });
+
+    if (!res.ok) {
+      throw new Error(`Failed to fetch Nebius models: ${res.status} ${res.statusText}`);
+    }
+
+    const body = (await res.json()) as any;
+    // Tolerate a missing or non-array `data` field instead of throwing on `.map`.
+    const rawList: any[] = Array.isArray(body?.data) ? body.data : [];
+
+    const standardList = rawList.map((m) => ({
+      contextWindowTokens: m.context_length ?? undefined,
+      description: m.description ?? '',
+      displayName: m.name ?? m.id,
+      functionCall: m.features?.includes('function-calling'),
+      id: m.id,
+      // Prices are scaled by 1_000_000 (presumably per-token -> per-million-tokens;
+      // TODO confirm against the Nebius API). Guard against entries without
+      // `pricing`, which would otherwise throw a TypeError and abort the listing.
+      pricing: m.pricing
+        ? {
+            input: m.pricing.prompt * 1_000_000,
+            output: m.pricing.completion * 1_000_000,
+          }
+        : undefined,
+      reasoning: m.features?.includes('reasoning'),
+      type: inferModelType(m.architecture?.modality),
+      vision: m.features?.includes('vision'),
+    }));
+
+    return processMultiProviderModelList(standardList, 'nebius');
+  },
+  provider: ModelProvider.Nebius,
+});
diff --git a/packages/model-runtime/src/runtimeMap.ts b/packages/model-runtime/src/runtimeMap.ts index ca7882a1fdc..6dbb7ef9374 100644 --- a/packages/model-runtime/src/runtimeMap.ts +++ b/packages/model-runtime/src/runtimeMap.ts @@ -29,6 +29,7 @@ import { LobeMinimaxAI } from './minimax'; import { LobeMistralAI } from './mistral'; import { LobeModelScopeAI } from './modelscope'; import { LobeMoonshotAI } from './moonshot'; +import { LobeNebiusAI } from './nebius'; import { LobeNovitaAI } from './novita'; import { LobeNvidiaAI } from './nvidia'; import { LobeOllamaAI } from './ollama'; @@ -89,6 +90,7 @@ export const providerRuntimeMap = { mistral: LobeMistralAI, modelscope: LobeModelScopeAI, moonshot: LobeMoonshotAI, + nebius: LobeNebiusAI, novita: LobeNovitaAI, nvidia: LobeNvidiaAI, ollama: LobeOllamaAI, diff --git a/packages/model-runtime/src/types/type.ts b/packages/model-runtime/src/types/type.ts index 9aee729cddc..d8df11b5733 100644 --- a/packages/model-runtime/src/types/type.ts +++ b/packages/model-runtime/src/types/type.ts @@ -59,6 +59,7 @@ export enum ModelProvider { Mistral = 'mistral', ModelScope = 'modelscope', Moonshot = 'moonshot', + Nebius = 'nebius', Novita = 'novita', Nvidia = 'nvidia', Ollama = 'ollama', diff --git a/packages/types/src/user/settings/keyVaults.ts b/packages/types/src/user/settings/keyVaults.ts index d7cb8fb6b3b..9f15ed431ed 100644 --- a/packages/types/src/user/settings/keyVaults.ts +++ b/packages/types/src/user/settings/keyVaults.ts @@ -68,6 +68,7 @@ export interface UserKeyVaults extends SearchEngineKeyVaults { mistral?: 
OpenAICompatibleKeyVault; modelscope?: OpenAICompatibleKeyVault; moonshot?: OpenAICompatibleKeyVault; + nebius?: OpenAICompatibleKeyVault; novita?: OpenAICompatibleKeyVault; nvidia?: OpenAICompatibleKeyVault; ollama?: OpenAICompatibleKeyVault; diff --git a/src/config/llm.ts b/src/config/llm.ts index 09ad0af26dd..441ac8a8ce7 100644 --- a/src/config/llm.ts +++ b/src/config/llm.ts @@ -87,6 +87,9 @@ export const getLLMConfig = () => { ENABLED_STEPFUN: z.boolean(), STEPFUN_API_KEY: z.string().optional(), + ENABLED_NEBIUS: z.boolean(), + NEBIUS_API_KEY: z.string().optional(), + ENABLED_NOVITA: z.boolean(), NOVITA_API_KEY: z.string().optional(), @@ -364,6 +367,9 @@ export const getLLMConfig = () => { ENABLED_AIHUBMIX: !!process.env.AIHUBMIX_API_KEY, AIHUBMIX_API_KEY: process.env.AIHUBMIX_API_KEY, + + ENABLED_NEBIUS: !!process.env.NEBIUS_API_KEY, + NEBIUS_API_KEY: process.env.NEBIUS_API_KEY, }, }); }; diff --git a/src/config/modelProviders/index.ts b/src/config/modelProviders/index.ts index 44a68642cf8..f2f60f5e1b1 100644 --- a/src/config/modelProviders/index.ts +++ b/src/config/modelProviders/index.ts @@ -31,6 +31,7 @@ import MinimaxProvider from './minimax'; import MistralProvider from './mistral'; import ModelScopeProvider from './modelscope'; import MoonshotProvider from './moonshot'; +import NebiusProvider from './nebius'; import NovitaProvider from './novita'; import NvidiaProvider from './nvidia'; import OllamaProvider from './ollama'; @@ -175,6 +176,7 @@ export const DEFAULT_MODEL_PROVIDER_LIST = [ InfiniAIProvider, AkashChatProvider, QiniuProvider, + NebiusProvider, ]; export const filterEnabledModels = (provider: ModelProviderCard) => { @@ -218,6 +220,7 @@ export { default as MinimaxProviderCard } from './minimax'; export { default as MistralProviderCard } from './mistral'; export { default as ModelScopeProviderCard } from './modelscope'; export { default as MoonshotProviderCard } from './moonshot'; +export { default as NebiusProviderCard } from './nebius'; 
export { default as NovitaProviderCard } from './novita'; export { default as NvidiaProviderCard } from './nvidia'; export { default as OllamaProviderCard } from './ollama'; diff --git a/src/config/modelProviders/nebius.ts b/src/config/modelProviders/nebius.ts new file mode 100644 index 00000000000..68e8a52649e --- /dev/null +++ b/src/config/modelProviders/nebius.ts @@ -0,0 +1,20 @@ +import { ModelProviderCard } from '@/types/llm'; + +const Nebius: ModelProviderCard = { + chatModels: [], + checkModel: 'Qwen/Qwen2.5-Coder-7B', + description: 'Nebius 通过构建大规模GPU集群和垂直整合的云平台,为全球AI创新者提供高性能基础设施。', + id: 'nebius', + modelsUrl: 'https://studio.nebius.com/', + name: 'Nebius', + settings: { + proxyUrl: { + placeholder: 'https://api.studio.nebius.com/v1', + }, + sdkType: 'openai', + showModelFetcher: true, + }, + url: 'https://nebius.com/', +}; + +export default Nebius; diff --git a/src/features/ChatInput/ActionBar/Model/ControlsForm.tsx b/src/features/ChatInput/ActionBar/Model/ControlsForm.tsx index 00905954f91..113316e8e0e 100644 --- a/src/features/ChatInput/ActionBar/Model/ControlsForm.tsx +++ b/src/features/ChatInput/ActionBar/Model/ControlsForm.tsx @@ -1,6 +1,6 @@ import { Form } from '@lobehub/ui'; import type { FormItemProps } from '@lobehub/ui'; -import { Form as AntdForm, Switch } from 'antd'; +import { Form as AntdForm, Switch, Grid } from 'antd'; import isEqual from 'fast-deep-equal'; import Link from 'next/link'; import { memo } from 'react'; @@ -32,11 +32,21 @@ const ControlsForm = memo(() => { const modelExtendParams = useAiInfraStore(aiModelSelectors.modelExtendParams(model, provider)); + const screens = Grid.useBreakpoint(); + const isNarrow = !screens.sm; + + const descWide = { display: 'inline-block', width: 300 } as const; + const descNarrow = { + display: 'block', + maxWidth: '100%', + whiteSpace: 'normal', + } as const; + const items = [ { children: , desc: ( - + 单条对话生成成本最高可降低 90%,响应速度提升 4 倍( { ), label: t('extendParams.disableContextCaching.title'), + 
layout: isNarrow ? 'vertical' : 'horizontal', minWidth: undefined, name: 'disableContextCaching', }, { children: , desc: ( - + 基于 Claude Thinking 机制限制( { ), label: t('extendParams.enableReasoning.title'), - layout: 'horizontal', + layout: isNarrow ? 'vertical' : 'horizontal', minWidth: undefined, name: 'enableReasoning', }, @@ -132,14 +143,16 @@ const ControlsForm = memo(() => { }, { children: , - desc: t('extendParams.urlContext.desc'), + desc: isNarrow ? ( + {t('extendParams.urlContext.desc')} + ) : ( + t('extendParams.urlContext.desc') + ), label: t('extendParams.urlContext.title'), - layout: 'horizontal', + layout: isNarrow ? 'vertical' : 'horizontal', minWidth: undefined, name: 'urlContext', - style: { - width: 445, - }, + style: isNarrow ? undefined : { width: 445 }, tag: 'urlContext', }, {