From 4dbf5a9e627d3d214bedc65080c442b5264283cb Mon Sep 17 00:00:00 2001 From: gongchao Date: Wed, 24 Sep 2025 22:42:04 +0800 Subject: [PATCH 01/11] feat: support zai as inference provider --- packages/inference/README.md | 1 + .../inference/src/lib/getProviderHelper.ts | 5 ++ packages/inference/src/providers/consts.ts | 1 + packages/inference/src/providers/zai.ts | 39 ++++++++++++++ packages/inference/src/types.ts | 1 + .../inference/test/InferenceClient.spec.ts | 52 +++++++++++++++++++ 6 files changed, 99 insertions(+) create mode 100644 packages/inference/src/providers/zai.ts diff --git a/packages/inference/README.md b/packages/inference/README.md index ed43e0644f..f6493dbb0a 100644 --- a/packages/inference/README.md +++ b/packages/inference/README.md @@ -64,6 +64,7 @@ Currently, we support the following providers: - [Cohere](https://cohere.com) - [Cerebras](https://cerebras.ai/) - [Groq](https://groq.com) +- [ZAI](https://z.ai/) To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. The default value of the `provider` parameter is "auto", which will select the first of the providers available for the model, sorted by your preferred order in https://hf.co/settings/inference-providers. 
diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts index 3e95eceb8c..c483a2d939 100644 --- a/packages/inference/src/lib/getProviderHelper.ts +++ b/packages/inference/src/lib/getProviderHelper.ts @@ -49,6 +49,7 @@ import * as Replicate from "../providers/replicate.js"; import * as Sambanova from "../providers/sambanova.js"; import * as Scaleway from "../providers/scaleway.js"; import * as Together from "../providers/together.js"; +import * as Zai from "../providers/zai.js"; import type { InferenceProvider, InferenceProviderOrPolicy, InferenceTask } from "../types.js"; import { InferenceClientInputError } from "../errors.js"; @@ -160,6 +161,10 @@ export const PROVIDERS: Record ZAI model ID here: + * + * https://huggingface.co/api/partners/zai/models + * + * This is a publicly available mapping. + * + * If you want to try to run inference for a new model locally before it's registered on huggingface.co, + * you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes. + * + * - If you work at zai and want to update this mapping, please use the model mapping API we provide on huggingface.co + * - If you're a community member and want to add a new supported HF model to zai, please open an issue on the present repo + * and we will tag zai team members. + * + * Thanks! 
+ */ +import { BaseConversationalTask, BaseTextGenerationTask } from "./providerHelper.js"; + +const ZAI_API_BASE_URL = "https://api.z.ai/api/paas/v4"; + +export class ZaiTextGenerationTask extends BaseTextGenerationTask { + constructor() { + super("zai", ZAI_API_BASE_URL); + } + + override makeRoute(): string { + return "/chat/completions"; + } +} + +export class ZaiConversationalTask extends BaseConversationalTask { + constructor() { + super("zai", ZAI_API_BASE_URL); + } + + override makeRoute(): string { + return "/chat/completions"; + } +} diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts index b31843b99b..55a6ff2e15 100644 --- a/packages/inference/src/types.ts +++ b/packages/inference/src/types.ts @@ -63,6 +63,7 @@ export const INFERENCE_PROVIDERS = [ "sambanova", "scaleway", "together", + "zai", ] as const; export const PROVIDERS_OR_POLICIES = [...INFERENCE_PROVIDERS, "auto"] as const; diff --git a/packages/inference/test/InferenceClient.spec.ts b/packages/inference/test/InferenceClient.spec.ts index 1cc60a43a9..a13df68604 100644 --- a/packages/inference/test/InferenceClient.spec.ts +++ b/packages/inference/test/InferenceClient.spec.ts @@ -2134,6 +2134,58 @@ describe.skip("InferenceClient", () => { }, TIMEOUT ); + describe.concurrent( + "ZAI", + () => { + const client = new InferenceClient(env.HF_ZAI_KEY ?? 
"dummy"); + + HARDCODED_MODEL_INFERENCE_MAPPING["zai"] = { + "glm-4.5": { + provider: "zai", + hfModelId: "glm-4.5", + providerId: "glm-4.5", + status: "live", + task: "conversational", + }, + }; + + it("chatCompletion", async () => { + const res = await client.chatCompletion({ + model: "glm-4.5", + provider: "zai", + messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }], + }); + if (res.choices && res.choices.length > 0) { + const completion = res.choices[0].message?.content; + expect(completion).toContain("two"); + } + }); + + it("chatCompletion stream", async () => { + const stream = client.chatCompletionStream({ + model: "glm-4.5", + provider: "zai", + messages: [{ role: "user", content: "Say 'this is a test'" }], + stream: true, + }) as AsyncGenerator; + + let fullResponse = ""; + for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + const content = chunk.choices[0].delta?.content; + if (content) { + fullResponse += content; + } + } + } + + // Verify we got a meaningful response + expect(fullResponse).toBeTruthy(); + expect(fullResponse.length).toBeGreaterThan(0); + }); + }, + TIMEOUT + ); describe.concurrent( "OVHcloud", () => { From ce9062863fb9c70034bc7473c1d1b4711a4c29bb Mon Sep 17 00:00:00 2001 From: gongchao Date: Wed, 24 Sep 2025 23:03:29 +0800 Subject: [PATCH 02/11] feat: support zai as inference provider --- packages/inference/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/inference/README.md b/packages/inference/README.md index f6493dbb0a..a05ae9c654 100644 --- a/packages/inference/README.md +++ b/packages/inference/README.md @@ -100,6 +100,7 @@ Only a subset of models are supported when requesting third-party providers. 
You - [Cerebras supported models](https://huggingface.co/api/partners/cerebras/models) - [Groq supported models](https://console.groq.com/docs/models) - [Novita AI supported models](https://huggingface.co/api/partners/novita/models) +- [ZAI supported models](https://huggingface.co/api/partners/zai/models) ❗**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type. This is not an issue for LLMs as everyone converged on the OpenAI API anyways, but can be more tricky for other tasks like "text-to-image" or "automatic-speech-recognition" where there exists no standard API. Let us know if any help is needed or if we can make things easier for you! From 4eda47a8692982bc415df933dcba287149486980 Mon Sep 17 00:00:00 2001 From: gongchao Date: Thu, 25 Sep 2025 10:17:55 +0800 Subject: [PATCH 03/11] feat: support zai as inference provider --- packages/inference/src/providers/zai.ts | 7 +++++++ packages/inference/test/InferenceClient.spec.ts | 8 ++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/packages/inference/src/providers/zai.ts b/packages/inference/src/providers/zai.ts index 999d8e3569..1b9a7343ca 100644 --- a/packages/inference/src/providers/zai.ts +++ b/packages/inference/src/providers/zai.ts @@ -15,6 +15,7 @@ * Thanks! 
*/ import { BaseConversationalTask, BaseTextGenerationTask } from "./providerHelper.js"; +import type { HeaderParams } from "../types.js"; const ZAI_API_BASE_URL = "https://api.z.ai/api/paas/v4"; @@ -23,6 +24,12 @@ export class ZaiTextGenerationTask extends BaseTextGenerationTask { super("zai", ZAI_API_BASE_URL); } + override prepareHeaders(params: HeaderParams, binary: boolean): Record<string, string> { + const headers = super.prepareHeaders(params, binary); + headers["x-source-channel"] = "hugging_face"; + return headers; + } + override makeRoute(): string { return "/chat/completions"; } diff --git a/packages/inference/test/InferenceClient.spec.ts b/packages/inference/test/InferenceClient.spec.ts index a13df68604..3476adb025 100644 --- a/packages/inference/test/InferenceClient.spec.ts +++ b/packages/inference/test/InferenceClient.spec.ts @@ -2140,9 +2140,9 @@ describe.skip("InferenceClient", () => { const client = new InferenceClient(env.HF_ZAI_KEY ?? "dummy"); HARDCODED_MODEL_INFERENCE_MAPPING["zai"] = { - "glm-4.5": { + "zai-org/glm-4.5": { provider: "zai", - hfModelId: "glm-4.5", + hfModelId: "zai-org/glm-4.5", providerId: "glm-4.5", status: "live", task: "conversational", @@ -2151,7 +2151,7 @@ describe.skip("InferenceClient", () => { it("chatCompletion", async () => { const res = await client.chatCompletion({ - model: "glm-4.5", + model: "zai-org/glm-4.5", provider: "zai", messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }], }); @@ -2163,7 +2163,7 @@ describe.skip("InferenceClient", () => { it("chatCompletion stream", async () => { const stream = client.chatCompletionStream({ - model: "glm-4.5", + model: "zai-org/glm-4.5", provider: "zai", messages: [{ role: "user", content: "Say 'this is a test'" }], stream: true, From ccd45cae706ff4605a9dfcb74c95161d7a2fa3f4 Mon Sep 17 00:00:00 2001 From: gongchao Date: Thu, 25 Sep 2025 14:42:42 +0800 Subject: [PATCH 04/11] feat: support zai as inference provider --- 
packages/inference/test/InferenceClient.spec.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/inference/test/InferenceClient.spec.ts b/packages/inference/test/InferenceClient.spec.ts index 3476adb025..0fb9c754f6 100644 --- a/packages/inference/test/InferenceClient.spec.ts +++ b/packages/inference/test/InferenceClient.spec.ts @@ -2140,9 +2140,9 @@ describe.skip("InferenceClient", () => { const client = new InferenceClient(env.HF_ZAI_KEY ?? "dummy"); HARDCODED_MODEL_INFERENCE_MAPPING["zai"] = { - "zai-org/glm-4.5": { + "zai-org/GLM-4.5": { provider: "zai", - hfModelId: "zai-org/glm-4.5", + hfModelId: "zai-org/GLM-4.5", providerId: "glm-4.5", status: "live", task: "conversational", @@ -2151,7 +2151,7 @@ describe.skip("InferenceClient", () => { it("chatCompletion", async () => { const res = await client.chatCompletion({ - model: "zai-org/glm-4.5", + model: "zai-org/GLM-4.5", provider: "zai", messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }], }); @@ -2163,7 +2163,7 @@ describe.skip("InferenceClient", () => { it("chatCompletion stream", async () => { const stream = client.chatCompletionStream({ - model: "zai-org/glm-4.5", + model: "zai-org/GLM-4.5", provider: "zai", messages: [{ role: "user", content: "Say 'this is a test'" }], stream: true, From 3a4b909e563ef7e86ffc600045e22be25f0579b9 Mon Sep 17 00:00:00 2001 From: tomsun28 Date: Thu, 25 Sep 2025 16:30:40 +0800 Subject: [PATCH 05/11] feat: support zai as inference provider --- packages/inference/src/lib/getProviderHelper.ts | 1 - packages/inference/src/providers/zai.ts | 14 ++------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts index bd2e10d5da..f49b1661d8 100644 --- a/packages/inference/src/lib/getProviderHelper.ts +++ b/packages/inference/src/lib/getProviderHelper.ts @@ -167,7 +167,6 @@ export const PROVIDERS: 
Record Date: Thu, 25 Sep 2025 17:17:53 +0800 Subject: [PATCH 06/11] Update packages/inference/src/providers/zai.ts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: célina --- packages/inference/src/providers/zai.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/inference/src/providers/zai.ts b/packages/inference/src/providers/zai.ts index 518170d84e..43cff6856f 100644 --- a/packages/inference/src/providers/zai.ts +++ b/packages/inference/src/providers/zai.ts @@ -27,6 +27,7 @@ export class ZaiConversationalTask extends BaseConversationalTask { override prepareHeaders(params: HeaderParams, binary: boolean): Record<string, string> { const headers = super.prepareHeaders(params, binary); headers["x-source-channel"] = "hugging_face"; + headers["accept-language"] = "en-US,en"; return headers; } From de7ea60103e7f028018b14f6868671abe4eaac7c Mon Sep 17 00:00:00 2001 From: Tom Date: Fri, 26 Sep 2025 01:18:00 +0800 Subject: [PATCH 07/11] Update packages/inference/src/providers/zai.ts Co-authored-by: Simon Brandeis <33657802+SBrandeis@users.noreply.github.com> --- packages/inference/src/providers/zai.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/inference/src/providers/zai.ts b/packages/inference/src/providers/zai.ts index 43cff6856f..0eceda9909 100644 --- a/packages/inference/src/providers/zai.ts +++ b/packages/inference/src/providers/zai.ts @@ -17,7 +17,7 @@ import { BaseConversationalTask } from "./providerHelper.js"; import type { HeaderParams } from "../types.js"; -const ZAI_API_BASE_URL = "https://api.z.ai/api/paas/v4"; +const ZAI_API_BASE_URL = "https://api.z.ai"; export class ZaiConversationalTask extends BaseConversationalTask { constructor() { From 0d0e6d1c9e1a3a6e384753d567228c4fe630d5f6 Mon Sep 17 00:00:00 2001 From: Tom Date: Fri, 26 Sep 2025 01:18:09 +0800 Subject: [PATCH 08/11] Update packages/inference/src/providers/zai.ts Co-authored-by: Simon Brandeis 
<33657802+SBrandeis@users.noreply.github.com> --- packages/inference/src/providers/zai.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/inference/src/providers/zai.ts b/packages/inference/src/providers/zai.ts index 0eceda9909..e40d3fdf3a 100644 --- a/packages/inference/src/providers/zai.ts +++ b/packages/inference/src/providers/zai.ts @@ -32,6 +32,6 @@ export class ZaiConversationalTask extends BaseConversationalTask { } override makeRoute(): string { - return "/chat/completions"; + return "/api/paas/v4/chat/completions"; } } From 0763c96a11eb563ade0897a8f47e7a39f65fd2aa Mon Sep 17 00:00:00 2001 From: tomsun28 Date: Fri, 26 Sep 2025 18:34:46 +0800 Subject: [PATCH 09/11] feat: support zai-org as inference provider --- packages/inference/src/lib/getProviderHelper.ts | 2 +- packages/inference/src/providers/consts.ts | 2 +- packages/inference/src/providers/zai.ts | 2 +- packages/inference/src/types.ts | 2 +- packages/inference/test/InferenceClient.spec.ts | 8 ++++---- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts index f49b1661d8..eedbf25561 100644 --- a/packages/inference/src/lib/getProviderHelper.ts +++ b/packages/inference/src/lib/getProviderHelper.ts @@ -165,7 +165,7 @@ export const PROVIDERS: Record { diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts index 532a1338e4..c218ebbfdb 100644 --- a/packages/inference/src/types.ts +++ b/packages/inference/src/types.ts @@ -64,7 +64,7 @@ export const INFERENCE_PROVIDERS = [ "sambanova", "scaleway", "together", - "zai", + "zai-org", ] as const; export const PROVIDERS_OR_POLICIES = [...INFERENCE_PROVIDERS, "auto"] as const; diff --git a/packages/inference/test/InferenceClient.spec.ts b/packages/inference/test/InferenceClient.spec.ts index 226671ad57..52d27253d6 100644 --- a/packages/inference/test/InferenceClient.spec.ts +++ 
b/packages/inference/test/InferenceClient.spec.ts @@ -2139,9 +2139,9 @@ describe.skip("InferenceClient", () => { () => { const client = new InferenceClient(env.HF_ZAI_KEY ?? "dummy"); - HARDCODED_MODEL_INFERENCE_MAPPING["zai"] = { + HARDCODED_MODEL_INFERENCE_MAPPING["zai-org"] = { "zai-org/GLM-4.5": { - provider: "zai", + provider: "zai-org", hfModelId: "zai-org/GLM-4.5", providerId: "glm-4.5", status: "live", @@ -2152,7 +2152,7 @@ describe.skip("InferenceClient", () => { it("chatCompletion", async () => { const res = await client.chatCompletion({ model: "zai-org/GLM-4.5", - provider: "zai", + provider: "zai-org", messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }], }); if (res.choices && res.choices.length > 0) { @@ -2164,7 +2164,7 @@ describe.skip("InferenceClient", () => { it("chatCompletion stream", async () => { const stream = client.chatCompletionStream({ model: "zai-org/GLM-4.5", - provider: "zai", + provider: "zai-org", messages: [{ role: "user", content: "Say 'this is a test'" }], stream: true, }) as AsyncGenerator; From 42926d270374fe55d25144a8ae50ea212f102901 Mon Sep 17 00:00:00 2001 From: tomsun28 Date: Fri, 26 Sep 2025 18:38:10 +0800 Subject: [PATCH 10/11] feat: support zai-org as inference provider --- packages/inference/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/inference/README.md b/packages/inference/README.md index be73e7ff02..28fe8b097f 100644 --- a/packages/inference/README.md +++ b/packages/inference/README.md @@ -65,7 +65,7 @@ Currently, we support the following providers: - [Cohere](https://cohere.com) - [Cerebras](https://cerebras.ai/) - [Groq](https://groq.com) -- [ZAI](https://z.ai/) +- [Z.ai](https://z.ai/) To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. 
The default value of the `provider` parameter is "auto", which will select the first of the providers available for the model, sorted by your preferred order in https://hf.co/settings/inference-providers. @@ -101,7 +101,7 @@ Only a subset of models are supported when requesting third-party providers. You - [Cerebras supported models](https://huggingface.co/api/partners/cerebras/models) - [Groq supported models](https://console.groq.com/docs/models) - [Novita AI supported models](https://huggingface.co/api/partners/novita/models) -- [ZAI supported models](https://huggingface.co/api/partners/zai/models) +- [Z.ai supported models](https://huggingface.co/api/partners/zai/models) ❗**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type. This is not an issue for LLMs as everyone converged on the OpenAI API anyways, but can be more tricky for other tasks like "text-to-image" or "automatic-speech-recognition" where there exists no standard API. Let us know if any help is needed or if we can make things easier for you! From df4703a0883a25196d7396160654a7e1a263b5af Mon Sep 17 00:00:00 2001 From: SBrandeis Date: Fri, 26 Sep 2025 12:57:24 +0200 Subject: [PATCH 11/11] rename --- packages/inference/README.md | 2 +- packages/inference/src/lib/getProviderHelper.ts | 2 +- packages/inference/src/providers/{zai.ts => zai-org.ts} | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename packages/inference/src/providers/{zai.ts => zai-org.ts} (95%) diff --git a/packages/inference/README.md b/packages/inference/README.md index 28fe8b097f..9e686d9089 100644 --- a/packages/inference/README.md +++ b/packages/inference/README.md @@ -101,7 +101,7 @@ Only a subset of models are supported when requesting third-party providers. 
You - [Cerebras supported models](https://huggingface.co/api/partners/cerebras/models) - [Groq supported models](https://console.groq.com/docs/models) - [Novita AI supported models](https://huggingface.co/api/partners/novita/models) -- [Z.ai supported models](https://huggingface.co/api/partners/zai/models) +- [Z.ai supported models](https://huggingface.co/api/partners/zai-org/models) ❗**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type. This is not an issue for LLMs as everyone converged on the OpenAI API anyways, but can be more tricky for other tasks like "text-to-image" or "automatic-speech-recognition" where there exists no standard API. Let us know if any help is needed or if we can make things easier for you! diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts index eedbf25561..d863d5dc5c 100644 --- a/packages/inference/src/lib/getProviderHelper.ts +++ b/packages/inference/src/lib/getProviderHelper.ts @@ -50,7 +50,7 @@ import * as Replicate from "../providers/replicate.js"; import * as Sambanova from "../providers/sambanova.js"; import * as Scaleway from "../providers/scaleway.js"; import * as Together from "../providers/together.js"; -import * as Zai from "../providers/zai.js"; +import * as Zai from "../providers/zai-org.js"; import type { InferenceProvider, InferenceProviderOrPolicy, InferenceTask } from "../types.js"; import { InferenceClientInputError } from "../errors.js"; diff --git a/packages/inference/src/providers/zai.ts b/packages/inference/src/providers/zai-org.ts similarity index 95% rename from packages/inference/src/providers/zai.ts rename to packages/inference/src/providers/zai-org.ts index 7976400ad5..4246da5514 100644 --- a/packages/inference/src/providers/zai.ts +++ b/packages/inference/src/providers/zai-org.ts @@ -1,7 +1,7 @@ /** * See the registered mapping of HF model ID => ZAI 
model ID here: * - * https://huggingface.co/api/partners/zai/models + * https://huggingface.co/api/partners/zai-org/models * * This is a publicly available mapping. *