huggingface · Wauplin · Jul 2, 2025 · Jul 2, 2025 · Jul 2, 2025
diff --git a/packages/responses-server/examples/structured_output.js b/packages/responses-server/examples/structured_output.js
@@ -0,0 +1,32 @@
+import OpenAI from "openai"; // Example script: ask for a zod-described structured answer via openai.responses.parse
+import { zodTextFormat } from "openai/helpers/zod";
+import { z } from "zod";
+
+const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); // client targets a server on localhost:3000; HF_TOKEN is passed as the API key
+
+const Step = z.object({ // one step of the solution: two string fields
+	explanation: z.string(),
+	output: z.string(),
+});
+
+const MathReasoning = z.object({ // full answer shape: a list of Step objects plus a final answer string
+	steps: z.array(Step),
+	final_answer: z.string(),
+});
+
+const response = await openai.responses.parse({ // top-level await: the script must run as an ES module
+	model: "Qwen/Qwen2.5-VL-72B-Instruct",
+	provider: "nebius", // provider name sent alongside the model in the request body
+	input: [
+		{
+			role: "system",
+			content: "You are a helpful math tutor. Guide the user through the solution step by step.",
+		},
+		{ role: "user", content: "how can I solve 8x + 7 = -23" },
+	],
+	text: {
+		format: zodTextFormat(MathReasoning, "math_reasoning"), // output format built from the MathReasoning schema, named "math_reasoning"
+	},
+});
+
+console.log(response.output_parsed); // presumably the reply parsed against MathReasoning - confirm against the SDK docs
diff --git a/packages/responses-server/examples/structured_output_streaming.js b/packages/responses-server/examples/structured_output_streaming.js
@@ -0,0 +1,36 @@
+import { OpenAI } from "openai"; // Example script: stream a zod-described structured answer and print deltas as they arrive
+import { zodTextFormat } from "openai/helpers/zod";
+import { z } from "zod";
+
+const CalendarEvent = z.object({ // shape requested from the model: event name, date, and participant names
+	name: z.string(),
+	date: z.string(),
+	participants: z.array(z.string()),
+});
+
+const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); // client targets a server on localhost:3000; HF_TOKEN is passed as the API key
+const stream = openai.responses
+	.stream({
+		model: "Qwen/Qwen2.5-VL-72B-Instruct",
+		provider: "nebius", // provider name sent alongside the model in the request body
+		instructions: "Extract the event information.",
+		input: "Alice and Bob are going to a science fair on Friday.",
+		text: {
+			format: zodTextFormat(CalendarEvent, "calendar_event"), // output format built from the CalendarEvent schema, named "calendar_event"
+		},
+	})
+	.on("response.refusal.delta", (event) => { // refusal deltas are written to stdout as they arrive
+		process.stdout.write(event.delta);
+	})
+	.on("response.output_text.delta", (event) => { // output-text deltas are written to stdout as they arrive
+		process.stdout.write(event.delta);
+	})
+	.on("response.output_text.done", () => { // finish the printed line once the output text is done
+		process.stdout.write("\n");
+	})
+	.on("response.error", (event) => { // report the error carried by the event on stderr
+		console.error(event.error);
+	});
+
+const result = await stream.finalResponse(); // top-level await: the script must run as an ES module
+console.log(result.output_parsed); // presumably the final reply parsed against CalendarEvent - confirm against the SDK docs
diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts
@@ -3,7 +3,11 @@ import { type ValidatedRequest } from "../middleware/validation.js";
 import { type CreateResponseParams } from "../schemas.js";
 import { generateUniqueId } from "../lib/generateUniqueId.js";
 import { InferenceClient } from "@huggingface/inference";
-import type { ChatCompletionInputMessage, ChatCompletionInputMessageChunkType } from "@huggingface/tasks";
+import type {
+	ChatCompletionInputMessage,
+	ChatCompletionInputMessageChunkType,
+	ChatCompletionInput,
+} from "@huggingface/tasks";
 
 import type {
 	Response,
@@ -69,13 +73,28 @@ export const postCreateResponse = async (
 		messages.push({ role: "user", content: req.body.input });
 	}
 
-	const payload = {
+	const payload: ChatCompletionInput = {
 		model: req.body.model,
+		provider: req.body.provider,
 		messages: messages,
 		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
 		temperature: req.body.temperature,
 		top_p: req.body.top_p,
 		stream: req.body.stream,
+		response_format: req.body.text?.format
+			? {
+					type: req.body.text.format.type,
+					json_schema:
+						req.body.text.format.type === "json_schema"
+							? {
+									description: req.body.text.format.description,
+									name: req.body.text.format.name,
+									schema: req.body.text.format.schema,
+									strict: req.body.text.format.strict,
+							  }
+							: undefined,
+			  }
+			: undefined,
 	};
 
 	const responseObject: Omit<
@@ -225,12 +244,7 @@ export const postCreateResponse = async (
 	}
 
 	try {
-		const chatCompletionResponse = await client.chatCompletion({
-			model: req.body.model,
-			messages: messages,
-			temperature: req.body.temperature,
-			top_p: req.body.top_p,
-		});
+		const chatCompletionResponse = await client.chatCompletion(payload);
 
 		responseObject.status = "completed";
 		responseObject.output = chatCompletionResponse.choices[0].message.content

diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts
@@ -83,6 +83,7 @@ export const createResponseParamsSchema = z.object({
 		.nullable()
 		.default(null),
 	model: z.string(),
+	provider: z.string().optional(),
 	// previous_response_id: z.string().nullable().default(null),
 	// reasoning: z.object({
 	// 	effort: z.enum(["low", "medium", "high"]).default("medium"),
@@ -91,7 +92,28 @@ export const createResponseParamsSchema = z.object({
 	// store: z.boolean().default(true),
 	stream: z.boolean().default(false),
 	temperature: z.number().min(0).max(2).default(1),
-	// text:
+	text: z
+		.object({
+			format: z.union([
+				z.object({
+					type: z.literal("text"),
+				}),
+				z.object({
+					type: z.literal("json_object"),
+				}),
+				z.object({
+					type: z.literal("json_schema"),
+					name: z
+						.string()
+						.max(64, "Must be at most 64 characters")
+						.regex(/^[a-zA-Z0-9_-]+$/, "Only letters, numbers, underscores, and dashes are allowed"),
+					description: z.string().optional(),
+					schema: z.record(z.any()),
+					strict: z.boolean().default(false),
+				}),
+			]),
+		})
+		.optional(),
 	// tool_choice:
 	// tools:
 	// top_logprobs: z.number().min(0).max(20).nullable().default(null),