From 6294dd8d4a448a2cb8d3cfb125a91d9771567452 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Wed, 2 Jul 2025 11:54:44 +0200
Subject: [PATCH 1/2] [Responses API] Structured output

---
 .../examples/structured_output.js             | 32 +++++++++++++++++++
 .../responses-server/src/routes/responses.ts  | 30 ++++++++++++-----
 packages/responses-server/src/schemas.ts      | 24 +++++++++++++-
 3 files changed, 77 insertions(+), 9 deletions(-)
 create mode 100644 packages/responses-server/examples/structured_output.js

diff --git a/packages/responses-server/examples/structured_output.js b/packages/responses-server/examples/structured_output.js
new file mode 100644
index 0000000000..f8b3eccf05
--- /dev/null
+++ b/packages/responses-server/examples/structured_output.js
@@ -0,0 +1,32 @@
+import OpenAI from "openai";
+import { zodTextFormat } from "openai/helpers/zod";
+import { z } from "zod";
+
+const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });
+
+const Step = z.object({
+	explanation: z.string(),
+	output: z.string(),
+});
+
+const MathReasoning = z.object({
+	steps: z.array(Step),
+	final_answer: z.string(),
+});
+
+const response = await openai.responses.parse({
+	model: "Qwen/Qwen2.5-VL-72B-Instruct",
+	provider: "cerebras",
+	input: [
+		{
+			role: "system",
+			content: "You are a helpful math tutor. Guide the user through the solution step by step.",
+		},
+		{ role: "user", content: "how can I solve 8x + 7 = -23" },
+	],
+	text: {
+		format: zodTextFormat(MathReasoning, "math_reasoning"),
+	},
+});
+
+console.log(response.output_parsed);
diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts
index 40350ee770..663383df25 100644
--- a/packages/responses-server/src/routes/responses.ts
+++ b/packages/responses-server/src/routes/responses.ts
@@ -3,7 +3,11 @@ import { type ValidatedRequest } from "../middleware/validation.js";
 import { type CreateResponseParams } from "../schemas.js";
 import { generateUniqueId } from "../lib/generateUniqueId.js";
 import { InferenceClient } from "@huggingface/inference";
-import type { ChatCompletionInputMessage, ChatCompletionInputMessageChunkType } from "@huggingface/tasks";
+import type {
+	ChatCompletionInputMessage,
+	ChatCompletionInputMessageChunkType,
+	ChatCompletionInput,
+} from "@huggingface/tasks";
 
 import type {
 	Response,
@@ -69,13 +73,28 @@ export const postCreateResponse = async (
 		messages.push({ role: "user", content: req.body.input });
 	}
 
-	const payload = {
+	const payload: ChatCompletionInput = {
 		model: req.body.model,
+		provider: req.body.provider,
 		messages: messages,
 		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
 		temperature: req.body.temperature,
 		top_p: req.body.top_p,
 		stream: req.body.stream,
+		response_format: req.body.text?.format
+			? {
+					type: req.body.text.format.type,
+					json_schema:
+						req.body.text.format.type === "json_schema"
+							? {
+									description: req.body.text.format.description,
+									name: req.body.text.format.name,
+									schema: req.body.text.format.schema,
+									strict: req.body.text.format.strict,
+								}
+							: undefined,
+				}
+			: undefined,
 	};
 
 	const responseObject: Omit<
@@ -225,12 +244,7 @@
 	}
 
 	try {
-		const chatCompletionResponse = await client.chatCompletion({
-			model: req.body.model,
-			messages: messages,
-			temperature: req.body.temperature,
-			top_p: req.body.top_p,
-		});
+		const chatCompletionResponse = await client.chatCompletion(payload);
 
 		responseObject.status = "completed";
 		responseObject.output = chatCompletionResponse.choices[0].message.content
diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts
index 0439e68509..65b437c671 100644
--- a/packages/responses-server/src/schemas.ts
+++ b/packages/responses-server/src/schemas.ts
@@ -83,6 +83,7 @@ export const createResponseParamsSchema = z.object({
 		.nullable()
 		.default(null),
 	model: z.string(),
+	provider: z.string().optional(),
 	// previous_response_id: z.string().nullable().default(null),
 	// reasoning: z.object({
 	//   effort: z.enum(["low", "medium", "high"]).default("medium"),
@@ -91,7 +92,28 @@ export const createResponseParamsSchema = z.object({
 	// store: z.boolean().default(true),
 	stream: z.boolean().default(false),
 	temperature: z.number().min(0).max(2).default(1),
-	// text:
+	text: z
+		.object({
+			format: z.union([
+				z.object({
+					type: z.literal("text"),
+				}),
+				z.object({
+					type: z.literal("json_object"),
+				}),
+				z.object({
+					type: z.literal("json_schema"),
+					name: z
+						.string()
+						.max(64, "Must be at most 64 characters")
+						.regex(/^[a-zA-Z0-9_-]+$/, "Only letters, numbers, underscores, and dashes are allowed"),
+					description: z.string().optional(),
+					schema: z.record(z.any()),
+					strict: z.boolean().default(false),
+				}),
+			]),
+		})
+		.optional(),
 	// tool_choice:
 	// tools:
 	// top_logprobs: z.number().min(0).max(20).nullable().default(null),
From 87d5478b96308e56fba9bcde57606e102a32409f Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Wed, 2 Jul 2025 12:12:26 +0200
Subject: [PATCH 2/2] add streaming example

---
 .../examples/structured_output.js             |  2 +-
 .../examples/structured_output_streaming.js   | 36 +++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 packages/responses-server/examples/structured_output_streaming.js

diff --git a/packages/responses-server/examples/structured_output.js b/packages/responses-server/examples/structured_output.js
index f8b3eccf05..e1496b2006 100644
--- a/packages/responses-server/examples/structured_output.js
+++ b/packages/responses-server/examples/structured_output.js
@@ -16,7 +16,7 @@ const MathReasoning = z.object({
 
 const response = await openai.responses.parse({
 	model: "Qwen/Qwen2.5-VL-72B-Instruct",
-	provider: "cerebras",
+	provider: "nebius",
 	input: [
 		{
 			role: "system",
diff --git a/packages/responses-server/examples/structured_output_streaming.js b/packages/responses-server/examples/structured_output_streaming.js
new file mode 100644
index 0000000000..bdd8c1cf1e
--- /dev/null
+++ b/packages/responses-server/examples/structured_output_streaming.js
@@ -0,0 +1,36 @@
+import { OpenAI } from "openai";
+import { zodTextFormat } from "openai/helpers/zod";
+import { z } from "zod";
+
+const CalendarEvent = z.object({
+	name: z.string(),
+	date: z.string(),
+	participants: z.array(z.string()),
+});
+
+const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });
+const stream = openai.responses
+	.stream({
+		model: "Qwen/Qwen2.5-VL-72B-Instruct",
+		provider: "nebius",
+		instructions: "Extract the event information.",
+		input: "Alice and Bob are going to a science fair on Friday.",
+		text: {
+			format: zodTextFormat(CalendarEvent, "calendar_event"),
+		},
+	})
+	.on("response.refusal.delta", (event) => {
+		process.stdout.write(event.delta);
+	})
+	.on("response.output_text.delta", (event) => {
+		process.stdout.write(event.delta);
+	})
+	.on("response.output_text.done", () => {
+		process.stdout.write("\n");
+	})
+	.on("response.error", (event) => {
+		console.error(event.error);
+	});
+
+const result = await stream.finalResponse();
+console.log(result.output_parsed);