huggingface · Wauplin · Jul 1, 2025 · Jul 1, 2025
diff --git a/packages/responses-server/package.json b/packages/responses-server/package.json
@@ -53,11 +53,11 @@
 		"@huggingface/inference": "workspace:^",
 		"@huggingface/tasks": "workspace:^",
 		"express": "^4.18.2",
+		"openai": "^5.8.2",
 		"zod": "^3.22.4"
 	},
 	"devDependencies": {
 		"@types/express": "^4.17.21",
-		"openai": "^5.8.2",
 		"tsx": "^4.7.0"
 	}
 }
diff --git a/packages/responses-server/pnpm-lock.yaml b/packages/responses-server/pnpm-lock.yaml
diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts
@@ -1,12 +1,14 @@
 import { type Response as ExpressResponse } from "express";
 import { type ValidatedRequest } from "../middleware/validation.js";
-import { type CreateResponse, type Response } from "../schemas.js";
+import { type CreateResponseParams } from "../schemas.js";
 import { generateUniqueId } from "../lib/generateUniqueId.js";
 import { InferenceClient } from "@huggingface/inference";
 import type { ChatCompletionInputMessage, ChatCompletionInputMessageChunkType } from "@huggingface/tasks";
 
+import { type Response as OpenAIResponse } from "openai/resources/responses/responses";
+
 export const postCreateResponse = async (
-	req: ValidatedRequest<CreateResponse>,
+	req: ValidatedRequest<CreateResponseParams>,
 	res: ExpressResponse
 ): Promise<void> => {
 	const apiKey = req.headers.authorization?.split(" ")[1];
@@ -60,7 +62,10 @@ export const postCreateResponse = async (
 			top_p: req.body.top_p,
 		});
 
-		const responseObject: Response = {
+		const responseObject: Omit<
+			OpenAIResponse,
+			"incomplete_details" | "metadata" | "output_text" | "parallel_tool_calls" | "tool_choice" | "tools"
+		> = {
 			object: "response",
 			id: generateUniqueId("resp"),
 			status: "completed",
@@ -81,6 +86,7 @@ export const postCreateResponse = async (
 								{
 									type: "output_text",
 									text: chatCompletionResponse.choices[0].message.content,
+									annotations: [],
 								},
 							],
 						},

diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts
@@ -4,7 +4,7 @@ import { z } from "zod";
  * https://platform.openai.com/docs/api-reference/responses/create
  * commented out properties are not supported by the server
  */
-export const createResponseSchema = z.object({
+export const createResponseParamsSchema = z.object({
 	// background: z.boolean().default(false),
 	// include:
 	input: z.union([
@@ -22,15 +22,15 @@ export const createResponseSchema = z.object({
 							}),
 							z.object({
 								type: z.literal("input_image"),
-								// file_id: z.string().optional(),
+								// file_id: z.string().nullable(),
 								image_url: z.string(),
 								// detail: z.enum(["auto", "low", "high"]).default("auto"),
 							}),
 							// z.object({
 							// 	type: z.literal("input_file"),
-							// 	file_data: z.string().optional(),
-							// 	file_id: z.string().optional(),
-							// 	filename: z.string().optional(),
+							// 	file_data: z.string().nullable(),
+							// 	file_id: z.string().nullable(),
+							// 	filename: z.string().nullable(),
 							// }),
 						])
 					),
@@ -46,97 +46,26 @@ export const createResponseSchema = z.object({
 			// ])
 		),
 	]),
-	instructions: z.string().optional(),
-	// max_output_tokens: z.number().min(0).optional(),
-	// max_tool_calls: z.number().min(0).optional(),
-	// metadata: z.record(z.string().max(64), z.string().max(512)).optional(), // + 16 items max
+	instructions: z.string().nullable().default(null), // may be omitted by clients; parsed value is then null
+	// max_output_tokens: z.number().min(0).nullable(),
+	// max_tool_calls: z.number().min(0).nullable(),
+	// metadata: z.record(z.string().max(64), z.string().max(512)).nullable(), // + 16 items max
 	model: z.string(),
-	// previous_response_id: z.string().optional(),
+	// previous_response_id: z.string().nullable(),
 	// reasoning: z.object({
 	// 	effort: z.enum(["low", "medium", "high"]).default("medium"),
-	// 	summary: z.enum(["auto", "concise", "detailed"]).optional(),
+	// 	summary: z.enum(["auto", "concise", "detailed"]).nullable(),
 	// }),
 	// store: z.boolean().default(true),
 	// stream: z.boolean().default(false),
 	temperature: z.number().min(0).max(2).default(1),
 	// text:
 	// tool_choice:
 	// tools:
-	// top_logprobs: z.number().min(0).max(20).optional(),
+	// top_logprobs: z.number().min(0).max(20).nullable(),
 	top_p: z.number().min(0).max(1).default(1),
 	// truncation: z.enum(["auto", "disabled"]).default("disabled"),
 	// user
 });
 
-export type CreateResponse = z.infer<typeof createResponseSchema>;
-
-export const responseSchema = z.object({
-	object: z.literal("response"),
-	created_at: z.number(),
-	error: z
-		.object({
-			code: z.string(),
-			message: z.string(),
-		})
-		.nullable(),
-	id: z.string(),
-	status: z.enum(["completed", "failed", "in_progress", "cancelled", "queued", "incomplete"]),
-	// incomplete_details: z.object({ reason: z.string() }).optional(),
-	instructions: z.string().optional(),
-	// max_output_tokens: z.number().min(0).optional(),
-	// max_tool_calls: z.number().min(0).optional(),
-	// metadata: z.record(z.string().max(64), z.string().max(512)).optional(), // + 16 items max
-	model: z.string(),
-	output: z.array(
-		z.object({
-			type: z.enum(["message"]),
-			id: z.string(),
-			status: z.enum(["completed", "failed"]),
-			role: z.enum(["assistant"]),
-			content: z.array(
-				z.union([
-					z.object({
-						type: z.literal("output_text"),
-						text: z.string(),
-						// annotations:
-						// logprobs:
-					}),
-					z.object({
-						type: z.literal("refusal"),
-						refusal: z.string(),
-					}),
-				])
-			),
-		})
-		// in practice, should be a z.union of the following:
-		// File search tool call
-		// Function tool call
-		// Web search tool call
-		// Computer tool call
-		// Reasoning
-		// Image generation call
-		// Code interpreter tool call
-		// Local shell call
-		// MCP tool call
-		// MCP list tools
-		// MCP approval request
-	),
-	// parallel_tool_calls: z.boolean(),
-	// previous_response_id: z.string().optional(),
-	// reasoning: z.object({
-	// 	effort: z.enum(["low", "medium", "high"]).optional(),
-	// 	summary: z.enum(["auto", "concise", "detailed"]).optional(),
-	// }),
-	// store: z.boolean(),
-	temperature: z.number(),
-	// text:
-	// tool_choice:
-	// tools:
-	// top_logprobs: z.number().int().min(0).max(20).optional(),
-	top_p: z.number(),
-	// truncation: z.enum(["auto", "disabled"]).default("disabled"),
-	// usage: ...
-	// user
-});
-
-export type Response = z.infer<typeof responseSchema>;
+export type CreateResponseParams = z.infer<typeof createResponseParamsSchema>;
diff --git a/packages/responses-server/src/server.ts b/packages/responses-server/src/server.ts
@@ -1,5 +1,5 @@
 import express, { type Express } from "express";
-import { createResponseSchema } from "./schemas.js";
+import { createResponseParamsSchema } from "./schemas.js";
 import { validateBody } from "./middleware/validation.js";
 import { requestLogger } from "./middleware/logging.js";
 import { postCreateResponse } from "./routes/index.js";
@@ -16,7 +16,7 @@ export const createApp = (): Express => {
 		res.send("hello world");
 	});
 
-	app.post("/v1/responses", validateBody(createResponseSchema), postCreateResponse);
+	app.post("/v1/responses", validateBody(createResponseParamsSchema), postCreateResponse);
 
 	return app;
 };