From 4ba0492c3c37201a914a0f17aafba189952a75ec Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Mon, 30 Jun 2025 16:55:06 +0200
Subject: [PATCH 01/12] first commit

---
 packages/responses-server/.eslintignore       |   1 +
 packages/responses-server/.gitignore          |   1 +
 packages/responses-server/.prettierignore     |   4 +
 packages/responses-server/README.md           |  47 +
 packages/responses-server/package.json        |  60 ++
 packages/responses-server/pnpm-lock.yaml      | 974 ++++++++++++++++++
 .../responses-server/scripts/dummy-call.js    |  29 +
 packages/responses-server/src/index.ts        |  26 +
 .../src/middleware/logging.ts                 |  41 +
 .../src/middleware/validation.ts              |  41 +
 packages/responses-server/src/routes/index.ts |   1 +
 .../responses-server/src/routes/responses.ts  |  11 +
 .../responses-server/src/schemas/responses.ts |   7 +
 packages/responses-server/src/server.ts       |  22 +
 packages/responses-server/tsconfig.json       |  21 +
 pnpm-workspace.yaml                           |   1 +
 16 files changed, 1287 insertions(+)
 create mode 100644 packages/responses-server/.eslintignore
 create mode 100644 packages/responses-server/.gitignore
 create mode 100644 packages/responses-server/.prettierignore
 create mode 100644 packages/responses-server/README.md
 create mode 100644 packages/responses-server/package.json
 create mode 100644 packages/responses-server/pnpm-lock.yaml
 create mode 100755 packages/responses-server/scripts/dummy-call.js
 create mode 100644 packages/responses-server/src/index.ts
 create mode 100644 packages/responses-server/src/middleware/logging.ts
 create mode 100644 packages/responses-server/src/middleware/validation.ts
 create mode 100644 packages/responses-server/src/routes/index.ts
 create mode 100644 packages/responses-server/src/routes/responses.ts
 create mode 100644 packages/responses-server/src/schemas/responses.ts
 create mode 100644 packages/responses-server/src/server.ts
 create mode 100644 packages/responses-server/tsconfig.json

diff --git a/packages/responses-server/.eslintignore b/packages/responses-server/.eslintignore
new file mode 100644
index 0000000000..9edb9afc9d
--- /dev/null
+++ b/packages/responses-server/.eslintignore
@@ -0,0 +1 @@
+dist
\ No newline at end of file
diff --git a/packages/responses-server/.gitignore b/packages/responses-server/.gitignore
new file mode 100644
index 0000000000..8e5bbf044f
--- /dev/null
+++ b/packages/responses-server/.gitignore
@@ -0,0 +1 @@
+*.py
\ No newline at end of file
diff --git a/packages/responses-server/.prettierignore b/packages/responses-server/.prettierignore
new file mode 100644
index 0000000000..d95d49a2ec
--- /dev/null
+++ b/packages/responses-server/.prettierignore
@@ -0,0 +1,4 @@
+pnpm-lock.yaml
+# Skip README.md so that code samples don't end up with tabs; tabs don't display well on npm
+README.md
+dist
\ No newline at end of file
diff --git a/packages/responses-server/README.md b/packages/responses-server/README.md
new file mode 100644
index 0000000000..272553606e
--- /dev/null
+++ b/packages/responses-server/README.md
@@ -0,0 +1,47 @@
+# @huggingface/responses-server
+
+A lightweight Express.js server supporting the Responses API on top of the Inference Providers Chat Completion API.
+
+## 📁 Project Structure
+
+```
+responses-server/
+├── src/
+│   ├── index.ts
+│   ├── server.ts         # Express app configuration (e.g. 
route definition) +│ ├── routes/ # Routes implementation +│ ├── middleware/ # Middlewares (validation + logging) +│ └── schemas/ # Zod validation schemas +├── scripts/ # Utility scripts +├── package.json # Package configuration +``` + +## 🚀 Quick Start + +### Development + +```bash +# Install dependencies +pnpm install + +# Start development server +pnpm dev +``` + +Run a simple POST request with + +```bash +# Make dummy call +pnpm dummy +``` + +## đŸ› ī¸ Available Scripts + +- `pnpm dev` - Start development server with hot reload +- `pnpm start` - Start production server +- `pnpm build` - Build for production +- `pnpm dummy` - Run test API call +- `pnpm lint` - Run ESLint with auto-fix +- `pnpm format` - Format code with Prettier +- `pnpm test` - Run tests +- `pnpm check` - Type check with TypeScript diff --git a/packages/responses-server/package.json b/packages/responses-server/package.json new file mode 100644 index 0000000000..a0bef8d30d --- /dev/null +++ b/packages/responses-server/package.json @@ -0,0 +1,60 @@ +{ + "name": "@huggingface/responses-server", + "packageManager": "pnpm@10.10.0", + "version": "0.1.0", + "type": "module", + "description": "Server for handling AI responses", + "repository": "https://github.com/huggingface/huggingface.js.git", + "publishConfig": { + "access": "public" + }, + "main": "./dist/index.js", + "module": "./dist/index.mjs", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "require": "./dist/index.js", + "import": "./dist/index.mjs" + } + }, + "engines": { + "node": ">=18" + }, + "source": "index.ts", + "scripts": { + "build": "tsup src/*.ts --format cjs,esm --clean && tsc --emitDeclarationOnly --declaration", + "check": "tsc", + "dev": "tsx watch src/index.ts", + "dummy": "node scripts/dummy-call.js", + "format": "prettier --write .", + "format:check": "prettier --check .", + "lint": "eslint --quiet --fix --ext .cjs,.ts .", + "lint:check": "eslint --ext .cjs,.ts .", + "prepublishOnly": "pnpm run build", + "prepare": "pnpm run build", + "start": "node dist/index.js" + }, + "files": [ + "src", + "dist", + "tsconfig.json" + ], + "keywords": [ + "huggingface", + "ai", + "llm", + "responses-api", + "server" + ], + "author": "Hugging Face", + "license": "MIT", + "dependencies": { + "express": "^4.18.2", + "zod": "^3.22.4" + }, + "devDependencies": { + "@types/express": "^4.17.21", + "tsx": "^4.7.0" + } +} diff --git a/packages/responses-server/pnpm-lock.yaml b/packages/responses-server/pnpm-lock.yaml new file mode 100644 index 0000000000..c63d12089c --- /dev/null +++ b/packages/responses-server/pnpm-lock.yaml @@ -0,0 +1,974 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + express: + specifier: ^4.18.2 + version: 4.21.2 + zod: + specifier: ^3.22.4 + version: 3.25.67 + devDependencies: + '@types/express': + specifier: ^4.17.21 + version: 4.17.23 + tsx: + specifier: ^4.7.0 + version: 4.20.3 + +packages: + + '@esbuild/aix-ppc64@0.25.5': + resolution: {integrity: sha512-9o3TMmpmftaCMepOdA5k/yDw8SfInyzWWTjYTFCX3kPSDJMROQTb8jg+h9Cnwnmm1vOzvxN7gIfB5V2ewpjtGA==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + + '@esbuild/android-arm64@0.25.5': + resolution: {integrity: sha512-VGzGhj4lJO+TVGV1v8ntCZWJktV7SGCs3Pn1GRWI1SBFtRALoomm8k5E9Pmwg3HOAal2VDc2F9+PM/rEY6oIDg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] + + '@esbuild/android-arm@0.25.5': + resolution: {integrity: 
sha512-AdJKSPeEHgi7/ZhuIPtcQKr5RQdo6OO2IL87JkianiMYMPbCtot9fxPbrMiBADOWWm3T2si9stAiVsGbTQFkbA==} + engines: {node: '>=18'} + cpu: [arm] + os: [android] + + '@esbuild/android-x64@0.25.5': + resolution: {integrity: sha512-D2GyJT1kjvO//drbRT3Hib9XPwQeWd9vZoBJn+bu/lVsOZ13cqNdDeqIF/xQ5/VmWvMduP6AmXvylO/PIc2isw==} + engines: {node: '>=18'} + cpu: [x64] + os: [android] + + '@esbuild/darwin-arm64@0.25.5': + resolution: {integrity: sha512-GtaBgammVvdF7aPIgH2jxMDdivezgFu6iKpmT+48+F8Hhg5J/sfnDieg0aeG/jfSvkYQU2/pceFPDKlqZzwnfQ==} + engines: {node: '>=18'} + cpu: [arm64] + os: [darwin] + + '@esbuild/darwin-x64@0.25.5': + resolution: {integrity: sha512-1iT4FVL0dJ76/q1wd7XDsXrSW+oLoquptvh4CLR4kITDtqi2e/xwXwdCVH8hVHU43wgJdsq7Gxuzcs6Iq/7bxQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [darwin] + + '@esbuild/freebsd-arm64@0.25.5': + resolution: {integrity: sha512-nk4tGP3JThz4La38Uy/gzyXtpkPW8zSAmoUhK9xKKXdBCzKODMc2adkB2+8om9BDYugz+uGV7sLmpTYzvmz6Sw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [freebsd] + + '@esbuild/freebsd-x64@0.25.5': + resolution: {integrity: sha512-PrikaNjiXdR2laW6OIjlbeuCPrPaAl0IwPIaRv+SMV8CiM8i2LqVUHFC1+8eORgWyY7yhQY+2U2fA55mBzReaw==} + engines: {node: '>=18'} + cpu: [x64] + os: [freebsd] + + '@esbuild/linux-arm64@0.25.5': + resolution: {integrity: sha512-Z9kfb1v6ZlGbWj8EJk9T6czVEjjq2ntSYLY2cw6pAZl4oKtfgQuS4HOq41M/BcoLPzrUbNd+R4BXFyH//nHxVg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [linux] + + '@esbuild/linux-arm@0.25.5': + resolution: {integrity: sha512-cPzojwW2okgh7ZlRpcBEtsX7WBuqbLrNXqLU89GxWbNt6uIg78ET82qifUy3W6OVww6ZWobWub5oqZOVtwolfw==} + engines: {node: '>=18'} + cpu: [arm] + os: [linux] + + '@esbuild/linux-ia32@0.25.5': + resolution: {integrity: sha512-sQ7l00M8bSv36GLV95BVAdhJ2QsIbCuCjh/uYrWiMQSUuV+LpXwIqhgJDcvMTj+VsQmqAHL2yYaasENvJ7CDKA==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + + '@esbuild/linux-loong64@0.25.5': + resolution: {integrity: sha512-0ur7ae16hDUC4OL5iEnDb0tZHDxYmuQyhKhsPBV8f99f6Z9KQM02g33f93rNH5A30agMS46u2HP6qTdEt6Q1kg==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + + '@esbuild/linux-mips64el@0.25.5': + resolution: {integrity: sha512-kB/66P1OsHO5zLz0i6X0RxlQ+3cu0mkxS3TKFvkb5lin6uwZ/ttOkP3Z8lfR9mJOBk14ZwZ9182SIIWFGNmqmg==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + + '@esbuild/linux-ppc64@0.25.5': + resolution: {integrity: sha512-UZCmJ7r9X2fe2D6jBmkLBMQetXPXIsZjQJCjgwpVDz+YMcS6oFR27alkgGv3Oqkv07bxdvw7fyB71/olceJhkQ==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [linux] + + '@esbuild/linux-riscv64@0.25.5': + resolution: {integrity: sha512-kTxwu4mLyeOlsVIFPfQo+fQJAV9mh24xL+y+Bm6ej067sYANjyEw1dNHmvoqxJUCMnkBdKpvOn0Ahql6+4VyeA==} + engines: {node: '>=18'} + cpu: [riscv64] + os: [linux] + + '@esbuild/linux-s390x@0.25.5': + resolution: {integrity: sha512-K2dSKTKfmdh78uJ3NcWFiqyRrimfdinS5ErLSn3vluHNeHVnBAFWC8a4X5N+7FgVE1EjXS1QDZbpqZBjfrqMTQ==} + engines: {node: '>=18'} + cpu: [s390x] + os: [linux] + + '@esbuild/linux-x64@0.25.5': + resolution: {integrity: sha512-uhj8N2obKTE6pSZ+aMUbqq+1nXxNjZIIjCjGLfsWvVpy7gKCOL6rsY1MhRh9zLtUtAI7vpgLMK6DxjO8Qm9lJw==} + engines: {node: '>=18'} + cpu: [x64] + os: [linux] + + '@esbuild/netbsd-arm64@0.25.5': + resolution: {integrity: sha512-pwHtMP9viAy1oHPvgxtOv+OkduK5ugofNTVDilIzBLpoWAM16r7b/mxBvfpuQDpRQFMfuVr5aLcn4yveGvBZvw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [netbsd] + + '@esbuild/netbsd-x64@0.25.5': + resolution: {integrity: sha512-WOb5fKrvVTRMfWFNCroYWWklbnXH0Q5rZppjq0vQIdlsQKuw6mdSihwSo4RV/YdQ5UCKKvBy7/0ZZYLBZKIbwQ==} + engines: {node: '>=18'} + cpu: [x64] + 
os: [netbsd] + + '@esbuild/openbsd-arm64@0.25.5': + resolution: {integrity: sha512-7A208+uQKgTxHd0G0uqZO8UjK2R0DDb4fDmERtARjSHWxqMTye4Erz4zZafx7Di9Cv+lNHYuncAkiGFySoD+Mw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openbsd] + + '@esbuild/openbsd-x64@0.25.5': + resolution: {integrity: sha512-G4hE405ErTWraiZ8UiSoesH8DaCsMm0Cay4fsFWOOUcz8b8rC6uCvnagr+gnioEjWn0wC+o1/TAHt+It+MpIMg==} + engines: {node: '>=18'} + cpu: [x64] + os: [openbsd] + + '@esbuild/sunos-x64@0.25.5': + resolution: {integrity: sha512-l+azKShMy7FxzY0Rj4RCt5VD/q8mG/e+mDivgspo+yL8zW7qEwctQ6YqKX34DTEleFAvCIUviCFX1SDZRSyMQA==} + engines: {node: '>=18'} + cpu: [x64] + os: [sunos] + + '@esbuild/win32-arm64@0.25.5': + resolution: {integrity: sha512-O2S7SNZzdcFG7eFKgvwUEZ2VG9D/sn/eIiz8XRZ1Q/DO5a3s76Xv0mdBzVM5j5R639lXQmPmSo0iRpHqUUrsxw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [win32] + + '@esbuild/win32-ia32@0.25.5': + resolution: {integrity: sha512-onOJ02pqs9h1iMJ1PQphR+VZv8qBMQ77Klcsqv9CNW2w6yLqoURLcgERAIurY6QE63bbLuqgP9ATqajFLK5AMQ==} + engines: {node: '>=18'} + cpu: [ia32] + os: [win32] + + '@esbuild/win32-x64@0.25.5': + resolution: {integrity: sha512-TXv6YnJ8ZMVdX+SXWVBo/0p8LTcrUYngpWjvm91TMjjBQii7Oz11Lw5lbDV5Y0TzuhSJHwiH4hEtC1I42mMS0g==} + engines: {node: '>=18'} + cpu: [x64] + os: [win32] + + '@types/body-parser@1.19.6': + resolution: {integrity: sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==} + + '@types/connect@3.4.38': + resolution: {integrity: sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==} + + '@types/express-serve-static-core@4.19.6': + resolution: {integrity: sha512-N4LZ2xG7DatVqhCZzOGb1Yi5lMbXSZcmdLDe9EzSndPV2HpWYWzRbaerl2n27irrm94EPpprqa8KpskPT085+A==} + + '@types/express@4.17.23': + resolution: {integrity: sha512-Crp6WY9aTYP3qPi2wGDo9iUe/rceX01UMhnF1jmwDcKCFM6cx7YhGP/Mpr3y9AASpfHixIG0E6azCcL5OcDHsQ==} + + '@types/http-errors@2.0.5': + resolution: {integrity: sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==} + + '@types/mime@1.3.5': + resolution: {integrity: sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w==} + + '@types/node@24.0.7': + resolution: {integrity: sha512-YIEUUr4yf8q8oQoXPpSlnvKNVKDQlPMWrmOcgzoduo7kvA2UF0/BwJ/eMKFTiTtkNL17I0M6Xe2tvwFU7be6iw==} + + '@types/qs@6.14.0': + resolution: {integrity: sha512-eOunJqu0K1923aExK6y8p6fsihYEn/BYuQ4g0CxAAgFc4b/ZLN4CrsRZ55srTdqoiLzU2B2evC+apEIxprEzkQ==} + + '@types/range-parser@1.2.7': + resolution: {integrity: sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==} + + '@types/send@0.17.5': + resolution: {integrity: sha512-z6F2D3cOStZvuk2SaP6YrwkNO65iTZcwA2ZkSABegdkAh/lf+Aa/YQndZVfmEXT5vgAp6zv06VQ3ejSVjAny4w==} + + '@types/serve-static@1.15.8': + resolution: {integrity: sha512-roei0UY3LhpOJvjbIP6ZZFngyLKl5dskOtDhxY5THRSpO+ZI+nzJ+m5yUMzGrp89YRa7lvknKkMYjqQFGwA7Sg==} + + accepts@1.3.8: + resolution: {integrity: sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==} + engines: {node: '>= 0.6'} + + array-flatten@1.1.1: + resolution: {integrity: sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==} + + body-parser@1.20.3: + resolution: {integrity: sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g==} + engines: {node: '>= 0.8', npm: 1.2.8000 || >= 1.4.16} + + bytes@3.1.2: + resolution: {integrity: 
sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==} + engines: {node: '>= 0.8'} + + call-bind-apply-helpers@1.0.2: + resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} + engines: {node: '>= 0.4'} + + call-bound@1.0.4: + resolution: {integrity: sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==} + engines: {node: '>= 0.4'} + + content-disposition@0.5.4: + resolution: {integrity: sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==} + engines: {node: '>= 0.6'} + + content-type@1.0.5: + resolution: {integrity: sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==} + engines: {node: '>= 0.6'} + + cookie-signature@1.0.6: + resolution: {integrity: sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==} + + cookie@0.7.1: + resolution: {integrity: sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==} + engines: {node: '>= 0.6'} + + debug@2.6.9: + resolution: {integrity: sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==} + peerDependencies: + supports-color: '*' + peerDependenciesMeta: + supports-color: + optional: true + + depd@2.0.0: + resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==} + engines: {node: '>= 0.8'} + + destroy@1.2.0: + resolution: {integrity: sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==} + engines: {node: '>= 0.8', npm: 1.2.8000 || >= 1.4.16} + + dunder-proto@1.0.1: + resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} + engines: {node: '>= 0.4'} + + ee-first@1.1.1: + resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==} + + encodeurl@1.0.2: + resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==} + engines: {node: '>= 0.8'} + + encodeurl@2.0.0: + resolution: {integrity: sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==} + engines: {node: '>= 0.8'} + + es-define-property@1.0.1: + resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==} + engines: {node: '>= 0.4'} + + es-errors@1.3.0: + resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==} + engines: {node: '>= 0.4'} + + es-object-atoms@1.1.1: + resolution: {integrity: sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==} + engines: {node: '>= 0.4'} + + esbuild@0.25.5: + resolution: {integrity: sha512-P8OtKZRv/5J5hhz0cUAdu/cLuPIKXpQl1R9pZtvmHWQvrAUVd0UNIPT4IB4W3rNOqVO0rlqHmCIbSwxh/c9yUQ==} + engines: {node: '>=18'} + hasBin: true + + escape-html@1.0.3: + resolution: {integrity: sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==} + + etag@1.8.1: + resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==} + engines: {node: '>= 0.6'} + + express@4.21.2: + resolution: {integrity: 
sha512-28HqgMZAmih1Czt9ny7qr6ek2qddF4FclbMzwhCREB6OFfH+rXAnuNCwo1/wFvrtbgsQDb4kSbX9de9lFbrXnA==} + engines: {node: '>= 0.10.0'} + + finalhandler@1.3.1: + resolution: {integrity: sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==} + engines: {node: '>= 0.8'} + + forwarded@0.2.0: + resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==} + engines: {node: '>= 0.6'} + + fresh@0.5.2: + resolution: {integrity: sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==} + engines: {node: '>= 0.6'} + + fsevents@2.3.3: + resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + + function-bind@1.1.2: + resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} + + get-intrinsic@1.3.0: + resolution: {integrity: sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==} + engines: {node: '>= 0.4'} + + get-proto@1.0.1: + resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==} + engines: {node: '>= 0.4'} + + get-tsconfig@4.10.1: + resolution: {integrity: sha512-auHyJ4AgMz7vgS8Hp3N6HXSmlMdUyhSUrfBF16w153rxtLIEOE+HGqaBppczZvnHLqQJfiHotCYpNhl0lUROFQ==} + + gopd@1.2.0: + resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} + engines: {node: '>= 0.4'} + + has-symbols@1.1.0: + resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==} + engines: {node: '>= 0.4'} + + hasown@2.0.2: + resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} + engines: {node: '>= 0.4'} + + http-errors@2.0.0: + resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==} + engines: {node: '>= 0.8'} + + iconv-lite@0.4.24: + resolution: {integrity: sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==} + engines: {node: '>=0.10.0'} + + inherits@2.0.4: + resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + + ipaddr.js@1.9.1: + resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==} + engines: {node: '>= 0.10'} + + math-intrinsics@1.1.0: + resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} + engines: {node: '>= 0.4'} + + media-typer@0.3.0: + resolution: {integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==} + engines: {node: '>= 0.6'} + + merge-descriptors@1.0.3: + resolution: {integrity: sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==} + + methods@1.1.2: + resolution: {integrity: sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==} + engines: {node: '>= 0.6'} + + mime-db@1.52.0: + resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} + engines: {node: '>= 0.6'} + + mime-types@2.1.35: + resolution: 
{integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} + engines: {node: '>= 0.6'} + + mime@1.6.0: + resolution: {integrity: sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==} + engines: {node: '>=4'} + hasBin: true + + ms@2.0.0: + resolution: {integrity: sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==} + + ms@2.1.3: + resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + + negotiator@0.6.3: + resolution: {integrity: sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==} + engines: {node: '>= 0.6'} + + object-inspect@1.13.4: + resolution: {integrity: sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==} + engines: {node: '>= 0.4'} + + on-finished@2.4.1: + resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==} + engines: {node: '>= 0.8'} + + parseurl@1.3.3: + resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==} + engines: {node: '>= 0.8'} + + path-to-regexp@0.1.12: + resolution: {integrity: sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==} + + proxy-addr@2.0.7: + resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==} + engines: {node: '>= 0.10'} + + qs@6.13.0: + resolution: {integrity: sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==} + engines: {node: '>=0.6'} + + range-parser@1.2.1: + resolution: {integrity: sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==} + engines: {node: '>= 0.6'} + + raw-body@2.5.2: + resolution: {integrity: sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==} + engines: {node: '>= 0.8'} + + resolve-pkg-maps@1.0.0: + resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==} + + safe-buffer@5.2.1: + resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} + + safer-buffer@2.1.2: + resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} + + send@0.19.0: + resolution: {integrity: sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==} + engines: {node: '>= 0.8.0'} + + serve-static@1.16.2: + resolution: {integrity: sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==} + engines: {node: '>= 0.8.0'} + + setprototypeof@1.2.0: + resolution: {integrity: sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==} + + side-channel-list@1.0.0: + resolution: {integrity: sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==} + engines: {node: '>= 0.4'} + + side-channel-map@1.0.1: + resolution: {integrity: sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==} + engines: {node: '>= 0.4'} + + side-channel-weakmap@1.0.2: + resolution: {integrity: 
sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==} + engines: {node: '>= 0.4'} + + side-channel@1.1.0: + resolution: {integrity: sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==} + engines: {node: '>= 0.4'} + + statuses@2.0.1: + resolution: {integrity: sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==} + engines: {node: '>= 0.8'} + + toidentifier@1.0.1: + resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==} + engines: {node: '>=0.6'} + + tsx@4.20.3: + resolution: {integrity: sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==} + engines: {node: '>=18.0.0'} + hasBin: true + + type-is@1.6.18: + resolution: {integrity: sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==} + engines: {node: '>= 0.6'} + + undici-types@7.8.0: + resolution: {integrity: sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw==} + + unpipe@1.0.0: + resolution: {integrity: sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==} + engines: {node: '>= 0.8'} + + utils-merge@1.0.1: + resolution: {integrity: sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==} + engines: {node: '>= 0.4.0'} + + vary@1.1.2: + resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==} + engines: {node: '>= 0.8'} + + zod@3.25.67: + resolution: {integrity: sha512-idA2YXwpCdqUSKRCACDE6ItZD9TZzy3OZMtpfLoh6oPR47lipysRrJfjzMqFxQ3uJuUPyUeWe1r9vLH33xO/Qw==} + +snapshots: + + '@esbuild/aix-ppc64@0.25.5': + optional: true + + '@esbuild/android-arm64@0.25.5': + optional: true + + '@esbuild/android-arm@0.25.5': + optional: true + + '@esbuild/android-x64@0.25.5': + optional: true + + '@esbuild/darwin-arm64@0.25.5': + optional: true + + '@esbuild/darwin-x64@0.25.5': + optional: true + + '@esbuild/freebsd-arm64@0.25.5': + optional: true + + '@esbuild/freebsd-x64@0.25.5': + optional: true + + '@esbuild/linux-arm64@0.25.5': + optional: true + + '@esbuild/linux-arm@0.25.5': + optional: true + + '@esbuild/linux-ia32@0.25.5': + optional: true + + '@esbuild/linux-loong64@0.25.5': + optional: true + + '@esbuild/linux-mips64el@0.25.5': + optional: true + + '@esbuild/linux-ppc64@0.25.5': + optional: true + + '@esbuild/linux-riscv64@0.25.5': + optional: true + + '@esbuild/linux-s390x@0.25.5': + optional: true + + '@esbuild/linux-x64@0.25.5': + optional: true + + '@esbuild/netbsd-arm64@0.25.5': + optional: true + + '@esbuild/netbsd-x64@0.25.5': + optional: true + + '@esbuild/openbsd-arm64@0.25.5': + optional: true + + '@esbuild/openbsd-x64@0.25.5': + optional: true + + '@esbuild/sunos-x64@0.25.5': + optional: true + + '@esbuild/win32-arm64@0.25.5': + optional: true + + '@esbuild/win32-ia32@0.25.5': + optional: true + + '@esbuild/win32-x64@0.25.5': + optional: true + + '@types/body-parser@1.19.6': + dependencies: + '@types/connect': 3.4.38 + '@types/node': 24.0.7 + + '@types/connect@3.4.38': + dependencies: + '@types/node': 24.0.7 + + '@types/express-serve-static-core@4.19.6': + dependencies: + '@types/node': 24.0.7 + '@types/qs': 6.14.0 + '@types/range-parser': 1.2.7 + '@types/send': 0.17.5 + + '@types/express@4.17.23': + dependencies: + '@types/body-parser': 1.19.6 + 
'@types/express-serve-static-core': 4.19.6 + '@types/qs': 6.14.0 + '@types/serve-static': 1.15.8 + + '@types/http-errors@2.0.5': {} + + '@types/mime@1.3.5': {} + + '@types/node@24.0.7': + dependencies: + undici-types: 7.8.0 + + '@types/qs@6.14.0': {} + + '@types/range-parser@1.2.7': {} + + '@types/send@0.17.5': + dependencies: + '@types/mime': 1.3.5 + '@types/node': 24.0.7 + + '@types/serve-static@1.15.8': + dependencies: + '@types/http-errors': 2.0.5 + '@types/node': 24.0.7 + '@types/send': 0.17.5 + + accepts@1.3.8: + dependencies: + mime-types: 2.1.35 + negotiator: 0.6.3 + + array-flatten@1.1.1: {} + + body-parser@1.20.3: + dependencies: + bytes: 3.1.2 + content-type: 1.0.5 + debug: 2.6.9 + depd: 2.0.0 + destroy: 1.2.0 + http-errors: 2.0.0 + iconv-lite: 0.4.24 + on-finished: 2.4.1 + qs: 6.13.0 + raw-body: 2.5.2 + type-is: 1.6.18 + unpipe: 1.0.0 + transitivePeerDependencies: + - supports-color + + bytes@3.1.2: {} + + call-bind-apply-helpers@1.0.2: + dependencies: + es-errors: 1.3.0 + function-bind: 1.1.2 + + call-bound@1.0.4: + dependencies: + call-bind-apply-helpers: 1.0.2 + get-intrinsic: 1.3.0 + + content-disposition@0.5.4: + dependencies: + safe-buffer: 5.2.1 + + content-type@1.0.5: {} + + cookie-signature@1.0.6: {} + + cookie@0.7.1: {} + + debug@2.6.9: + dependencies: + ms: 2.0.0 + + depd@2.0.0: {} + + destroy@1.2.0: {} + + dunder-proto@1.0.1: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-errors: 1.3.0 + gopd: 1.2.0 + + ee-first@1.1.1: {} + + encodeurl@1.0.2: {} + + encodeurl@2.0.0: {} + + es-define-property@1.0.1: {} + + es-errors@1.3.0: {} + + es-object-atoms@1.1.1: + dependencies: + es-errors: 1.3.0 + + esbuild@0.25.5: + optionalDependencies: + '@esbuild/aix-ppc64': 0.25.5 + '@esbuild/android-arm': 0.25.5 + '@esbuild/android-arm64': 0.25.5 + '@esbuild/android-x64': 0.25.5 + '@esbuild/darwin-arm64': 0.25.5 + '@esbuild/darwin-x64': 0.25.5 + '@esbuild/freebsd-arm64': 0.25.5 + '@esbuild/freebsd-x64': 0.25.5 + '@esbuild/linux-arm': 0.25.5 + '@esbuild/linux-arm64': 0.25.5 + '@esbuild/linux-ia32': 0.25.5 + '@esbuild/linux-loong64': 0.25.5 + '@esbuild/linux-mips64el': 0.25.5 + '@esbuild/linux-ppc64': 0.25.5 + '@esbuild/linux-riscv64': 0.25.5 + '@esbuild/linux-s390x': 0.25.5 + '@esbuild/linux-x64': 0.25.5 + '@esbuild/netbsd-arm64': 0.25.5 + '@esbuild/netbsd-x64': 0.25.5 + '@esbuild/openbsd-arm64': 0.25.5 + '@esbuild/openbsd-x64': 0.25.5 + '@esbuild/sunos-x64': 0.25.5 + '@esbuild/win32-arm64': 0.25.5 + '@esbuild/win32-ia32': 0.25.5 + '@esbuild/win32-x64': 0.25.5 + + escape-html@1.0.3: {} + + etag@1.8.1: {} + + express@4.21.2: + dependencies: + accepts: 1.3.8 + array-flatten: 1.1.1 + body-parser: 1.20.3 + content-disposition: 0.5.4 + content-type: 1.0.5 + cookie: 0.7.1 + cookie-signature: 1.0.6 + debug: 2.6.9 + depd: 2.0.0 + encodeurl: 2.0.0 + escape-html: 1.0.3 + etag: 1.8.1 + finalhandler: 1.3.1 + fresh: 0.5.2 + http-errors: 2.0.0 + merge-descriptors: 1.0.3 + methods: 1.1.2 + on-finished: 2.4.1 + parseurl: 1.3.3 + path-to-regexp: 0.1.12 + proxy-addr: 2.0.7 + qs: 6.13.0 + range-parser: 1.2.1 + safe-buffer: 5.2.1 + send: 0.19.0 + serve-static: 1.16.2 + setprototypeof: 1.2.0 + statuses: 2.0.1 + type-is: 1.6.18 + utils-merge: 1.0.1 + vary: 1.1.2 + transitivePeerDependencies: + - supports-color + + finalhandler@1.3.1: + dependencies: + debug: 2.6.9 + encodeurl: 2.0.0 + escape-html: 1.0.3 + on-finished: 2.4.1 + parseurl: 1.3.3 + statuses: 2.0.1 + unpipe: 1.0.0 + transitivePeerDependencies: + - supports-color + + forwarded@0.2.0: {} + + fresh@0.5.2: {} + + fsevents@2.3.3: + optional: true 
+ + function-bind@1.1.2: {} + + get-intrinsic@1.3.0: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-define-property: 1.0.1 + es-errors: 1.3.0 + es-object-atoms: 1.1.1 + function-bind: 1.1.2 + get-proto: 1.0.1 + gopd: 1.2.0 + has-symbols: 1.1.0 + hasown: 2.0.2 + math-intrinsics: 1.1.0 + + get-proto@1.0.1: + dependencies: + dunder-proto: 1.0.1 + es-object-atoms: 1.1.1 + + get-tsconfig@4.10.1: + dependencies: + resolve-pkg-maps: 1.0.0 + + gopd@1.2.0: {} + + has-symbols@1.1.0: {} + + hasown@2.0.2: + dependencies: + function-bind: 1.1.2 + + http-errors@2.0.0: + dependencies: + depd: 2.0.0 + inherits: 2.0.4 + setprototypeof: 1.2.0 + statuses: 2.0.1 + toidentifier: 1.0.1 + + iconv-lite@0.4.24: + dependencies: + safer-buffer: 2.1.2 + + inherits@2.0.4: {} + + ipaddr.js@1.9.1: {} + + math-intrinsics@1.1.0: {} + + media-typer@0.3.0: {} + + merge-descriptors@1.0.3: {} + + methods@1.1.2: {} + + mime-db@1.52.0: {} + + mime-types@2.1.35: + dependencies: + mime-db: 1.52.0 + + mime@1.6.0: {} + + ms@2.0.0: {} + + ms@2.1.3: {} + + negotiator@0.6.3: {} + + object-inspect@1.13.4: {} + + on-finished@2.4.1: + dependencies: + ee-first: 1.1.1 + + parseurl@1.3.3: {} + + path-to-regexp@0.1.12: {} + + proxy-addr@2.0.7: + dependencies: + forwarded: 0.2.0 + ipaddr.js: 1.9.1 + + qs@6.13.0: + dependencies: + side-channel: 1.1.0 + + range-parser@1.2.1: {} + + raw-body@2.5.2: + dependencies: + bytes: 3.1.2 + http-errors: 2.0.0 + iconv-lite: 0.4.24 + unpipe: 1.0.0 + + resolve-pkg-maps@1.0.0: {} + + safe-buffer@5.2.1: {} + + safer-buffer@2.1.2: {} + + send@0.19.0: + dependencies: + debug: 2.6.9 + depd: 2.0.0 + destroy: 1.2.0 + encodeurl: 1.0.2 + escape-html: 1.0.3 + etag: 1.8.1 + fresh: 0.5.2 + http-errors: 2.0.0 + mime: 1.6.0 + ms: 2.1.3 + on-finished: 2.4.1 + range-parser: 1.2.1 + statuses: 2.0.1 + transitivePeerDependencies: + - supports-color + + serve-static@1.16.2: + dependencies: + encodeurl: 2.0.0 + escape-html: 1.0.3 + parseurl: 1.3.3 + send: 0.19.0 + transitivePeerDependencies: + - supports-color + + setprototypeof@1.2.0: {} + + side-channel-list@1.0.0: + dependencies: + es-errors: 1.3.0 + object-inspect: 1.13.4 + + side-channel-map@1.0.1: + dependencies: + call-bound: 1.0.4 + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + object-inspect: 1.13.4 + + side-channel-weakmap@1.0.2: + dependencies: + call-bound: 1.0.4 + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + object-inspect: 1.13.4 + side-channel-map: 1.0.1 + + side-channel@1.1.0: + dependencies: + es-errors: 1.3.0 + object-inspect: 1.13.4 + side-channel-list: 1.0.0 + side-channel-map: 1.0.1 + side-channel-weakmap: 1.0.2 + + statuses@2.0.1: {} + + toidentifier@1.0.1: {} + + tsx@4.20.3: + dependencies: + esbuild: 0.25.5 + get-tsconfig: 4.10.1 + optionalDependencies: + fsevents: 2.3.3 + + type-is@1.6.18: + dependencies: + media-typer: 0.3.0 + mime-types: 2.1.35 + + undici-types@7.8.0: {} + + unpipe@1.0.0: {} + + utils-merge@1.0.1: {} + + vary@1.1.2: {} + + zod@3.25.67: {} diff --git a/packages/responses-server/scripts/dummy-call.js b/packages/responses-server/scripts/dummy-call.js new file mode 100755 index 0000000000..774af1192a --- /dev/null +++ b/packages/responses-server/scripts/dummy-call.js @@ -0,0 +1,29 @@ +#!/usr/bin/env node + +import { request } from "http"; + +const data = JSON.stringify({ input: "" }); + +const req = request( + { + hostname: "localhost", + port: 3000, + path: "/v1/responses", + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + console.log(`Status: 
${res.statusCode}`);
+
+		let body = "";
+		res.on("data", (chunk) => (body += chunk));
+		res.on("end", () => console.log(body));
+	}
+);
+
+req.on("error", (e) => console.error(`Problem with request: ${e.message}`));
+req.write(data);
+req.end();
diff --git a/packages/responses-server/src/index.ts b/packages/responses-server/src/index.ts
new file mode 100644
index 0000000000..5572c948ef
--- /dev/null
+++ b/packages/responses-server/src/index.ts
@@ -0,0 +1,26 @@
+import { createApp } from "./server.js";
+
+const app = createApp();
+const port = process.env.PORT || 3000;
+
+// Start server
+app.listen(port, () => {
+	console.log(`🚀 Server started at ${new Date().toISOString()}`);
+	console.log(`🌐 Server is running on http://localhost:${port}`);
+	console.log("─".repeat(60));
+});
+
+// Graceful shutdown logging
+process.on("SIGINT", () => {
+	console.log("─".repeat(60));
+	console.log(`🛑 Server shutting down at ${new Date().toISOString()}`);
+	process.exit(0);
+});
+
+process.on("SIGTERM", () => {
+	console.log("─".repeat(60));
+	console.log(`🛑 Server shutting down at ${new Date().toISOString()}`);
+	process.exit(0);
+});
+
+export default app;
diff --git a/packages/responses-server/src/middleware/logging.ts b/packages/responses-server/src/middleware/logging.ts
new file mode 100644
index 0000000000..f9c58a59ab
--- /dev/null
+++ b/packages/responses-server/src/middleware/logging.ts
@@ -0,0 +1,41 @@
+/**
+ * AI-generated file using Cursor + Claude 4
+ */
+
+import { type Request, type Response, type NextFunction } from "express";
+
+/**
+ * Middleware to log all HTTP requests with duration, status code, method, and route
+ * @returns Express middleware function
+ */
+export function requestLogger() {
+	return (req: Request, res: Response, next: NextFunction): void => {
+		const start = Date.now();
+		const { method, url } = req;
+
+		// Log request start
+		console.log(`[${new Date().toISOString()}] đŸ“Ĩ ${method} ${url}`);
+
+		// Override res.end to capture response details
+		const originalEnd = res.end;
+		res.end = function (chunk?: unknown, encoding?: BufferEncoding, cb?: () => void) {
+			const duration = Date.now() - start;
+			const statusCode = res.statusCode;
+			const statusEmoji =
+				statusCode >= 200 && statusCode < 300
+					? "✅"
+					: statusCode >= 400 && statusCode < 500
+						? "âš ī¸"
+						: statusCode >= 500
+							? "❌"
+							: "â„šī¸";
+
+			console.log(`[${new Date().toISOString()}] ${statusEmoji} ${statusCode} ${method} ${url} (${duration}ms)`);
+
+			// Call the original end method with proper return type
+			return originalEnd.call(this, chunk, encoding, cb);
+		};
+
+		next();
+	};
+}
diff --git a/packages/responses-server/src/middleware/validation.ts b/packages/responses-server/src/middleware/validation.ts
new file mode 100644
index 0000000000..24ed771467
--- /dev/null
+++ b/packages/responses-server/src/middleware/validation.ts
@@ -0,0 +1,41 @@
+/**
+ * AI-generated file using Cursor + Claude 4
+ */
+
+import { type Request, type Response, type NextFunction } from "express";
+import { z } from "zod";
+
+/**
+ * Middleware to validate request body against a Zod schema
+ * @param schema - Zod schema to validate against
+ * @returns Express middleware function
+ */
+export function validateBody<T extends z.ZodSchema>(schema: T) {
+	return (req: Request, res: Response, next: NextFunction): void => {
+		try {
+			const validatedBody = schema.parse(req.body);
+			req.body = validatedBody;
+			next();
+		} catch (error) {
+			if (error instanceof z.ZodError) {
+				res.status(400).json({
+					success: false,
+					error: "Validation failed",
+					details: error.errors,
+				});
+			} else {
+				res.status(500).json({
+					success: false,
+					error: "Internal server error",
+				});
+			}
+		}
+	};
+}
+
+/**
+ * Type helper to create a properly typed request with validated body
+ */
+export interface ValidatedRequest<T> extends Request {
+	body: T;
+}
diff --git a/packages/responses-server/src/routes/index.ts b/packages/responses-server/src/routes/index.ts
new file mode 100644
index 0000000000..9d9aa777ae
--- /dev/null
+++ b/packages/responses-server/src/routes/index.ts
@@ -0,0 +1 @@
+export { handleResponses } from "./responses.js";
diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts
new file mode 100644
index 0000000000..28717024b1
--- /dev/null
+++ b/packages/responses-server/src/routes/responses.ts
@@ -0,0 +1,11 @@
+import { type Response } from "express";
+import { type ValidatedRequest } from "../middleware/validation.js";
+import { type ResponsesInput } from "../schemas/responses.js";
+
+export const handleResponses = (req: ValidatedRequest<ResponsesInput>, res: Response): void => {
+	res.json({
+		success: true,
+		input: req.body.input,
+		message: "Request processed successfully",
+	});
+};
diff --git a/packages/responses-server/src/schemas/responses.ts b/packages/responses-server/src/schemas/responses.ts
new file mode 100644
index 0000000000..1445a0eb24
--- /dev/null
+++ b/packages/responses-server/src/schemas/responses.ts
@@ -0,0 +1,7 @@
+import { z } from "zod";
+
+export const responsesSchema = z.object({
+	input: z.string(),
+});
+
+export type ResponsesInput = z.infer<typeof responsesSchema>;
diff --git a/packages/responses-server/src/server.ts b/packages/responses-server/src/server.ts
new file mode 100644
index 0000000000..4bb4ee8760
--- /dev/null
+++ b/packages/responses-server/src/server.ts
@@ -0,0 +1,22 @@
+import express, { type Express } from "express";
+import { responsesSchema } from "./schemas/responses.js";
+import { validateBody } from "./middleware/validation.js";
+import { requestLogger } from "./middleware/logging.js";
+import { handleResponses } from "./routes/index.js";
+
+export const createApp = (): Express => {
+	const app: Express = express();
+
+	// Middleware
+	app.use(requestLogger());
+	app.use(express.json());
+
+	// Routes
+	app.get("/", (req, res) => {
+		res.send("hello world");
+	});
+
+	app.post("/v1/responses", 
validateBody(responsesSchema), handleResponses); + + return app; +}; diff --git a/packages/responses-server/tsconfig.json b/packages/responses-server/tsconfig.json new file mode 100644 index 0000000000..8274efe5ca --- /dev/null +++ b/packages/responses-server/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "allowSyntheticDefaultImports": true, + "lib": ["ES2022", "DOM"], + "module": "CommonJS", + "moduleResolution": "node", + "target": "ES2022", + "forceConsistentCasingInFileNames": true, + "strict": true, + "noImplicitAny": true, + "strictNullChecks": true, + "skipLibCheck": true, + "noImplicitOverride": true, + "outDir": "./dist", + "declaration": true, + "declarationMap": true, + "resolveJsonModule": true + }, + "include": ["src", "test"], + "exclude": ["dist"] +} diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 08e651bb73..1a6988430d 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -14,3 +14,4 @@ packages: - "packages/ollama-utils" - "packages/mcp-client" - "packages/tiny-agents" + - "packages/responses-server" From abc0d885267a1b48213a11435e6f9b0ded9cf8fa Mon Sep 17 00:00:00 2001 From: Wauplin Date: Tue, 1 Jul 2025 10:20:16 +0200 Subject: [PATCH 02/12] working text only api --- packages/responses-server/.gitignore | 1 - packages/responses-server/README.md | 23 ++- .../responses-server/examples/text_multi.js | 20 +++ .../responses-server/examples/text_single.js | 12 ++ packages/responses-server/package.json | 7 +- packages/responses-server/pnpm-lock.yaml | 25 +++ .../responses-server/scripts/dummy-call.js | 29 ---- .../responses-server/scripts/run-example.js | 23 +++ .../src/lib/generateUniqueId.ts | 5 + .../src/middleware/logging.ts | 77 +++++++--- .../src/middleware/validation.ts | 3 +- packages/responses-server/src/routes/index.ts | 2 +- .../responses-server/src/routes/responses.ts | 105 +++++++++++-- packages/responses-server/src/schemas.ts | 142 ++++++++++++++++++ .../responses-server/src/schemas/responses.ts | 7 - packages/responses-server/src/server.ts | 6 +- packages/tasks/src/tasks/index.ts | 11 +- 17 files changed, 397 insertions(+), 101 deletions(-) delete mode 100644 packages/responses-server/.gitignore create mode 100644 packages/responses-server/examples/text_multi.js create mode 100755 packages/responses-server/examples/text_single.js delete mode 100755 packages/responses-server/scripts/dummy-call.js create mode 100644 packages/responses-server/scripts/run-example.js create mode 100644 packages/responses-server/src/lib/generateUniqueId.ts create mode 100644 packages/responses-server/src/schemas.ts delete mode 100644 packages/responses-server/src/schemas/responses.ts diff --git a/packages/responses-server/.gitignore b/packages/responses-server/.gitignore deleted file mode 100644 index 8e5bbf044f..0000000000 --- a/packages/responses-server/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.py \ No newline at end of file diff --git a/packages/responses-server/README.md b/packages/responses-server/README.md index 272553606e..fbce68da1d 100644 --- a/packages/responses-server/README.md +++ b/packages/responses-server/README.md @@ -28,20 +28,17 @@ pnpm install pnpm dev ``` -Run a simple POST request with +### Run examples + +Some example scripts are implemented in ./examples. 
+ +You can run them using ```bash -# Make dummy call -pnpm dummy -``` +# Run ./examples/text_single.js +pnpm run example text_single -## đŸ› ī¸ Available Scripts +# Run ./examples/text_multi.js +pnpm run example text_multi +``` -- `pnpm dev` - Start development server with hot reload -- `pnpm start` - Start production server -- `pnpm build` - Build for production -- `pnpm dummy` - Run test API call -- `pnpm lint` - Run ESLint with auto-fix -- `pnpm format` - Format code with Prettier -- `pnpm test` - Run tests -- `pnpm check` - Type check with TypeScript diff --git a/packages/responses-server/examples/text_multi.js b/packages/responses-server/examples/text_multi.js new file mode 100644 index 0000000000..35432162ae --- /dev/null +++ b/packages/responses-server/examples/text_multi.js @@ -0,0 +1,20 @@ +import OpenAI from "openai"; + +const client = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); + +const response = await client.responses.create({ + model: "Qwen/Qwen2.5-VL-7B-Instruct", + input: [ + { + role: "developer", + content: "Talk like a pirate.", + }, + { + role: "user", + content: "Are semicolons optional in JavaScript?", + }, + ], +}); + +console.log(response); +console.log(response.output_text); diff --git a/packages/responses-server/examples/text_single.js b/packages/responses-server/examples/text_single.js new file mode 100755 index 0000000000..b9906efcde --- /dev/null +++ b/packages/responses-server/examples/text_single.js @@ -0,0 +1,12 @@ +import OpenAI from "openai"; + +const client = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); + +const response = await client.responses.create({ + model: "Qwen/Qwen2.5-VL-7B-Instruct", + instructions: "You are a helpful assistant.", + input: "Tell me a three sentence bedtime story about a unicorn.", +}); + +console.log(response); +console.log(response.output_text); diff --git a/packages/responses-server/package.json b/packages/responses-server/package.json index a0bef8d30d..6523b35772 100644 --- a/packages/responses-server/package.json +++ b/packages/responses-server/package.json @@ -26,14 +26,14 @@ "build": "tsup src/*.ts --format cjs,esm --clean && tsc --emitDeclarationOnly --declaration", "check": "tsc", "dev": "tsx watch src/index.ts", - "dummy": "node scripts/dummy-call.js", "format": "prettier --write .", "format:check": "prettier --check .", "lint": "eslint --quiet --fix --ext .cjs,.ts .", "lint:check": "eslint --ext .cjs,.ts .", "prepublishOnly": "pnpm run build", "prepare": "pnpm run build", - "start": "node dist/index.js" + "start": "node dist/index.js", + "example": "node scripts/run-example.js" }, "files": [ "src", @@ -50,11 +50,14 @@ "author": "Hugging Face", "license": "MIT", "dependencies": { + "@huggingface/inference": "workspace:^", + "@huggingface/tasks": "workspace:^", "express": "^4.18.2", "zod": "^3.22.4" }, "devDependencies": { "@types/express": "^4.17.21", + "openai": "^5.8.2", "tsx": "^4.7.0" } } diff --git a/packages/responses-server/pnpm-lock.yaml b/packages/responses-server/pnpm-lock.yaml index c63d12089c..7ee51b3a6d 100644 --- a/packages/responses-server/pnpm-lock.yaml +++ b/packages/responses-server/pnpm-lock.yaml @@ -8,6 +8,12 @@ importers: .: dependencies: + '@huggingface/inference': + specifier: workspace:^ + version: link:../inference + '@huggingface/tasks': + specifier: workspace:^ + version: link:../tasks express: specifier: ^4.18.2 version: 4.21.2 @@ -18,6 +24,9 @@ importers: '@types/express': specifier: ^4.17.21 version: 4.17.23 + 
openai: + specifier: ^5.8.2 + version: 5.8.2(zod@3.25.67) tsx: specifier: ^4.7.0 version: 4.20.3 @@ -408,6 +417,18 @@ packages: resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==} engines: {node: '>= 0.8'} + openai@5.8.2: + resolution: {integrity: sha512-8C+nzoHYgyYOXhHGN6r0fcb4SznuEn1R7YZMvlqDbnCuE0FM2mm3T1HiYW6WIcMS/F1Of2up/cSPjLPaWt0X9Q==} + hasBin: true + peerDependencies: + ws: ^8.18.0 + zod: ^3.23.8 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + parseurl@1.3.3: resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==} engines: {node: '>= 0.8'} @@ -862,6 +883,10 @@ snapshots: dependencies: ee-first: 1.1.1 + openai@5.8.2(zod@3.25.67): + optionalDependencies: + zod: 3.25.67 + parseurl@1.3.3: {} path-to-regexp@0.1.12: {} diff --git a/packages/responses-server/scripts/dummy-call.js b/packages/responses-server/scripts/dummy-call.js deleted file mode 100755 index 774af1192a..0000000000 --- a/packages/responses-server/scripts/dummy-call.js +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env node - -import { request } from "http"; - -const data = JSON.stringify({ input: "" }); - -const req = request( - { - hostname: "localhost", - port: 3000, - path: "/v1/responses", - method: "POST", - headers: { - "Content-Type": "application/json", - "Content-Length": Buffer.byteLength(data), - }, - }, - (res) => { - console.log(`Status: ${res.statusCode}`); - - let body = ""; - res.on("data", (chunk) => (body += chunk)); - res.on("end", () => console.log(body)); - } -); - -req.on("error", (e) => console.error(`Problem with request: ${e.message}`)); -req.write(data); -req.end(); diff --git a/packages/responses-server/scripts/run-example.js b/packages/responses-server/scripts/run-example.js new file mode 100644 index 0000000000..f3bcc0d9b0 --- /dev/null +++ b/packages/responses-server/scripts/run-example.js @@ -0,0 +1,23 @@ +#!/usr/bin/env node +import { spawnSync } from "child_process"; +import fs from "fs"; +import path from "path"; +import { fileURLToPath } from "url"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +const [, , exampleName] = process.argv; +if (!exampleName) { + console.error("Usage: run-example.js "); + process.exit(1); +} + +const examplePath = path.resolve(__dirname, "../examples", `${exampleName}.js`); +if (!fs.existsSync(examplePath)) { + console.error(`Example script not found: ${examplePath}`); + process.exit(1); +} + +const result = spawnSync("node", [examplePath], { stdio: "inherit" }); +process.exit(result.status); diff --git a/packages/responses-server/src/lib/generateUniqueId.ts b/packages/responses-server/src/lib/generateUniqueId.ts new file mode 100644 index 0000000000..a2227139fa --- /dev/null +++ b/packages/responses-server/src/lib/generateUniqueId.ts @@ -0,0 +1,5 @@ +import { randomBytes } from "crypto"; + +export function generateUniqueId(): string { + return randomBytes(16).toString("hex"); +} diff --git a/packages/responses-server/src/middleware/logging.ts b/packages/responses-server/src/middleware/logging.ts index f9c58a59ab..96cf707a7c 100644 --- a/packages/responses-server/src/middleware/logging.ts +++ b/packages/responses-server/src/middleware/logging.ts @@ -1,40 +1,67 @@ /** * AI-generated file using Cursor + Claude 4 + * + * Middleware to log all HTTP requests with duration, status code, method, and route */ - import { type Request, type Response, type 
NextFunction } from "express"; +interface LogContext { + timestamp: string; + method: string; + url: string; + statusCode?: number; + duration?: number; +} + +function formatLogMessage(context: LogContext): string { + const { timestamp, method, url, statusCode, duration } = context; + + if (statusCode === undefined) { + return `[${timestamp}] đŸ“Ĩ ${method} ${url}`; + } + + const statusEmoji = + statusCode >= 200 && statusCode < 300 + ? "✅" + : statusCode >= 400 && statusCode < 500 + ? "âš ī¸" + : statusCode >= 500 + ? "❌" + : "â„šī¸"; + return `[${timestamp}] ${statusEmoji} ${statusCode} ${method} ${url} (${duration}ms)`; +} + /** * Middleware to log all HTTP requests with duration, status code, method, and route - * @returns Express middleware function */ export function requestLogger() { return (req: Request, res: Response, next: NextFunction): void => { - const start = Date.now(); + const startTime = Date.now(); const { method, url } = req; - // Log request start - console.log(`[${new Date().toISOString()}] đŸ“Ĩ ${method} ${url}`); - - // Override res.end to capture response details - const originalEnd = res.end; - res.end = function (chunk?: unknown, encoding?: BufferEncoding, cb?: () => void) { - const duration = Date.now() - start; - const statusCode = res.statusCode; - const statusEmoji = - statusCode >= 200 && statusCode < 300 - ? "✅" - : statusCode >= 400 && statusCode < 500 - ? "âš ī¸" - : statusCode >= 500 - ? "❌" - : "â„šī¸"; - - console.log(`[${new Date().toISOString()}] ${statusEmoji} ${statusCode} ${method} ${url} (${duration}ms)`); - - // Call the original end method with proper return type - return originalEnd.call(this, chunk, encoding, cb); - }; + // Log incoming request + console.log( + formatLogMessage({ + timestamp: new Date().toISOString(), + method, + url, + }) + ); + + // Listen for when the response finishes + res.on("finish", () => { + const duration = Date.now() - startTime; + + console.log( + formatLogMessage({ + timestamp: new Date().toISOString(), + method, + url, + statusCode: res.statusCode, + duration, + }) + ); + }); next(); }; diff --git a/packages/responses-server/src/middleware/validation.ts b/packages/responses-server/src/middleware/validation.ts index 24ed771467..40c84a43bf 100644 --- a/packages/responses-server/src/middleware/validation.ts +++ b/packages/responses-server/src/middleware/validation.ts @@ -18,9 +18,10 @@ export function validateBody(schema: T) { next(); } catch (error) { if (error instanceof z.ZodError) { + console.log(req.body); res.status(400).json({ success: false, - error: "Validation failed", + error: error.errors, details: error.errors, }); } else { diff --git a/packages/responses-server/src/routes/index.ts b/packages/responses-server/src/routes/index.ts index 9d9aa777ae..19955457de 100644 --- a/packages/responses-server/src/routes/index.ts +++ b/packages/responses-server/src/routes/index.ts @@ -1 +1 @@ -export { handleResponses } from "./responses.js"; +export { postCreateResponse } from "./responses.js"; diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts index 28717024b1..5e625ddaa7 100644 --- a/packages/responses-server/src/routes/responses.ts +++ b/packages/responses-server/src/routes/responses.ts @@ -1,11 +1,98 @@ -import { type Response } from "express"; +import { type Response as ExpressResponse } from "express"; import { type ValidatedRequest } from "../middleware/validation.js"; -import { type ResponsesInput } from "../schemas/responses.js"; - -export 
const handleResponses = (req: ValidatedRequest<ResponsesInput>, res: Response): void => {
-	res.json({
-		success: true,
-		input: req.body.input,
-		message: "Request processed successfully",
-	});
+export const postCreateResponse = async (
+	req: ValidatedRequest<CreateResponse>,
+	res: ExpressResponse
+): Promise<void> => {
+	const apiKey = req.headers.authorization?.split(" ")[1];
+
+	if (!apiKey) {
+		res.status(401).json({
+			success: false,
+			error: "Unauthorized",
+		});
+		return;
+	}
+
+	const client = new InferenceClient(apiKey);
+	const messages: ChatCompletionInputMessage[] = req.body.instructions
+		? [{ role: "system", content: req.body.instructions }]
+		: [];
+
+	if (Array.isArray(req.body.input)) {
+		messages.push(
+			...req.body.input.map((item) => ({
+				role: item.role,
+				content:
+					typeof item.content === "string"
+						? item.content
+						: item.content.map((content) => {
+								if (content.type === "input_image") {
+									return {
+										type: "image_url" as ChatCompletionInputMessageChunkType,
+										image_url: {
+											url: content.image_url,
+										},
+									};
+								}
+								// content.type must be "input_text" at this point
+								return {
+									type: "text" as ChatCompletionInputMessageChunkType,
+									text: content.text,
+								};
+						  }),
+			}))
+		);
+	} else {
+		messages.push({ role: "user", content: req.body.input });
+	}
+
+	try {
+		const chatCompletionResponse = await client.chatCompletion({
+			model: req.body.model,
+			messages: messages,
+			temperature: req.body.temperature,
+			top_p: req.body.top_p,
+		});
+
+		const responseObject: Response = {
+			object: "response",
+			id: chatCompletionResponse.id,
+			status: "completed",
+			instructions: req.body.instructions,
+			model: req.body.model,
+			temperature: req.body.temperature,
+			top_p: req.body.top_p,
+			created_at: chatCompletionResponse.created,
+			output: chatCompletionResponse.choices[0].message.content
+				? [
+						{
+							id: "msg_" + generateUniqueId(),
+							type: "message",
+							role: "assistant",
+							status: "completed",
+							content: [
+								{
+									type: "output_text",
+									text: chatCompletionResponse.choices[0].message.content,
+								},
+							],
+						},
+				  ]
+				: [],
+		};
+
+		res.json(responseObject);
+	} catch (error) {
+		console.error(error);
+		res.status(500).json({
+			success: false,
+			error: error instanceof Error ? error.message : "Unknown error",
+		});
+	}
+};
diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts
new file mode 100644
index 0000000000..66df4c99bd
--- /dev/null
+++ b/packages/responses-server/src/schemas.ts
@@ -0,0 +1,142 @@
+import { z } from "zod";
+
+/**
+ * https://platform.openai.com/docs/api-reference/responses/create
+ * commented out properties are not supported by the server
+ */
+export const createResponseSchema = z.object({
+	// background: z.boolean().default(false),
+	// include:
+	input: z.union([
+		z.string(),
+		z.array(
+			// z.union([
+			z.object({
+				content: z.union([
+					z.string(),
+					z.array(
+						z.union([
+							z.object({
+								type: z.literal("input_text"),
+								text: z.string(),
+							}),
+							z.object({
+								type: z.literal("input_image"),
+								// file_id: z.string().optional(),
+								image_url: z.string(),
+								// detail: z.enum(["auto", "low", "high"]).default("auto"),
+							}),
+							// z.object({
+							// 	type: z.literal("input_file"),
+							// 	file_data: z.string().optional(),
+							// 	file_id: z.string().optional(),
+							// 	filename: z.string().optional(),
+							// }),
+						])
+					),
+				]),
+				role: z.enum(["user", "assistant", "system", "developer"]),
+				type: z.enum(["message"]).default("message"),
+			})
+			// z.object({}), // An item representing part of the context for the response to be generated by the model
+			// z.object({
+			// 	id: z.string(),
+			// 	type: z.enum(["item_reference"]).default("item_reference"),
+			// }),
+			// ])
+		),
+	]),
+	instructions: z.string().optional(),
+	// max_output_tokens: z.number().min(0).optional(),
+	// max_tool_calls: z.number().min(0).optional(),
+	// metadata: z.record(z.string().max(64), z.string().max(512)).optional(), // + 16 items max
+	model: z.string(),
+	// previous_response_id: z.string().optional(),
+	// reasoning: z.object({
+	// 	effort: z.enum(["low", "medium", "high"]).default("medium"),
+	// 	summary: z.enum(["auto", "concise", "detailed"]).optional(),
+	// }),
+	// store: z.boolean().default(true),
+	// stream: z.boolean().default(false),
+	temperature: z.number().min(0).max(2).default(1),
+	// text:
+	// tool_choice:
+	// tools:
+	// top_logprobs: z.number().min(0).max(20).optional(),
+	top_p: z.number().min(0).max(1).default(1),
+	// truncation: z.enum(["auto", "disabled"]).default("disabled"),
+	// user
+});
+
+export type CreateResponse = z.infer<typeof createResponseSchema>;
+
+export const responseSchema = z.object({
+	object: z.literal("response"),
+	created_at: z.number(),
+	error: z
+		.object({
+			code: z.string(),
+			message: z.string(),
+		})
+		.optional(),
+	id: z.string(),
+	status: z.enum(["completed", "failed", "in_progress", "cancelled", "queued", "incomplete"]),
+	// incomplete_details: z.object({ reason: z.string() }).optional(),
+	instructions: z.string().optional(),
+	// max_output_tokens: z.number().min(0).optional(),
+	// max_tool_calls: z.number().min(0).optional(),
+	// metadata: z.record(z.string().max(64), z.string().max(512)).optional(), // + 16 items max
+	model: z.string(),
+	output: z.array(
+		z.object({
+			type: z.enum(["message"]),
+			id: z.string(),
+			status: z.enum(["completed", "failed"]),
+			role: z.enum(["assistant"]),
+			content: z.array(
+				z.union([
+					z.object({
+						type: z.literal("output_text"),
+						text: z.string(),
+						// annotations:
+						// logprobs:
+					}),
+					z.object({
+						type: z.literal("refusal"),
+						refusal: z.string(),
+					}),
+				])
+			),
+		})
+		// in practice, should be a z.union of the following:
+		// File search tool call
+		// Function tool call
+		// Web search tool call
+		// Computer tool call
+		// Reasoning
+		// Image generation call
+		// Code interpreter tool call
+		// Local shell call
+		// MCP tool call
+		// MCP list tools
+		// MCP approval request
+	),
+	// parallel_tool_calls: z.boolean(),
+	// previous_response_id: z.string().optional(),
+	// reasoning: z.object({
+	// 	effort: z.enum(["low", "medium", "high"]).optional(),
+	// 	summary: z.enum(["auto", "concise", "detailed"]).optional(),
+	// }),
+	// store: z.boolean(),
+	temperature: z.number(),
+	// text:
+	// tool_choice:
+	// tools:
+	// top_logprobs: z.number().int().min(0).max(20).optional(),
+	top_p: z.number(),
+	// truncation: z.enum(["auto", "disabled"]).default("disabled"),
+	// usage: ...
+	// user
+});
+
+export type Response = z.infer<typeof responseSchema>;
diff --git a/packages/responses-server/src/schemas/responses.ts b/packages/responses-server/src/schemas/responses.ts
deleted file mode 100644
index 1445a0eb24..0000000000
--- a/packages/responses-server/src/schemas/responses.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-import { z } from "zod";
-
-export const responsesSchema = z.object({
-	input: z.string(),
-});
-
-export type ResponsesInput = z.infer<typeof responsesSchema>;
diff --git a/packages/responses-server/src/server.ts b/packages/responses-server/src/server.ts
index 4bb4ee8760..a4fdb6007a 100644
--- a/packages/responses-server/src/server.ts
+++ b/packages/responses-server/src/server.ts
@@ -1,8 +1,8 @@
 import express, { type Express } from "express";
-import { responsesSchema } from "./schemas/responses.js";
+import { createResponseSchema } from "./schemas.js";
 import { validateBody } from "./middleware/validation.js";
 import { requestLogger } from "./middleware/logging.js";
-import { handleResponses } from "./routes/index.js";
+import { postCreateResponse } from "./routes/index.js";
 
 export const createApp = (): Express => {
 	const app: Express = express();
@@ -16,7 +16,7 @@ export const createApp = (): Express => {
 		res.send("hello world");
 	});
 
-	app.post("/v1/responses", validateBody(responsesSchema), handleResponses);
+	app.post("/v1/responses", validateBody(createResponseSchema), postCreateResponse);
 
 	return app;
 };
diff --git a/packages/tasks/src/tasks/index.ts b/packages/tasks/src/tasks/index.ts
index 7bc1568816..269aa45dee 100644
--- a/packages/tasks/src/tasks/index.ts
+++ b/packages/tasks/src/tasks/index.ts
@@ -48,16 +48,7 @@ import videoTextToText from "./video-text-to-text/data.js";
 
 export type * from "./audio-classification/inference.js";
 export type * from "./automatic-speech-recognition/inference.js";
-export type {
-	ChatCompletionInput,
-	ChatCompletionInputMessage,
-	ChatCompletionOutput,
-	ChatCompletionOutputComplete,
-	ChatCompletionOutputMessage,
-	ChatCompletionStreamOutput,
-	ChatCompletionStreamOutputChoice,
-	ChatCompletionStreamOutputDelta,
-} from "./chat-completion/inference.js";
+export type * from "./chat-completion/inference.js";
 export type * from "./document-question-answering/inference.js";
 export type * from "./feature-extraction/inference.js";
 export type * from "./fill-mask/inference.js";

From 555054c0716bc33ec8a7bc23d8ff3cd7422e5e1c Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 1 Jul 2025 10:26:38 +0200
Subject: [PATCH 03/12] image example

---
 packages/responses-server/examples/image.js   | 23 +++++++++++++++++++
 .../responses-server/examples/text_multi.js   |  4 ++--
 .../responses-server/examples/text_single.js  |  4 ++--
 .../src/lib/generateUniqueId.ts               |  5 ++--
 .../responses-server/src/routes/responses.ts  |  5 ++--
 packages/responses-server/src/schemas.ts      |  2 +-
 6 files changed, 34 insertions(+), 9 deletions(-)
 create mode 100644 
packages/responses-server/examples/image.js diff --git a/packages/responses-server/examples/image.js b/packages/responses-server/examples/image.js new file mode 100644 index 0000000000..7c729d2440 --- /dev/null +++ b/packages/responses-server/examples/image.js @@ -0,0 +1,23 @@ +import OpenAI from "openai"; + +const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); + +const response = await openai.responses.create({ + model: "Qwen/Qwen2.5-VL-7B-Instruct", + input: [ + { + role: "user", + content: [ + { type: "input_text", text: "what is in this image?" }, + { + type: "input_image", + image_url: + "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", + }, + ], + }, + ], +}); + +console.log(response); +console.log(response.output_text); diff --git a/packages/responses-server/examples/text_multi.js b/packages/responses-server/examples/text_multi.js index 35432162ae..0258805696 100644 --- a/packages/responses-server/examples/text_multi.js +++ b/packages/responses-server/examples/text_multi.js @@ -1,8 +1,8 @@ import OpenAI from "openai"; -const client = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); +const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); -const response = await client.responses.create({ +const response = await openai.responses.create({ model: "Qwen/Qwen2.5-VL-7B-Instruct", input: [ { diff --git a/packages/responses-server/examples/text_single.js b/packages/responses-server/examples/text_single.js index b9906efcde..7abd23a864 100755 --- a/packages/responses-server/examples/text_single.js +++ b/packages/responses-server/examples/text_single.js @@ -1,8 +1,8 @@ import OpenAI from "openai"; -const client = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); +const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); -const response = await client.responses.create({ +const response = await openai.responses.create({ model: "Qwen/Qwen2.5-VL-7B-Instruct", instructions: "You are a helpful assistant.", input: "Tell me a three sentence bedtime story about a unicorn.", diff --git a/packages/responses-server/src/lib/generateUniqueId.ts b/packages/responses-server/src/lib/generateUniqueId.ts index a2227139fa..6a708b3592 100644 --- a/packages/responses-server/src/lib/generateUniqueId.ts +++ b/packages/responses-server/src/lib/generateUniqueId.ts @@ -1,5 +1,6 @@ import { randomBytes } from "crypto"; -export function generateUniqueId(): string { - return randomBytes(16).toString("hex"); +export function generateUniqueId(prefix?: string): string { + const id = randomBytes(24).toString("hex"); + return prefix ? 
`${prefix}_${id}` : id; } diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts index 5e625ddaa7..68e27980fc 100644 --- a/packages/responses-server/src/routes/responses.ts +++ b/packages/responses-server/src/routes/responses.ts @@ -62,8 +62,9 @@ export const postCreateResponse = async ( const responseObject: Response = { object: "response", - id: chatCompletionResponse.id, + id: generateUniqueId("resp"), status: "completed", + error: null, instructions: req.body.instructions, model: req.body.model, temperature: req.body.temperature, @@ -72,7 +73,7 @@ output: chatCompletionResponse.choices[0].message.content ? [ { - id: "msg_" + generateUniqueId(), + id: generateUniqueId("msg"), type: "message", role: "assistant", status: "completed", diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts index 66df4c99bd..0037b7ea25 100644 --- a/packages/responses-server/src/schemas.ts +++ b/packages/responses-server/src/schemas.ts @@ -78,7 +78,7 @@ export const responseSchema = z.object({ code: z.string(), message: z.string(), }) - .optional(), + .nullable(), id: z.string(), status: z.enum(["completed", "failed", "in_progress", "cancelled", "queued", "incomplete"]), // incomplete_details: z.object({ reason: z.string() }).optional(), From 942fc8e1273b16d082ed5742c5c48e6dfb550127 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Tue, 1 Jul 2025 10:30:44 +0200 Subject: [PATCH 04/12] comments --- packages/responses-server/scripts/run-example.js | 5 +++++ packages/responses-server/src/lib/generateUniqueId.ts | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/packages/responses-server/scripts/run-example.js b/packages/responses-server/scripts/run-example.js index f3bcc0d9b0..8175a5de8d 100644 --- a/packages/responses-server/scripts/run-example.js +++ b/packages/responses-server/scripts/run-example.js @@ -1,4 +1,9 @@ #!/usr/bin/env node +/** + * AI-generated file using Cursor + Claude 4 + * + * Run an example script + */ import { spawnSync } from "child_process"; import fs from "fs"; import path from "path"; diff --git a/packages/responses-server/src/lib/generateUniqueId.ts b/packages/responses-server/src/lib/generateUniqueId.ts index 6a708b3592..e27fc6cafe 100644 --- a/packages/responses-server/src/lib/generateUniqueId.ts +++ b/packages/responses-server/src/lib/generateUniqueId.ts @@ -1,3 +1,8 @@ +/** + * AI-generated file using Cursor + Claude 4 + * + * Generate a unique ID for the response + */ import { randomBytes } from "crypto"; export function generateUniqueId(prefix?: string): string { From 2090cf7c5b3b680b034416e52d1292420b67a32c Mon Sep 17 00:00:00 2001 From: Lucain Date: Tue, 1 Jul 2025 17:20:38 +0200 Subject: [PATCH 05/12] return openai-defined type (#1580) PR on top of https://github.com/huggingface/huggingface.js/pull/1576. I'm starting to think that it makes sense to define Zod schemas for inputs, since we need to validate users' inputs, but that for outputs we "only" need static type checking, and that we could therefore reuse the types defined in https://github.com/openai/openai-node. **Benefits:** no need to redefine stuff manually. It's easy to make mistakes (a parameter that shouldn't be nullable, that could be an array, etc.) when translating from the specs to our codebase. If static typing doesn't complain then we can assume "it's good". Also less code to maintain.
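As a small illustration of that benefit, here is a minimal sketch of the pattern (the `Omit` keys below are placeholders for illustration, not the server's actual list):

```ts
import type { Response } from "openai/resources/responses/responses";

// Reuse the upstream definition and explicitly opt out of the fields the
// server does not implement yet (example keys, chosen for illustration only).
type ImplementedResponse = Omit<Response, "tools" | "tool_choice">;

// If a field is forgotten or mistyped when building the response object,
// tsc flags it against the official definition, which is the whole point
// of reusing the upstream types.
function getResponseId(response: ImplementedResponse): string {
	return response.id;
}
```

**Drawback:** less flexibility.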
We don't own the stack and things might get updated in the wild. It's less a problem in this context as it's a server and not a client (and therefore we manage the dependency updates). Overall I do think it's better to import from openai. Since we won't implement everything at first, it's fine to use `Omit<..., "key-that-we-dont-implement">` which **explicitly** removes a feature (better than implicit non-definition) --- **EDIT:** and it's fine to use them for now and if it's ever blocking in the future, then we redefine stuff ourselves. --- packages/responses-server/package.json | 2 +- packages/responses-server/pnpm-lock.yaml | 6 +- .../responses-server/src/routes/responses.ts | 12 ++- packages/responses-server/src/schemas.ts | 97 +++---------------- packages/responses-server/src/server.ts | 4 +- 5 files changed, 28 insertions(+), 93 deletions(-) diff --git a/packages/responses-server/package.json b/packages/responses-server/package.json index 6523b35772..9e30447382 100644 --- a/packages/responses-server/package.json +++ b/packages/responses-server/package.json @@ -53,11 +53,11 @@ "@huggingface/inference": "workspace:^", "@huggingface/tasks": "workspace:^", "express": "^4.18.2", + "openai": "^5.8.2", "zod": "^3.22.4" }, "devDependencies": { "@types/express": "^4.17.21", - "openai": "^5.8.2", "tsx": "^4.7.0" } } diff --git a/packages/responses-server/pnpm-lock.yaml b/packages/responses-server/pnpm-lock.yaml index 7ee51b3a6d..b3c8d02c5a 100644 --- a/packages/responses-server/pnpm-lock.yaml +++ b/packages/responses-server/pnpm-lock.yaml @@ -17,6 +17,9 @@ importers: express: specifier: ^4.18.2 version: 4.21.2 + openai: + specifier: ^5.8.2 + version: 5.8.2(zod@3.25.67) zod: specifier: ^3.22.4 version: 3.25.67 @@ -24,9 +27,6 @@ importers: '@types/express': specifier: ^4.17.21 version: 4.17.23 - openai: - specifier: ^5.8.2 - version: 5.8.2(zod@3.25.67) tsx: specifier: ^4.7.0 version: 4.20.3 diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts index 68e27980fc..7350b90bf0 100644 --- a/packages/responses-server/src/routes/responses.ts +++ b/packages/responses-server/src/routes/responses.ts @@ -1,12 +1,14 @@ import { type Response as ExpressResponse } from "express"; import { type ValidatedRequest } from "../middleware/validation.js"; -import { type CreateResponse, type Response } from "../schemas.js"; +import { type CreateResponseParams } from "../schemas.js"; import { generateUniqueId } from "../lib/generateUniqueId.js"; import { InferenceClient } from "@huggingface/inference"; import type { ChatCompletionInputMessage, ChatCompletionInputMessageChunkType } from "@huggingface/tasks"; +import { type Response as OpenAIResponse } from "openai/resources/responses/responses"; + export const postCreateResponse = async ( - req: ValidatedRequest, + req: ValidatedRequest, res: ExpressResponse ): Promise => { const apiKey = req.headers.authorization?.split(" ")[1]; @@ -60,7 +62,10 @@ export const postCreateResponse = async ( top_p: req.body.top_p, }); - const responseObject: Response = { + const responseObject: Omit< + OpenAIResponse, + "incomplete_details" | "metadata" | "output_text" | "parallel_tool_calls" | "tool_choice" | "tools" + > = { object: "response", id: generateUniqueId("resp"), status: "completed", @@ -81,6 +86,7 @@ export const postCreateResponse = async ( { type: "output_text", text: chatCompletionResponse.choices[0].message.content, + annotations: [], }, ], }, diff --git a/packages/responses-server/src/schemas.ts 
b/packages/responses-server/src/schemas.ts index 0037b7ea25..4e47301aec 100644 --- a/packages/responses-server/src/schemas.ts +++ b/packages/responses-server/src/schemas.ts @@ -4,7 +4,7 @@ import { z } from "zod"; * https://platform.openai.com/docs/api-reference/responses/create * commented out properties are not supported by the server */ -export const createResponseSchema = z.object({ +export const createResponseParamsSchema = z.object({ // background: z.boolean().default(false), // include: input: z.union([ @@ -22,15 +22,15 @@ export const createResponseSchema = z.object({ }), z.object({ type: z.literal("input_image"), - // file_id: z.string().optional(), + // file_id: z.string().nullable(), image_url: z.string(), // detail: z.enum(["auto", "low", "high"]).default("auto"), }), // z.object({ // type: z.literal("input_file"), - // file_data: z.string().optional(), - // file_id: z.string().optional(), - // filename: z.string().optional(), + // file_data: z.string().nullable(), + // file_id: z.string().nullable(), + // filename: z.string().nullable(), // }), ]) ), @@ -46,15 +46,15 @@ export const createResponseSchema = z.object({ // ]) ), ]), - instructions: z.string().optional(), - // max_output_tokens: z.number().min(0).optional(), - // max_tool_calls: z.number().min(0).optional(), - // metadata: z.record(z.string().max(64), z.string().max(512)).optional(), // + 16 items max + instructions: z.string().nullable(), + // max_output_tokens: z.number().min(0).nullable(), + // max_tool_calls: z.number().min(0).nullable(), + // metadata: z.record(z.string().max(64), z.string().max(512)).nullable(), // + 16 items max model: z.string(), - // previous_response_id: z.string().optional(), + // previous_response_id: z.string().nullable(), // reasoning: z.object({ // effort: z.enum(["low", "medium", "high"]).default("medium"), - // summary: z.enum(["auto", "concise", "detailed"]).optional(), + // summary: z.enum(["auto", "concise", "detailed"]).nullable(), // }), // store: z.boolean().default(true), // stream: z.boolean().default(false), @@ -62,81 +62,10 @@ export const createResponseSchema = z.object({ // text: // tool_choice: // tools: - // top_logprobs: z.number().min(0).max(20).optional(), + // top_logprobs: z.number().min(0).max(20).nullable(), top_p: z.number().min(0).max(1).default(1), // truncation: z.enum(["auto", "disabled"]).default("disabled"), // user }); -export type CreateResponse = z.infer; - -export const responseSchema = z.object({ - object: z.literal("response"), - created_at: z.number(), - error: z - .object({ - code: z.string(), - message: z.string(), - }) - .nullable(), - id: z.string(), - status: z.enum(["completed", "failed", "in_progress", "cancelled", "queued", "incomplete"]), - // incomplete_details: z.object({ reason: z.string() }).optional(), - instructions: z.string().optional(), - // max_output_tokens: z.number().min(0).optional(), - // max_tool_calls: z.number().min(0).optional(), - // metadata: z.record(z.string().max(64), z.string().max(512)).optional(), // + 16 items max - model: z.string(), - output: z.array( - z.object({ - type: z.enum(["message"]), - id: z.string(), - status: z.enum(["completed", "failed"]), - role: z.enum(["assistant"]), - content: z.array( - z.union([ - z.object({ - type: z.literal("output_text"), - text: z.string(), - // annotations: - // logprobs: - }), - z.object({ - type: z.literal("refusal"), - refusal: z.string(), - }), - ]) - ), - }) - // in practice, should be a z.union of the following: - // File search tool call - // Function tool call 
- // Web search tool call - // Computer tool call - // Reasoning - // Image generation call - // Code interpreter tool call - // Local shell call - // MCP tool call - // MCP list tools - // MCP approval request - ), - // parallel_tool_calls: z.boolean(), - // previous_response_id: z.string().optional(), - // reasoning: z.object({ - // effort: z.enum(["low", "medium", "high"]).optional(), - // summary: z.enum(["auto", "concise", "detailed"]).optional(), - // }), - // store: z.boolean(), - temperature: z.number(), - // text: - // tool_choice: - // tools: - // top_logprobs: z.number().int().min(0).max(20).optional(), - top_p: z.number(), - // truncation: z.enum(["auto", "disabled"]).default("disabled"), - // usage: ... - // user -}); - -export type Response = z.infer; +export type CreateResponseParams = z.infer; diff --git a/packages/responses-server/src/server.ts b/packages/responses-server/src/server.ts index a4fdb6007a..d183b53f24 100644 --- a/packages/responses-server/src/server.ts +++ b/packages/responses-server/src/server.ts @@ -1,5 +1,5 @@ import express, { type Express } from "express"; -import { createResponseSchema } from "./schemas.js"; +import { createResponseParamsSchema } from "./schemas.js"; import { validateBody } from "./middleware/validation.js"; import { requestLogger } from "./middleware/logging.js"; import { postCreateResponse } from "./routes/index.js"; @@ -16,7 +16,7 @@ export const createApp = (): Express => { res.send("hello world"); }); - app.post("/v1/responses", validateBody(createResponseSchema), postCreateResponse); + app.post("/v1/responses", validateBody(createResponseParamsSchema), postCreateResponse); return app; }; From e47b3f9fc4baff528b6e4715c63e4615b0597779 Mon Sep 17 00:00:00 2001 From: Lucain Date: Wed, 2 Jul 2025 10:09:10 +0200 Subject: [PATCH 06/12] [ResponsesAPI] Implement streaming mode (#1582) Built on top of https://github.com/huggingface/huggingface.js/pull/1576. This PR adds support for streaming mode to the Responses API. 
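Each event is written to the socket as a standard server-sent event, i.e. a `data: <json>` line followed by a blank line, so the stream can also be consumed without the `openai` client. A minimal sketch using plain `fetch` (the parsing is deliberately naive and assumes each event arrives in a single chunk; endpoint and token as in the other examples):

```ts
const res = await fetch("http://localhost:3000/v1/responses", {
	method: "POST",
	headers: {
		Authorization: `Bearer ${process.env.HF_TOKEN}`,
		"Content-Type": "application/json",
	},
	body: JSON.stringify({ model: "Qwen/Qwen2.5-VL-7B-Instruct", input: "Say hello.", stream: true }),
});
if (!res.body) throw new Error("No response body");

// Read the body chunk by chunk and log every `data: {...}` event.
const reader = res.body.getReader();
const decoder = new TextDecoder();
for (;;) {
	const { done, value } = await reader.read();
	if (done) break;
	for (const line of decoder.decode(value).split("\n")) {
		if (line.startsWith("data: ")) {
			console.log(JSON.parse(line.slice("data: ".length)));
		}
	}
}
```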
Tested it using the [openai-responses-starter-app](https://github.com/openai/openai-responses-starter-app): [Screencast from 02-07-2025 07:43:52.webm](https://github.com/user-attachments/assets/6eb77c9c-5796-4841-af55-f526da8da847) ``` pnpm run example streaming ``` ```js { type: 'response.created', response: { object: 'response', id: 'resp_861131785bfb75f24f944aa7cbc4767b194a2ea320cff258', status: 'in_progress', error: null, instructions: null, model: 'Qwen/Qwen2.5-VL-7B-Instruct', temperature: 1, top_p: 1, created_at: 1751383702199, output: [] }, sequence_number: 0 } { type: 'response.in_progress', response: { object: 'response', id: 'resp_861131785bfb75f24f944aa7cbc4767b194a2ea320cff258', status: 'in_progress', error: null, instructions: null, model: 'Qwen/Qwen2.5-VL-7B-Instruct', temperature: 1, top_p: 1, created_at: 1751383702199, output: [] }, sequence_number: 1 } { type: 'response.output_item.added', output_index: 0, item: { id: 'msg_def4b731a2654f7eab4fb2efdff217079da37154709c0f0b', type: 'message', role: 'assistant', status: 'in_progress', content: [] }, sequence_number: 2 } { type: 'response.content_part.added', item_id: 'msg_def4b731a2654f7eab4fb2efdff217079da37154709c0f0b', output_index: 0, content_index: 0, part: { type: 'output_text', text: '', annotations: [] }, sequence_number: 3 } { type: 'response.output_text.delta', item_id: 'msg_def4b731a2654f7eab4fb2efdff217079da37154709c0f0b', output_index: 0, content_index: 0, delta: 'Double', sequence_number: 4 } { type: 'response.output_text.delta', item_id: 'msg_def4b731a2654f7eab4fb2efdff217079da37154709c0f0b', output_index: 0, content_index: 0, delta: ' bubble', sequence_number: 5 } ... { type: 'response.output_text.delta', item_id: 'msg_def4b731a2654f7eab4fb2efdff217079da37154709c0f0b', output_index: 0, content_index: 0, delta: '!', sequence_number: 43 } { type: 'response.output_text.done', item_id: 'msg_def4b731a2654f7eab4fb2efdff217079da37154709c0f0b', output_index: 0, content_index: 0, text: 'Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath!', sequence_number: 44 } { type: 'response.content_part.done', item_id: 'msg_def4b731a2654f7eab4fb2efdff217079da37154709c0f0b', output_index: 0, content_index: 0, part: { type: 'output_text', text: 'Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! Double bubble bath! 
Double bubble bath!', annotations: [] }, sequence_number: 45 } { type: 'response.output_item.done', output_index: 0, item: { id: 'msg_def4b731a2654f7eab4fb2efdff217079da37154709c0f0b', type: 'message', role: 'assistant', status: 'completed', content: [ [Object] ] }, sequence_number: 46 } { type: 'response.completed', response: { object: 'response', id: 'resp_861131785bfb75f24f944aa7cbc4767b194a2ea320cff258', status: 'completed', error: null, instructions: null, model: 'Qwen/Qwen2.5-VL-7B-Instruct', temperature: 1, top_p: 1, created_at: 1751383702199, output: [ [Object] ] }, sequence_number: 47 } ``` --- .../responses-server/examples/streaming.js | 17 ++ .../responses-server/src/routes/responses.ts | 248 ++++++++++++++---- packages/responses-server/src/schemas.ts | 94 ++++--- 3 files changed, 278 insertions(+), 81 deletions(-) create mode 100644 packages/responses-server/examples/streaming.js diff --git a/packages/responses-server/examples/streaming.js b/packages/responses-server/examples/streaming.js new file mode 100644 index 0000000000..2d342d67de --- /dev/null +++ b/packages/responses-server/examples/streaming.js @@ -0,0 +1,17 @@ +import { OpenAI } from "openai"; +const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); + +const stream = await openai.responses.create({ + model: "Qwen/Qwen2.5-VL-7B-Instruct", + input: [ + { + role: "user", + content: "Say 'double bubble bath' ten times fast.", + }, + ], + stream: true, +}); + +for await (const event of stream) { + console.log(event); +} diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts index 7350b90bf0..ce3181545f 100644 --- a/packages/responses-server/src/routes/responses.ts +++ b/packages/responses-server/src/routes/responses.ts @@ -5,7 +5,12 @@ import { generateUniqueId } from "../lib/generateUniqueId.js"; import { InferenceClient } from "@huggingface/inference"; import type { ChatCompletionInputMessage, ChatCompletionInputMessageChunkType } from "@huggingface/tasks"; -import { type Response as OpenAIResponse } from "openai/resources/responses/responses"; +import type { + Response, + ResponseStreamEvent, + ResponseOutputItem, + ResponseContentPartAddedEvent, +} from "openai/resources/responses/responses"; export const postCreateResponse = async ( req: ValidatedRequest, @@ -33,27 +38,189 @@ export const postCreateResponse = async ( content: typeof item.content === "string" ? 
item.content - : item.content.map((content) => { - if (content.type === "input_image") { - return { - type: "image_url" as ChatCompletionInputMessageChunkType, - image_url: { - url: content.image_url, - }, - }; - } - // content.type must be "input_text" at this point - return { - type: "text" as ChatCompletionInputMessageChunkType, - text: content.text, - }; - }), + : item.content + .map((content) => { + switch (content.type) { + case "input_image": + return { + type: "image_url" as ChatCompletionInputMessageChunkType, + image_url: { + url: content.image_url, + }, + }; + case "output_text": + return { + type: "text" as ChatCompletionInputMessageChunkType, + text: content.text, + }; + case "refusal": + return undefined; + case "input_text": + return { + type: "text" as ChatCompletionInputMessageChunkType, + text: content.text, + }; + } + }) + .filter((item) => item !== undefined), })) ); } else { messages.push({ role: "user", content: req.body.input }); } + const payload = { + model: req.body.model, + messages: messages, + temperature: req.body.temperature, + top_p: req.body.top_p, + stream: req.body.stream, + }; + + const responseObject: Omit< + Response, + "incomplete_details" | "metadata" | "output_text" | "parallel_tool_calls" | "tool_choice" | "tools" + > = { + object: "response", + id: generateUniqueId("resp"), + status: "in_progress", + error: null, + instructions: req.body.instructions, + model: req.body.model, + temperature: req.body.temperature, + top_p: req.body.top_p, + created_at: new Date().getTime(), + output: [], + }; + + if (req.body.stream) { + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Connection", "keep-alive"); + let sequenceNumber = 0; + + // Emit events in sequence + const emitEvent = (event: ResponseStreamEvent) => { + res.write(`data: ${JSON.stringify(event)}\n\n`); + }; + + try { + // Response created event + emitEvent({ + type: "response.created", + response: responseObject as Response, + sequence_number: sequenceNumber++, + }); + + // Response in progress event + emitEvent({ + type: "response.in_progress", + response: responseObject as Response, + sequence_number: sequenceNumber++, + }); + + const stream = client.chatCompletionStream(payload); + + const outputObject: ResponseOutputItem = { + id: generateUniqueId("msg"), + type: "message", + role: "assistant", + status: "in_progress", + content: [], + }; + responseObject.output = [outputObject]; + + // Response output item added event + emitEvent({ + type: "response.output_item.added", + output_index: 0, + item: outputObject, + sequence_number: sequenceNumber++, + }); + + // Response content part added event + const contentPart: ResponseContentPartAddedEvent["part"] = { + type: "output_text", + text: "", + annotations: [], + }; + outputObject.content.push(contentPart); + + emitEvent({ + type: "response.content_part.added", + item_id: outputObject.id, + output_index: 0, + content_index: 0, + part: contentPart, + sequence_number: sequenceNumber++, + }); + + for await (const chunk of stream) { + if (chunk.choices[0].delta.content) { + contentPart.text += chunk.choices[0].delta.content; + + // Response output text delta event + emitEvent({ + type: "response.output_text.delta", + item_id: outputObject.id, + output_index: 0, + content_index: 0, + delta: chunk.choices[0].delta.content, + sequence_number: sequenceNumber++, + }); + } + } + + // Response output text done event + emitEvent({ + type: "response.output_text.done", + item_id: outputObject.id, + output_index: 0, + content_index: 0, + 
text: contentPart.text, + sequence_number: sequenceNumber++, + }); + + // Response content part done event + emitEvent({ + type: "response.content_part.done", + item_id: outputObject.id, + output_index: 0, + content_index: 0, + part: contentPart, + sequence_number: sequenceNumber++, + }); + + // Response output item done event + outputObject.status = "completed"; + emitEvent({ + type: "response.output_item.done", + output_index: 0, + item: outputObject, + sequence_number: sequenceNumber++, + }); + + // Response completed event + responseObject.status = "completed"; + emitEvent({ + type: "response.completed", + response: responseObject as Response, + sequence_number: sequenceNumber++, + }); + } catch (streamError: any) { + console.error("Error in streaming chat completion:", streamError); + + emitEvent({ + type: "error", + code: null, + message: streamError.message || "An error occurred while streaming from inference server.", + param: null, + sequence_number: sequenceNumber++, + }); + } + res.end(); + return; + } + try { const chatCompletionResponse = await client.chatCompletion({ model: req.body.model, @@ -62,37 +229,24 @@ export const postCreateResponse = async ( top_p: req.body.top_p, }); - const responseObject: Omit< - OpenAIResponse, - "incomplete_details" | "metadata" | "output_text" | "parallel_tool_calls" | "tool_choice" | "tools" - > = { - object: "response", - id: generateUniqueId("resp"), - status: "completed", - error: null, - instructions: req.body.instructions, - model: req.body.model, - temperature: req.body.temperature, - top_p: req.body.top_p, - created_at: chatCompletionResponse.created, - output: chatCompletionResponse.choices[0].message.content - ? [ - { - id: generateUniqueId("msg"), - type: "message", - role: "assistant", - status: "completed", - content: [ - { - type: "output_text", - text: chatCompletionResponse.choices[0].message.content, - annotations: [], - }, - ], - }, - ] - : [], - }; + responseObject.status = "completed"; + responseObject.output = chatCompletionResponse.choices[0].message.content + ? 
[ + { + id: generateUniqueId("msg"), + type: "message", + role: "assistant", + status: "completed", + content: [ + { + type: "output_text", + text: chatCompletionResponse.choices[0].message.content, + annotations: [], + }, + ], + }, + ] + : []; res.json(responseObject); } catch (error) { diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts index 4e47301aec..c1c8509257 100644 --- a/packages/responses-server/src/schemas.ts +++ b/packages/responses-server/src/schemas.ts @@ -4,65 +4,91 @@ import { z } from "zod"; * https://platform.openai.com/docs/api-reference/responses/create * commented out properties are not supported by the server */ + +const inputContentSchema = z.array( + z.union([ + z.object({ + type: z.literal("input_text"), + text: z.string(), + }), + z.object({ + type: z.literal("input_image"), + // file_id: z.string().nullable().default(null), + image_url: z.string(), + // detail: z.enum(["auto", "low", "high"]).default("auto"), + }), + // z.object({ + // type: z.literal("input_file"), + // file_data: z.string().nullable().default(null), + // file_id: z.string().nullable().default(null), + // filename: z.string().nullable().default(null), + // }), + ]) +); + export const createResponseParamsSchema = z.object({ // background: z.boolean().default(false), // include: input: z.union([ z.string(), z.array( - // z.union([ - z.object({ - content: z.union([ - z.string(), - z.array( + z.union([ + z.object({ + content: z.union([z.string(), inputContentSchema]), + role: z.enum(["user", "assistant", "system", "developer"]), + type: z.enum(["message"]).default("message"), + }), + z.object({ + role: z.enum(["user", "system", "developer"]), + status: z.enum(["in_progress", "completed", "incomplete"]).nullable().default(null), + content: inputContentSchema, + type: z.enum(["message"]).default("message"), + }), + z.object({ + id: z.string().optional(), + role: z.enum(["assistant"]), + status: z.enum(["in_progress", "completed", "incomplete"]).optional(), + type: z.enum(["message"]).default("message"), + content: z.array( z.union([ z.object({ - type: z.literal("input_text"), + type: z.literal("output_text"), text: z.string(), + annotations: z.array(z.object({})).optional(), // TODO: incomplete + logprobs: z.array(z.object({})).optional(), // TODO: incomplete }), z.object({ - type: z.literal("input_image"), - // file_id: z.string().nullable(), - image_url: z.string(), - // detail: z.enum(["auto", "low", "high"]).default("auto"), + type: z.literal("refusal"), + refusal: z.string(), }), - // z.object({ - // type: z.literal("input_file"), - // file_data: z.string().nullable(), - // file_id: z.string().nullable(), - // filename: z.string().nullable(), - // }), + // TODO: much more objects: File search tool call, Computer tool call, Computer tool call output, Web search tool call, Function tool call, Function tool call output, Reasoning, Image generation call, Code interpreter tool call, Local shell call, Local shell call output, MCP list tools, MCP approval request, MCP approval response, MCP tool call ]) ), - ]), - role: z.enum(["user", "assistant", "system", "developer"]), - type: z.enum(["message"]).default("message"), - }) - // z.object({}), // An item representing part of the context for the response to be generated by the model - // z.object({ - // id: z.string(), - // type: z.enum(["item_reference"]).default("item_reference"), - // }), - // ]) + }), + // z.object({ + // id: z.string(), + // type: z.enum(["item_reference"]).default("item_reference"), + // 
}), + ]) ), ]), - instructions: z.string().nullable(), - // max_output_tokens: z.number().min(0).nullable(), - // max_tool_calls: z.number().min(0).nullable(), - // metadata: z.record(z.string().max(64), z.string().max(512)).nullable(), // + 16 items max + instructions: z.string().nullable().default(null), + // max_output_tokens: z.number().min(0).nullable().default(null), + // max_tool_calls: z.number().min(0).nullable().default(null), + // metadata: z.record(z.string().max(64), z.string().max(512)).nullable().default(null), // + 16 items max model: z.string(), - // previous_response_id: z.string().nullable(), + // previous_response_id: z.string().nullable().default(null), // reasoning: z.object({ // effort: z.enum(["low", "medium", "high"]).default("medium"), - // summary: z.enum(["auto", "concise", "detailed"]).nullable(), + // summary: z.enum(["auto", "concise", "detailed"]).nullable().default(null), // }), // store: z.boolean().default(true), - // stream: z.boolean().default(false), + stream: z.boolean().default(false), temperature: z.number().min(0).max(2).default(1), // text: // tool_choice: // tools: - // top_logprobs: z.number().min(0).max(20).nullable(), + // top_logprobs: z.number().min(0).max(20).nullable().default(null), top_p: z.number().min(0).max(1).default(1), // truncation: z.enum(["auto", "disabled"]).default("disabled"), // user From eb8082ae543ebe571c090125c22d650ea656799e Mon Sep 17 00:00:00 2001 From: Wauplin Date: Wed, 2 Jul 2025 10:14:54 +0200 Subject: [PATCH 07/12] Add metadata support --- packages/responses-server/src/routes/responses.ts | 3 ++- packages/responses-server/src/schemas.ts | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts index ce3181545f..7e9a82a61f 100644 --- a/packages/responses-server/src/routes/responses.ts +++ b/packages/responses-server/src/routes/responses.ts @@ -79,7 +79,7 @@ export const postCreateResponse = async ( const responseObject: Omit< Response, - "incomplete_details" | "metadata" | "output_text" | "parallel_tool_calls" | "tool_choice" | "tools" + "incomplete_details" | "output_text" | "parallel_tool_calls" | "tool_choice" | "tools" > = { object: "response", id: generateUniqueId("resp"), @@ -91,6 +91,7 @@ export const postCreateResponse = async ( top_p: req.body.top_p, created_at: new Date().getTime(), output: [], + metadata: req.body.metadata, }; if (req.body.stream) { diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts index c1c8509257..601fa018a4 100644 --- a/packages/responses-server/src/schemas.ts +++ b/packages/responses-server/src/schemas.ts @@ -75,7 +75,13 @@ export const createResponseParamsSchema = z.object({ instructions: z.string().nullable().default(null), // max_output_tokens: z.number().min(0).nullable().default(null), // max_tool_calls: z.number().min(0).nullable().default(null), - // metadata: z.record(z.string().max(64), z.string().max(512)).nullable().default(null), // + 16 items max + metadata: z + .record(z.string().max(64), z.string().max(512)) + .refine((val) => Object.keys(val).length <= 16, { + message: "Must have at most 16 items", + }) + .nullable() + .default(null), model: z.string(), // previous_response_id: z.string().nullable().default(null), // reasoning: z.object({ From 69f0cd044fb33b55b88dea2054ebcff4530f7d14 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Wed, 2 Jul 2025 10:45:59 +0200 Subject: [PATCH 08/12] Support 
max_output_tokens --- packages/responses-server/src/routes/responses.ts | 14 ++++++++------ packages/responses-server/src/schemas.ts | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts index 7e9a82a61f..40350ee770 100644 --- a/packages/responses-server/src/routes/responses.ts +++ b/packages/responses-server/src/routes/responses.ts @@ -72,6 +72,7 @@ export const postCreateResponse = async ( const payload = { model: req.body.model, messages: messages, + max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens, temperature: req.body.temperature, top_p: req.body.top_p, stream: req.body.stream, @@ -81,17 +82,18 @@ export const postCreateResponse = async ( Response, "incomplete_details" | "output_text" | "parallel_tool_calls" | "tool_choice" | "tools" > = { - object: "response", - id: generateUniqueId("resp"), - status: "in_progress", + created_at: new Date().getTime(), error: null, + id: generateUniqueId("resp"), instructions: req.body.instructions, + max_output_tokens: req.body.max_output_tokens, + metadata: req.body.metadata, model: req.body.model, + object: "response", + output: [], + status: "in_progress", temperature: req.body.temperature, top_p: req.body.top_p, - created_at: new Date().getTime(), - output: [], - metadata: req.body.metadata, }; if (req.body.stream) { diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts index 601fa018a4..0439e68509 100644 --- a/packages/responses-server/src/schemas.ts +++ b/packages/responses-server/src/schemas.ts @@ -73,7 +73,7 @@ export const createResponseParamsSchema = z.object({ ), ]), instructions: z.string().nullable().default(null), - // max_output_tokens: z.number().min(0).nullable().default(null), + max_output_tokens: z.number().min(0).nullable().default(null), // max_tool_calls: z.number().min(0).nullable().default(null), metadata: z .record(z.string().max(64), z.string().max(512)) From 9c06344a87f3348417cc2645b69622d12b5d588c Mon Sep 17 00:00:00 2001 From: Lucain Date: Wed, 2 Jul 2025 12:17:34 +0200 Subject: [PATCH 09/12] [Responses API] Structured output (#1586) Built on top of https://github.com/huggingface/huggingface.js/pull/1576. Based on https://platform.openai.com/docs/guides/structured-outputs Works both with and without streaming. ## Non-stream **Run** ```bash pnpm run example structured_output ``` (which core logic is:) ```js (...) const response = await openai.responses.parse({ model: "Qwen/Qwen2.5-VL-72B-Instruct", provider: "nebius", input: [ { role: "system", content: "You are a helpful math tutor. Guide the user through the solution step by step.", }, { role: "user", content: "how can I solve 8x + 7 = -23" }, ], text: { format: zodTextFormat(MathReasoning, "math_reasoning"), }, }); (...) ``` **Output:** ```js { steps: [ { explanation: 'To solve for x, we need to isolate it on one side of the equation. 
We start by subtracting 7 from both sides of the equation.', output: '8x + 7 - 7 = -23 - 7' }, { explanation: 'Simplify the equation after performing the subtraction.', output: '8x = -30' }, { explanation: 'Now that we have isolated the term with x, we divide both sides by 8 to get x by itself.', output: '8x / 8 = -30 / 8' }, { explanation: 'Perform the division to find the value of x.', output: 'x = -30 / 8' }, { explanation: 'Simplify the fraction if possible.', output: 'x = -15 / 4' } ], final_answer: 'The solution is x = -15/4 or x = -3.75.' } ``` ## Stream **Run** ```bash pnpm run example structured_output_streaming ``` (which core logic is:) ```js const stream = openai.responses .stream({ model: "Qwen/Qwen2.5-VL-72B-Instruct", provider: "nebius", instructions: "Extract the event information.", input: "Alice and Bob are going to a science fair on Friday.", text: { format: zodTextFormat(CalendarEvent, "calendar_event"), }, }) .on("response.refusal.delta", (event) => { process.stdout.write(event.delta); }) .on("response.output_text.delta", (event) => { process.stdout.write(event.delta); }) .on("response.output_text.done", () => { process.stdout.write("\n"); }) .on("response.error", (event) => { console.error(event.error); }); const result = await stream.finalResponse(); console.log(result.output_parsed); ``` **Output:** ```js { "name": "Science Fair", "date": "Friday", "participants": ["Alice", "Bob"] } { name: 'Science Fair', date: 'Friday', participants: [ 'Alice', 'Bob' ] } ``` --- .../examples/structured_output.js | 32 +++++++++++++++++ .../examples/structured_output_streaming.js | 36 +++++++++++++++++++ .../responses-server/src/routes/responses.ts | 30 +++++++++++----- packages/responses-server/src/schemas.ts | 24 ++++++++++++- 4 files changed, 113 insertions(+), 9 deletions(-) create mode 100644 packages/responses-server/examples/structured_output.js create mode 100644 packages/responses-server/examples/structured_output_streaming.js diff --git a/packages/responses-server/examples/structured_output.js b/packages/responses-server/examples/structured_output.js new file mode 100644 index 0000000000..e1496b2006 --- /dev/null +++ b/packages/responses-server/examples/structured_output.js @@ -0,0 +1,32 @@ +import OpenAI from "openai"; +import { zodTextFormat } from "openai/helpers/zod"; +import { z } from "zod"; + +const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); + +const Step = z.object({ + explanation: z.string(), + output: z.string(), +}); + +const MathReasoning = z.object({ + steps: z.array(Step), + final_answer: z.string(), +}); + +const response = await openai.responses.parse({ + model: "Qwen/Qwen2.5-VL-72B-Instruct", + provider: "nebius", + input: [ + { + role: "system", + content: "You are a helpful math tutor. 
Guide the user through the solution step by step.", + }, + { role: "user", content: "how can I solve 8x + 7 = -23" }, + ], + text: { + format: zodTextFormat(MathReasoning, "math_reasoning"), + }, +}); + +console.log(response.output_parsed); diff --git a/packages/responses-server/examples/structured_output_streaming.js b/packages/responses-server/examples/structured_output_streaming.js new file mode 100644 index 0000000000..bdd8c1cf1e --- /dev/null +++ b/packages/responses-server/examples/structured_output_streaming.js @@ -0,0 +1,36 @@ +import { OpenAI } from "openai"; +import { zodTextFormat } from "openai/helpers/zod"; +import { z } from "zod"; + +const CalendarEvent = z.object({ + name: z.string(), + date: z.string(), + participants: z.array(z.string()), +}); + +const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); +const stream = openai.responses + .stream({ + model: "Qwen/Qwen2.5-VL-72B-Instruct", + provider: "nebius", + instructions: "Extract the event information.", + input: "Alice and Bob are going to a science fair on Friday.", + text: { + format: zodTextFormat(CalendarEvent, "calendar_event"), + }, + }) + .on("response.refusal.delta", (event) => { + process.stdout.write(event.delta); + }) + .on("response.output_text.delta", (event) => { + process.stdout.write(event.delta); + }) + .on("response.output_text.done", () => { + process.stdout.write("\n"); + }) + .on("response.error", (event) => { + console.error(event.error); + }); + +const result = await stream.finalResponse(); +console.log(result.output_parsed); diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts index 40350ee770..663383df25 100644 --- a/packages/responses-server/src/routes/responses.ts +++ b/packages/responses-server/src/routes/responses.ts @@ -3,7 +3,11 @@ import { type ValidatedRequest } from "../middleware/validation.js"; import { type CreateResponseParams } from "../schemas.js"; import { generateUniqueId } from "../lib/generateUniqueId.js"; import { InferenceClient } from "@huggingface/inference"; -import type { ChatCompletionInputMessage, ChatCompletionInputMessageChunkType } from "@huggingface/tasks"; +import type { + ChatCompletionInputMessage, + ChatCompletionInputMessageChunkType, + ChatCompletionInput, +} from "@huggingface/tasks"; import type { Response, @@ -69,13 +73,28 @@ export const postCreateResponse = async ( messages.push({ role: "user", content: req.body.input }); } - const payload = { + const payload: ChatCompletionInput = { model: req.body.model, + provider: req.body.provider, messages: messages, max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens, temperature: req.body.temperature, top_p: req.body.top_p, stream: req.body.stream, + response_format: req.body.text?.format + ? { + type: req.body.text.format.type, + json_schema: + req.body.text.format.type === "json_schema" + ? 
{ + description: req.body.text.format.description, + name: req.body.text.format.name, + schema: req.body.text.format.schema, + strict: req.body.text.format.strict, + } + : undefined, + } + : undefined, }; const responseObject: Omit< Response, @@ -225,12 +229,7 @@ export const postCreateResponse = async ( } try { - const chatCompletionResponse = await client.chatCompletion({ - model: req.body.model, - messages: messages, - temperature: req.body.temperature, - top_p: req.body.top_p, - }); + const chatCompletionResponse = await client.chatCompletion(payload); responseObject.status = "completed"; responseObject.output = chatCompletionResponse.choices[0].message.content diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts index 0439e68509..65b437c671 100644 --- a/packages/responses-server/src/schemas.ts +++ b/packages/responses-server/src/schemas.ts @@ -83,6 +83,7 @@ export const createResponseParamsSchema = z.object({ .nullable() .default(null), model: z.string(), + provider: z.string().optional(), // previous_response_id: z.string().nullable().default(null), // reasoning: z.object({ // effort: z.enum(["low", "medium", "high"]).default("medium"), @@ -91,7 +92,28 @@ export const createResponseParamsSchema = z.object({ // store: z.boolean().default(true), stream: z.boolean().default(false), temperature: z.number().min(0).max(2).default(1), - // text: + text: z + .object({ + format: z.union([ + z.object({ + type: z.literal("text"), + }), + z.object({ + type: z.literal("json_object"), + }), + z.object({ + type: z.literal("json_schema"), + name: z + .string() + .max(64, "Must be at most 64 characters") + .regex(/^[a-zA-Z0-9_-]+$/, "Only letters, numbers, underscores, and dashes are allowed"), + description: z.string().optional(), + schema: z.record(z.any()), + strict: z.boolean().default(false), + }), + ]), + }) + .optional(), // tool_choice: // tools: // top_logprobs: z.number().min(0).max(20).nullable().default(null), From 3818ce874cf8b6fab21f8a49a5b4a8842424616b Mon Sep 17 00:00:00 2001 From: Lucain Date: Wed, 2 Jul 2025 16:52:01 +0200 Subject: [PATCH 10/12] [Responses API] Function calling (#1587) Built on top of https://github.com/huggingface/huggingface.js/pull/1576. Based on https://platform.openai.com/docs/api-reference/responses/create and https://platform.openai.com/docs/guides/function-calling?api-mode=responses#streaming Works both with and without streaming. **Note:** the implementation is starting to get messy, especially in streaming mode. Complexity increases as we add new event types. I do think a refactoring would be beneficial, e.g. with an internal state object that keeps track of the current state and "knows" what to emit and when (typically to emit the "done"/"completed" events each time a new output/content is generated). Food for thought for a future PR.
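As a starting point, a rough sketch of what such a state object could look like (everything below is hypothetical and not part of the current codebase; names are illustrative):

```ts
import type { ResponseOutputItem, ResponseStreamEvent } from "openai/resources/responses/responses";

// Omit that distributes over a union, so each event type keeps its own fields.
type DistributiveOmit<T, K extends PropertyKey> = T extends unknown ? Omit<T, K> : never;

// Hypothetical helper: owns sequence numbering and the currently open output
// item, so the route handler only pushes items and deltas and never does the
// "done"/"completed" bookkeeping by hand. Status transitions omitted for brevity.
class StreamingState {
	private sequenceNumber = 0;
	private currentItem: ResponseOutputItem | undefined;

	constructor(private readonly send: (event: ResponseStreamEvent) => void) {}

	emit(event: DistributiveOmit<ResponseStreamEvent, "sequence_number">): void {
		this.send({ ...event, sequence_number: this.sequenceNumber++ } as ResponseStreamEvent);
	}

	// Opening a new item implicitly completes the previous one.
	openItem(item: ResponseOutputItem): void {
		this.closeItem();
		this.currentItem = item;
		this.emit({ type: "response.output_item.added", output_index: 0, item });
	}

	closeItem(): void {
		if (!this.currentItem) return;
		this.emit({ type: "response.output_item.done", output_index: 0, item: this.currentItem });
		this.currentItem = undefined;
	}
}
```

With something like this, the per-chunk loop would shrink to `state.openItem(...)` plus delta emissions, and all sequence numbering and "done" events would be emitted from a single place.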
## Non-stream **Run** ```bash pnpm run example function ``` **Output** ```js { created_at: 1751467285177, error: null, id: 'resp_0b2ab98168a9813e0f7373f940221da4ef3211f43c9faac8', instructions: null, max_output_tokens: null, metadata: null, model: 'meta-llama/Llama-3.3-70B-Instruct', object: 'response', output: [ { type: 'function_call', id: 'fc_f40ac964165602e2fcb2f955777acff8c4b9359d49eaf79b', call_id: '9cd167c7f', name: 'get_current_weather', arguments: '{"location": "Boston, MA", "unit": "fahrenheit"}', status: 'completed' } ], status: 'completed', tool_choice: 'auto', tools: [ { name: 'get_current_weather', parameters: [Object], strict: true, type: 'function', description: 'Get the current weather in a given location' } ], temperature: 1, top_p: 1, output_text: '' } ``` ## Stream **Run:** ``` pnpm run example function_streaming ``` **Output:** ```js { type: 'response.created', response: { created_at: 1751467334073, error: null, id: 'resp_8d86745178f2b9fc0da000156655956181c76a7701712a05', instructions: null, max_output_tokens: null, metadata: null, model: 'meta-llama/Llama-3.3-70B-Instruct', object: 'response', output: [], status: 'in_progress', tool_choice: 'auto', tools: [ [Object] ], temperature: 1, top_p: 1 }, sequence_number: 0 } { type: 'response.in_progress', response: { created_at: 1751467334073, error: null, id: 'resp_8d86745178f2b9fc0da000156655956181c76a7701712a05', instructions: null, max_output_tokens: null, metadata: null, model: 'meta-llama/Llama-3.3-70B-Instruct', object: 'response', output: [], status: 'in_progress', tool_choice: 'auto', tools: [ [Object] ], temperature: 1, top_p: 1 }, sequence_number: 1 } { type: 'response.output_item.added', output_index: 0, item: { type: 'function_call', id: 'fc_9bdc8945b9cb6c95c5c248db4203f0707ba9fd338dee2454', call_id: '83a9d4baf', name: 'get_weather', arguments: '' }, sequence_number: 2 } { type: 'response.function_call_arguments.delta', item_id: 'fc_9bdc8945b9cb6c95c5c248db4203f0707ba9fd338dee2454', output_index: 0, delta: '{"latitude": 48.8567, "longitude": 2.3508}', sequence_number: 3 } { type: 'response.function_call_arguments.done', item_id: 'fc_9bdc8945b9cb6c95c5c248db4203f0707ba9fd338dee2454', output_index: 0, arguments: '{"latitude": 48.8567, "longitude": 2.3508}', sequence_number: 4 } { type: 'response.output_item.done', output_index: 0, item: { type: 'function_call', id: 'fc_9bdc8945b9cb6c95c5c248db4203f0707ba9fd338dee2454', call_id: '83a9d4baf', name: 'get_weather', arguments: '{"latitude": 48.8567, "longitude": 2.3508}', status: 'completed' }, sequence_number: 5 } { type: 'response.completed', response: { created_at: 1751467334073, error: null, id: 'resp_8d86745178f2b9fc0da000156655956181c76a7701712a05', instructions: null, max_output_tokens: null, metadata: null, model: 'meta-llama/Llama-3.3-70B-Instruct', object: 'response', output: [ [Object] ], status: 'completed', tool_choice: 'auto', tools: [ [Object] ], temperature: 1, top_p: 1 }, sequence_number: 6 } ``` --- .../responses-server/examples/function.js | 32 ++ .../examples/function_streaming.js | 33 ++ .../responses-server/src/routes/responses.ts | 285 +++++++++++++----- packages/responses-server/src/schemas.ts | 24 +- 4 files changed, 301 insertions(+), 73 deletions(-) create mode 100644 packages/responses-server/examples/function.js create mode 100644 packages/responses-server/examples/function_streaming.js diff --git a/packages/responses-server/examples/function.js b/packages/responses-server/examples/function.js new file mode 100644 index 
0000000000..26893d5449 --- /dev/null +++ b/packages/responses-server/examples/function.js @@ -0,0 +1,32 @@ +import OpenAI from "openai"; + +const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); + +const tools = [ + { + type: "function", + name: "get_current_weather", + description: "Get the current weather in a given location", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. San Francisco, CA", + }, + unit: { type: "string", enum: ["celsius", "fahrenheit"] }, + }, + required: ["location", "unit"], + }, + }, +]; + +const response = await openai.responses.create({ + model: "meta-llama/Llama-3.3-70B-Instruct", + provider: "cerebras", + tools: tools, + input: "What is the weather like in Boston today?", + tool_choice: "auto", +}); + +console.log(response); diff --git a/packages/responses-server/examples/function_streaming.js b/packages/responses-server/examples/function_streaming.js new file mode 100644 index 0000000000..3c6d557ef0 --- /dev/null +++ b/packages/responses-server/examples/function_streaming.js @@ -0,0 +1,33 @@ +import { OpenAI } from "openai"; + +const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); + +const tools = [ + { + type: "function", + name: "get_weather", + description: "Get current temperature for provided coordinates in celsius.", + parameters: { + type: "object", + properties: { + latitude: { type: "number" }, + longitude: { type: "number" }, + }, + required: ["latitude", "longitude"], + additionalProperties: false, + }, + strict: true, + }, +]; + +const stream = await openai.responses.create({ + model: "meta-llama/Llama-3.3-70B-Instruct", + provider: "cerebras", + input: [{ role: "user", content: "What's the weather like in Paris today?" }], + tools, + stream: true, +}); + +for await (const event of stream) { + console.log(event); +} diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts index 663383df25..bccc4146e2 100644 --- a/packages/responses-server/src/routes/responses.ts +++ b/packages/responses-server/src/routes/responses.ts @@ -12,10 +12,18 @@ import type { import type { Response, ResponseStreamEvent, - ResponseOutputItem, ResponseContentPartAddedEvent, + ResponseOutputMessage, + ResponseFunctionToolCall, } from "openai/resources/responses/responses"; +class StreamingError extends Error { + constructor(message: string) { + super(message); + this.name = "StreamingError"; + } +} + export const postCreateResponse = async ( req: ValidatedRequest, res: ExpressResponse @@ -74,13 +82,13 @@ export const postCreateResponse = async ( } const payload: ChatCompletionInput = { + // main params model: req.body.model, provider: req.body.provider, messages: messages, - max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens, - temperature: req.body.temperature, - top_p: req.body.top_p, stream: req.body.stream, + // options + max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens, response_format: req.body.text?.format ? { type: req.body.text.format.type, @@ -95,12 +103,33 @@ export const postCreateResponse = async ( : undefined, } : undefined, + temperature: req.body.temperature, + tool_choice: + typeof req.body.tool_choice === "string" + ? req.body.tool_choice + : req.body.tool_choice + ? 
diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts
index 663383df25..bccc4146e2 100644
--- a/packages/responses-server/src/routes/responses.ts
+++ b/packages/responses-server/src/routes/responses.ts
@@ -12,10 +12,18 @@ import type {
 import type {
 	Response,
 	ResponseStreamEvent,
-	ResponseOutputItem,
 	ResponseContentPartAddedEvent,
+	ResponseOutputMessage,
+	ResponseFunctionToolCall,
 } from "openai/resources/responses/responses";
 
+class StreamingError extends Error {
+	constructor(message: string) {
+		super(message);
+		this.name = "StreamingError";
+	}
+}
+
 export const postCreateResponse = async (
 	req: ValidatedRequest,
 	res: ExpressResponse
@@ -74,13 +82,13 @@
 	}
 
 	const payload: ChatCompletionInput = {
+		// main params
 		model: req.body.model,
 		provider: req.body.provider,
 		messages: messages,
-		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
-		temperature: req.body.temperature,
-		top_p: req.body.top_p,
 		stream: req.body.stream,
+		// options
+		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
 		response_format: req.body.text?.format
 			? {
 					type: req.body.text.format.type,
 					json_schema:
 						req.body.text.format.type === "json_schema"
 							? {
 									description: req.body.text.format.description,
 									name: req.body.text.format.name,
 									schema: req.body.text.format.schema,
 									strict: req.body.text.format.strict,
 								}
 							: undefined,
 				}
 			: undefined,
+		temperature: req.body.temperature,
+		tool_choice:
+			typeof req.body.tool_choice === "string"
+				? req.body.tool_choice
+				: req.body.tool_choice
+					? {
+							type: "function",
+							function: {
+								name: req.body.tool_choice.name,
+							},
+						}
+					: undefined,
+		tools: req.body.tools
+			? req.body.tools.map((tool) => ({
+					type: tool.type,
+					function: {
+						name: tool.name,
+						parameters: tool.parameters,
+						description: tool.description,
+						strict: tool.strict,
+					},
+				}))
+			: undefined,
+		top_p: req.body.top_p,
 	};
 
-	const responseObject: Omit<
-		Response,
-		"incomplete_details" | "output_text" | "parallel_tool_calls" | "tool_choice" | "tools"
-	> = {
+	const responseObject: Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls"> = {
 		created_at: new Date().getTime(),
 		error: null,
 		id: generateUniqueId("resp"),
@@ -110,7 +139,11 @@
 		model: req.body.model,
 		object: "response",
 		output: [],
+		// parallel_tool_calls: req.body.parallel_tool_calls,
 		status: "in_progress",
+		text: req.body.text,
+		tool_choice: req.body.tool_choice ?? "auto",
+		tools: req.body.tools ?? [],
 		temperature: req.body.temperature,
 		top_p: req.body.top_p,
 	};
@@ -142,45 +175,62 @@
 		const stream = client.chatCompletionStream(payload);
 
-		const outputObject: ResponseOutputItem = {
-			id: generateUniqueId("msg"),
-			type: "message",
-			role: "assistant",
-			status: "in_progress",
-			content: [],
-		};
-		responseObject.output = [outputObject];
+		for await (const chunk of stream) {
+			if (chunk.choices[0].delta.content) {
+				if (responseObject.output.length === 0) {
+					const outputObject: ResponseOutputMessage = {
+						id: generateUniqueId("msg"),
+						type: "message",
+						role: "assistant",
+						status: "in_progress",
+						content: [],
+					};
+					responseObject.output = [outputObject];
 
-		// Response output item added event
-		emitEvent({
-			type: "response.output_item.added",
-			output_index: 0,
-			item: outputObject,
-			sequence_number: sequenceNumber++,
-		});
+					// Response output item added event
+					emitEvent({
+						type: "response.output_item.added",
+						output_index: 0,
+						item: outputObject,
+						sequence_number: sequenceNumber++,
+					});
+				}
 
-		// Response content part added event
-		const contentPart: ResponseContentPartAddedEvent["part"] = {
-			type: "output_text",
-			text: "",
-			annotations: [],
-		};
-		outputObject.content.push(contentPart);
+				const outputObject = responseObject.output.at(-1);
+				if (!outputObject || outputObject.type !== "message") {
+					throw new StreamingError("Not implemented: only single output item type is supported in streaming mode.");
+				}
 
-		emitEvent({
-			type: "response.content_part.added",
-			item_id: outputObject.id,
-			output_index: 0,
-			content_index: 0,
-			part: contentPart,
-			sequence_number: sequenceNumber++,
-		});
+				if (outputObject.content.length === 0) {
+					// Response content part added event
+					const contentPart: ResponseContentPartAddedEvent["part"] = {
+						type: "output_text",
+						text: "",
+						annotations: [],
+					};
+					outputObject.content.push(contentPart);
 
-		for await (const chunk of stream) {
-			if (chunk.choices[0].delta.content) {
-				contentPart.text += chunk.choices[0].delta.content;
+					emitEvent({
+						type: "response.content_part.added",
+						item_id: outputObject.id,
+						output_index: 0,
+						content_index: 0,
+						part: contentPart,
+						sequence_number: sequenceNumber++,
+					});
+				}
+
+				const contentPart = outputObject.content.at(-1);
+				if (!contentPart || contentPart.type !== "output_text") {
+					throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
+				}
+
+				if (contentPart.type !== "output_text") {
+					throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
+				}
 
-				// Response output text delta event
+				// Add text delta
+				contentPart.text += chunk.choices[0].delta.content;
 				emitEvent({
 					type: "response.output_text.delta",
 					item_id: outputObject.id,
@@ -189,37 +239,109 @@
 					delta: chunk.choices[0].delta.content,
 					sequence_number: sequenceNumber++,
 				});
+			} else if (chunk.choices[0].delta.tool_calls) {
+				if (chunk.choices[0].delta.tool_calls.length > 1) {
+					throw new StreamingError("Not implemented: only single tool call is supported in streaming mode.");
+				}
+
+				if (responseObject.output.length === 0) {
+					if (!chunk.choices[0].delta.tool_calls[0].function.name) {
+						throw new StreamingError("Tool call function name is required.");
+					}
+
+					const outputObject: ResponseFunctionToolCall = {
+						type: "function_call",
+						id: generateUniqueId("fc"),
+						call_id: chunk.choices[0].delta.tool_calls[0].id,
+						name: chunk.choices[0].delta.tool_calls[0].function.name,
+						arguments: "",
+					};
+					responseObject.output = [outputObject];
+
+					// Response output item added event
+					emitEvent({
+						type: "response.output_item.added",
+						output_index: 0,
+						item: outputObject,
+						sequence_number: sequenceNumber++,
+					});
+				}
+
+				const outputObject = responseObject.output.at(-1);
+				if (!outputObject || !outputObject.id || outputObject.type !== "function_call") {
+					throw new StreamingError("Not implemented: can only support single output item type in streaming mode.");
+				}
+
+				outputObject.arguments += chunk.choices[0].delta.tool_calls[0].function.arguments;
+				emitEvent({
+					type: "response.function_call_arguments.delta",
+					item_id: outputObject.id,
+					output_index: 0,
+					delta: chunk.choices[0].delta.tool_calls[0].function.arguments,
+					sequence_number: sequenceNumber++,
+				});
 			}
 		}
 
-		// Response output text done event
-		emitEvent({
-			type: "response.output_text.done",
-			item_id: outputObject.id,
-			output_index: 0,
-			content_index: 0,
-			text: contentPart.text,
-			sequence_number: sequenceNumber++,
-		});
+		const lastOutputItem = responseObject.output.at(-1);
 
-		// Response content part done event
-		emitEvent({
-			type: "response.content_part.done",
-			item_id: outputObject.id,
-			output_index: 0,
-			content_index: 0,
-			part: contentPart,
-			sequence_number: sequenceNumber++,
-		});
+		if (lastOutputItem) {
+			if (lastOutputItem?.type === "message") {
+				const contentPart = lastOutputItem.content.at(-1);
+				if (contentPart?.type === "output_text") {
+					emitEvent({
+						type: "response.output_text.done",
+						item_id: lastOutputItem.id,
+						output_index: responseObject.output.length - 1,
+						content_index: lastOutputItem.content.length - 1,
+						text: contentPart.text,
+						sequence_number: sequenceNumber++,
+					});
 
-		// Response output item done event
-		outputObject.status = "completed";
-		emitEvent({
-			type: "response.output_item.done",
-			output_index: 0,
-			item: outputObject,
-			sequence_number: sequenceNumber++,
-		});
+					emitEvent({
+						type: "response.content_part.done",
+						item_id: lastOutputItem.id,
+						output_index: responseObject.output.length - 1,
+						content_index: lastOutputItem.content.length - 1,
+						part: contentPart,
+						sequence_number: sequenceNumber++,
+					});
+				} else {
+					throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
+				}
+
+				// Response output item done event
+				lastOutputItem.status = "completed";
+				emitEvent({
+					type: "response.output_item.done",
+					output_index: responseObject.output.length - 1,
+					item: lastOutputItem,
+					sequence_number: sequenceNumber++,
+				});
+			} else if (lastOutputItem?.type === "function_call") {
+				if (!lastOutputItem.id) {
+					throw new StreamingError("Function call id is required.");
+				}
+
+				emitEvent({
+					type: "response.function_call_arguments.done",
+					item_id: lastOutputItem.id,
+					output_index: responseObject.output.length - 1,
+					arguments: lastOutputItem.arguments,
+					sequence_number: sequenceNumber++,
+				});
+
+				lastOutputItem.status = "completed";
+				emitEvent({
+					type: "response.output_item.done",
+					output_index: responseObject.output.length - 1,
+					item: lastOutputItem,
+					sequence_number: sequenceNumber++,
+				});
+			} else {
+				throw new StreamingError("Not implemented: only message output is supported in streaming mode.");
+			}
+		}
 
 		// Response completed event
 		responseObject.status = "completed";
@@ -228,13 +350,25 @@
 			response: responseObject as Response,
 			sequence_number: sequenceNumber++,
 		});
-	} catch (streamError: any) {
+	} catch (streamError) {
 		console.error("Error in streaming chat completion:", streamError);
 
+		let message = "An error occurred while streaming from inference server.";
+		if (streamError instanceof StreamingError) {
+			message = streamError.message;
+		} else if (
+			typeof streamError === "object" &&
+			streamError &&
+			"message" in streamError &&
+			typeof streamError.message === "string"
+		) {
+			message = streamError.message;
+		}
+
 		emitEvent({
 			type: "error",
 			code: null,
-			message: streamError.message || "An error occurred while streaming from inference server.",
+			message,
 			param: null,
 			sequence_number: sequenceNumber++,
 		});
@@ -263,7 +397,16 @@
 					],
 				},
 			]
-			: [];
+			: chatCompletionResponse.choices[0].message.tool_calls
+				? chatCompletionResponse.choices[0].message.tool_calls.map((toolCall) => ({
+						type: "function_call",
+						id: generateUniqueId("fc"),
+						call_id: toolCall.id,
+						name: toolCall.function.name,
+						arguments: toolCall.function.arguments,
+						status: "completed",
+					}))
+				: [];
 
 		res.json(responseObject);
 	} catch (error) {
diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts
index 65b437c671..427716c10e 100644
--- a/packages/responses-server/src/schemas.ts
+++ b/packages/responses-server/src/schemas.ts
@@ -83,6 +83,7 @@ export const createResponseParamsSchema = z.object({
 		.nullable()
 		.default(null),
 	model: z.string(),
+	// parallel_tool_calls: z.boolean().default(true), // TODO: how to handle this if chat completion doesn't?
 	provider: z.string().optional(),
 	// previous_response_id: z.string().nullable().default(null),
 	// reasoning: z.object({
@@ -114,8 +115,27 @@ export const createResponseParamsSchema = z.object({
 			]),
 		})
 		.optional(),
-	// tool_choice:
-	// tools:
+	tool_choice: z
+		.union([
+			z.enum(["auto", "none", "required"]),
+			z.object({
+				type: z.enum(["function"]),
+				name: z.string(),
+			}),
+			// TODO: also hosted tool and MCP tool
+		])
+		.optional(),
+	tools: z
+		.array(
+			z.object({
+				name: z.string(),
+				parameters: z.record(z.any()),
+				strict: z.boolean().default(true),
+				type: z.enum(["function"]),
+				description: z.string().optional(),
+			})
+		)
+		.optional(),
 	// top_logprobs: z.number().min(0).max(20).nullable().default(null),
 	top_p: z.number().min(0).max(1).default(1),
 	// truncation: z.enum(["auto", "disabled"]).default("disabled"),
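With the `tool_choice` / `tools` schema above in place, the object form of `tool_choice` should also validate and force a specific function. A hedged sketch mirroring `examples/function.js` (the object form maps onto Chat Completions' `{ type, function: { name } }`, as shown in the routes diff):

```js
import OpenAI from "openai";

const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });

// Object form of tool_choice: always call this specific function.
const response = await openai.responses.create({
	model: "meta-llama/Llama-3.3-70B-Instruct",
	input: "What is the weather like in Boston today?",
	tool_choice: { type: "function", name: "get_current_weather" },
	tools: [
		{
			type: "function",
			name: "get_current_weather",
			description: "Get the current weather in a given location",
			parameters: {
				type: "object",
				properties: {
					location: { type: "string", description: "The city and state, e.g. San Francisco, CA" },
					unit: { type: "string", enum: ["celsius", "fahrenheit"] },
				},
				required: ["location", "unit"],
			},
		},
	],
});

console.log(response.output);
```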
required."); + } + + emitEvent({ + type: "response.function_call_arguments.done", + item_id: lastOutputItem.id, + output_index: responseObject.output.length - 1, + arguments: lastOutputItem.arguments, + sequence_number: sequenceNumber++, + }); + + lastOutputItem.status = "completed"; + emitEvent({ + type: "response.output_item.done", + output_index: responseObject.output.length - 1, + item: lastOutputItem, + sequence_number: sequenceNumber++, + }); + } else { + throw new StreamingError("Not implemented: only message output is supported in streaming mode."); + } + } // Response completed event responseObject.status = "completed"; @@ -228,13 +350,25 @@ export const postCreateResponse = async ( response: responseObject as Response, sequence_number: sequenceNumber++, }); - } catch (streamError: any) { + } catch (streamError) { console.error("Error in streaming chat completion:", streamError); + let message = "An error occurred while streaming from inference server."; + if (streamError instanceof StreamingError) { + message = streamError.message; + } else if ( + typeof streamError === "object" && + streamError && + "message" in streamError && + typeof streamError.message === "string" + ) { + message = streamError.message; + } + emitEvent({ type: "error", code: null, - message: streamError.message || "An error occurred while streaming from inference server.", + message, param: null, sequence_number: sequenceNumber++, }); @@ -263,7 +397,16 @@ export const postCreateResponse = async ( ], }, ] - : []; + : chatCompletionResponse.choices[0].message.tool_calls + ? chatCompletionResponse.choices[0].message.tool_calls.map((toolCall) => ({ + type: "function_call", + id: generateUniqueId("fc"), + call_id: toolCall.id, + name: toolCall.function.name, + arguments: toolCall.function.arguments, + status: "completed", + })) + : []; res.json(responseObject); } catch (error) { diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts index 65b437c671..427716c10e 100644 --- a/packages/responses-server/src/schemas.ts +++ b/packages/responses-server/src/schemas.ts @@ -83,6 +83,7 @@ export const createResponseParamsSchema = z.object({ .nullable() .default(null), model: z.string(), + // parallel_tool_calls: z.boolean().default(true), // TODO: how to handle this if chat completion doesn't? 
From 9b4c4a81a570ff3b50546431af46704a9575e36d Mon Sep 17 00:00:00 2001
From: Lucain
Date: Wed, 2 Jul 2025 18:07:39 +0200
Subject: [PATCH 12/12] [ResponsesAPI] Provider in model + clean input (#1588)

Some tweaks to make it work in a demo:
- the provider can be passed as part of the model id, e.g. `model="cohere@CohereLabs/c4ai-command-a-03-2025"`
- drop input messages with empty content (not supported by some providers)
- check that the tool calls list is not empty before treating a chunk as a tool call
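Illustrative usage of the new addressing scheme (the model id below is taken from the commit message; any `provider@owner/model` pair should behave the same way, since the server splits the string on `@`):

```js
import OpenAI from "openai";

const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });

// "cohere@CohereLabs/c4ai-command-a-03-2025" is split by the server into
// provider "cohere" and model "CohereLabs/c4ai-command-a-03-2025".
const response = await openai.responses.create({
	model: "cohere@CohereLabs/c4ai-command-a-03-2025",
	input: "Say hello to the world.",
});

console.log(response.output_text);
```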
---
 .../responses-server/src/routes/responses.ts | 75 ++++++++++---------
 packages/responses-server/src/schemas.ts     |  1 -
 2 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts
index bccc4146e2..514e843b28 100644
--- a/packages/responses-server/src/routes/responses.ts
+++ b/packages/responses-server/src/routes/responses.ts
@@ -45,46 +45,53 @@
 
 	if (Array.isArray(req.body.input)) {
 		messages.push(
-			...req.body.input.map((item) => ({
-				role: item.role,
-				content:
-					typeof item.content === "string"
-						? item.content
-						: item.content
-								.map((content) => {
-									switch (content.type) {
-										case "input_image":
-											return {
-												type: "image_url" as ChatCompletionInputMessageChunkType,
-												image_url: {
-													url: content.image_url,
-												},
-											};
-										case "output_text":
-											return {
-												type: "text" as ChatCompletionInputMessageChunkType,
-												text: content.text,
-											};
-										case "refusal":
-											return undefined;
-										case "input_text":
-											return {
-												type: "text" as ChatCompletionInputMessageChunkType,
-												text: content.text,
-											};
-									}
-								})
-								.filter((item) => item !== undefined),
-			}))
+			...req.body.input
+				.map((item) => ({
+					role: item.role,
+					content:
+						typeof item.content === "string"
+							? item.content
+							: item.content
+									.map((content) => {
+										switch (content.type) {
+											case "input_image":
+												return {
+													type: "image_url" as ChatCompletionInputMessageChunkType,
+													image_url: {
+														url: content.image_url,
+													},
+												};
+											case "output_text":
+												return content.text
+													? {
+															type: "text" as ChatCompletionInputMessageChunkType,
+															text: content.text,
+														}
+													: undefined;
+											case "refusal":
+												return undefined;
+											case "input_text":
+												return {
+													type: "text" as ChatCompletionInputMessageChunkType,
+													text: content.text,
+												};
+										}
+									})
+									.filter((item) => item !== undefined),
+				}))
+				.filter((message) => message.content?.length !== 0)
 		);
 	} else {
 		messages.push({ role: "user", content: req.body.input });
 	}
 
+	const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
+	const provider = req.body.model.includes("@") ? req.body.model.split("@")[0] : undefined;
+
 	const payload: ChatCompletionInput = {
 		// main params
-		model: req.body.model,
-		provider: req.body.provider,
+		model: model,
+		provider: provider,
 		messages: messages,
 		stream: req.body.stream,
 		// options
@@ -239,7 +246,7 @@
 					delta: chunk.choices[0].delta.content,
 					sequence_number: sequenceNumber++,
 				});
-			} else if (chunk.choices[0].delta.tool_calls) {
+			} else if (chunk.choices[0].delta.tool_calls && chunk.choices[0].delta.tool_calls.length > 0) {
 				if (chunk.choices[0].delta.tool_calls.length > 1) {
 					throw new StreamingError("Not implemented: only single tool call is supported in streaming mode.");
 				}
diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts
index 427716c10e..5fd12020a0 100644
--- a/packages/responses-server/src/schemas.ts
+++ b/packages/responses-server/src/schemas.ts
@@ -84,7 +84,6 @@ export const createResponseParamsSchema = z.object({
 		.default(null),
 	model: z.string(),
 	// parallel_tool_calls: z.boolean().default(true), // TODO: how to handle this if chat completion doesn't?
-	provider: z.string().optional(),
 	// previous_response_id: z.string().nullable().default(null),
 	// reasoning: z.object({
 	// 	effort: z.enum(["low", "medium", "high"]).default("medium"),