@@ -200,6 +200,7 @@ export class ChatGPTApi implements LLMApi {
       options.config.model.startsWith("o1") ||
       options.config.model.startsWith("o3") ||
       options.config.model.startsWith("o4-mini");
+    const isGpt5 = options.config.model.startsWith("gpt-5");
     if (isDalle3) {
       const prompt = getMessageTextContent(
         options.messages.slice(-1)?.pop() as any,
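For context, the new isGpt5 flag mirrors the existing prefix checks. A minimal standalone sketch of the detection logic, using an illustrative model id rather than anything from this commit:

// Sketch of the prefix-based detection above; the model id is hypothetical.
const model: string = "gpt-5-mini";
const isO1OrO3 =
  model.startsWith("o1") ||
  model.startsWith("o3") ||
  model.startsWith("o4-mini");
const isGpt5 = model.startsWith("gpt-5");
console.log({ isO1OrO3, isGpt5 }); // { isO1OrO3: false, isGpt5: true }

Because startsWith("gpt-5") matches the whole family, any "gpt-5-*" variant takes the same code path as the base model.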
@@ -230,15 +231,21 @@ export class ChatGPTApi implements LLMApi {
         messages,
         stream: options.config.stream,
         model: modelConfig.model,
-        temperature: !isO1OrO3 ? modelConfig.temperature : 1,
+        temperature: (!isO1OrO3 && !isGpt5) ? modelConfig.temperature : 1,
         presence_penalty: !isO1OrO3 ? modelConfig.presence_penalty : 0,
         frequency_penalty: !isO1OrO3 ? modelConfig.frequency_penalty : 0,
         top_p: !isO1OrO3 ? modelConfig.top_p : 1,
         // max_tokens: Math.max(modelConfig.max_tokens, 1024),
         // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
       };
 
-      if (isO1OrO3) {
+      if (isGpt5) {
+        // Remove max_tokens if present
+        delete requestPayload.max_tokens;
+        // Add max_completion_tokens in its place
+        requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
+
+      } else if (isO1OrO3) {
         // by default the o1/o3 models will not attempt to produce output that includes markdown formatting
         // manually add "Formatting re-enabled" developer message to encourage markdown inclusion in model responses
         // (https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/reasoning?tabs=python-secure#markdown-output)
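The net effect of the new branch is a payload with temperature pinned to 1 and the token budget carried in max_completion_tokens instead of max_tokens. A self-contained sketch of that transformation, with an assumed stand-in for the app's RequestPayload type and illustrative config values:

// Loose stand-in type; the real RequestPayload in the repo has more fields.
interface RequestPayloadSketch {
  model: string;
  temperature: number;
  max_tokens?: number;
  max_completion_tokens?: number;
}

const modelConfig = { model: "gpt-5", temperature: 0.7, max_tokens: 4096 }; // illustrative
const requestPayload: RequestPayloadSketch = {
  model: modelConfig.model,
  temperature: 1, // gpt-5 (like o1/o3) gets the fixed default, not modelConfig.temperature
};

// Mirror the gpt-5 branch: drop max_tokens, move the budget to max_completion_tokens.
delete requestPayload.max_tokens;
requestPayload.max_completion_tokens = modelConfig.max_tokens;
// requestPayload: { model: "gpt-5", temperature: 1, max_completion_tokens: 4096 }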
@@ -251,8 +258,9 @@ export class ChatGPTApi implements LLMApi {
         requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
       }
 
+
       // add max_tokens to vision model
-      if (visionModel && !isO1OrO3) {
+      if (visionModel && !isO1OrO3 && !isGpt5) {
         requestPayload["max_tokens"] = Math.max(modelConfig.max_tokens, 4000);
       }
     }
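With !isGpt5 added to the vision guard, gpt-5 models never receive a raw max_tokens even when images are attached; they keep only the max_completion_tokens set in the earlier branch. A sketch of the guard in isolation, with illustrative flags and config:

// All values here are illustrative, not taken from the commit.
const visionModel: boolean = true;
const isO1OrO3: boolean = false;
const isGpt5: boolean = true;
const modelConfig = { max_tokens: 1024 };
const requestPayload: { max_tokens?: number } = {};

if (visionModel && !isO1OrO3 && !isGpt5) {
  // Ordinary vision models get a floor of 4000 tokens; gpt-5 skips this entirely.
  requestPayload.max_tokens = Math.max(modelConfig.max_tokens, 4000);
}
// For isGpt5 === true the branch is skipped and requestPayload stays empty.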