update gpt-5

DVampire · DVampire · commit 9d56ad9864cd · 2025-08-12T15:30:51.000+08:00
diff --git a/src/models/__init__.py b/src/models/__init__.py
@@ -6,11 +6,12 @@
                   Model,
                   parse_json_if_needed,
                   agglomerate_stream_deltas,
-                  CODEAGENT_RESPONSE_FORMAT
+                  CODEAGENT_RESPONSE_FORMAT,
                   )
 from .litellm import LiteLLMModel
 from .openaillm import OpenAIServerModel
 from .models import ModelManager
+from .message_manager import MessageManager
 
 model_manager = ModelManager()
 
@@ -23,4 +24,5 @@
     "parse_json_if_needed",
     "model_manager",
     "ModelManager",
+    "MessageManager",
 ]
diff --git a/src/models/litellm.py b/src/models/litellm.py
@@ -36,6 +36,7 @@ def __init__(
         self,
         model_id: Optional[str] = None,
         api_base=None,
+        api_type: str = "chat/completions",
         api_key=None,
         custom_role_conversions: dict[str, str] | None = None,
         flatten_messages_as_text: bool | None = None,
@@ -52,6 +53,7 @@ def __init__(
             model_id = "anthropic/claude-3-5-sonnet-20240620"
         self.model_id = model_id
         self.api_base = api_base
+        self.api_type = api_type
         self.api_key = api_key
         flatten_messages_as_text = (
             flatten_messages_as_text
diff --git a/src/models/message_manager.py b/src/models/message_manager.py
@@ -19,14 +19,16 @@
 ]
 
 class MessageManager():
-    def __init__(self, model_id: str):
+    def __init__(self, model_id: str, api_type: str = "chat/completions"):
         self.model_id = model_id
+        self.api_type = api_type
 
     def get_clean_message_list(self,
             message_list: list[ChatMessage],
             role_conversions: dict[MessageRole, MessageRole] | dict[str, str] = {},
             convert_images_to_image_urls: bool = False,
             flatten_messages_as_text: bool = False,
+            api_type: str | None = None,
     ) -> list[dict[str, Any]]:
         """
         Creates a list of messages to give as input to the LLM. These messages are dictionaries and chat template compatible with transformers LLM chat template.
@@ -38,6 +40,25 @@ def get_clean_message_list(self,
             convert_images_to_image_urls (`bool`, default `False`): Whether to convert images to image URLs.
             flatten_messages_as_text (`bool`, default `False`): Whether to flatten messages as text.
         """
+        # `api_type=None` (the default) defers to the type configured on this instance. The default must
+        # stay falsy: a truthy default string would make the `self.api_type` fallback unreachable.
+        api_type = api_type or self.api_type
+        if api_type == "responses":
+            build_message_list = self._get_responses_message_list
+        else:
+            build_message_list = self._get_chat_completions_message_list
+        return build_message_list(message_list, role_conversions, convert_images_to_image_urls,
+                                  flatten_messages_as_text)
+
+    def _get_chat_completions_message_list(self,
+            message_list: list[ChatMessage],
+            role_conversions: dict[MessageRole, MessageRole] | dict[str, str] = {},
+            convert_images_to_image_urls: bool = False,
+            flatten_messages_as_text: bool = False,
+    ) -> list[dict[str, Any]]:
+        """
+        Creates a list of messages in chat completions format.
+        """
         output_message_list: list[dict[str, Any]] = []
         message_list = deepcopy(message_list)  # Avoid modifying the original list
         for message in message_list:
@@ -87,6 +108,106 @@ def get_clean_message_list(self,
                 )
         return output_message_list
 
+    def _get_responses_message_list(self,
+            message_list: list[ChatMessage],
+            role_conversions: dict[MessageRole, MessageRole] | dict[str, str] = {},
+            convert_images_to_image_urls: bool = False,
+            flatten_messages_as_text: bool = False,
+    ) -> list[dict[str, Any]]:
+        """
+        Creates a list of messages in responses format (used when `api_type` is "responses").
+
+        Consecutive messages that end up with the same role are merged into a single entry.
+
+        Args:
+            message_list (`list[ChatMessage]`): Messages to convert. A deep copy is processed, so the input is not modified.
+            role_conversions (`dict[MessageRole, MessageRole]`, default `{}`): Mapping applied to message roles before merging.
+            convert_images_to_image_urls (`bool`, default `False`): Whether to convert images to image URLs.
+            flatten_messages_as_text (`bool`, default `False`): Whether to emit `content` as a plain string. A content
+                list is reduced to the text of its first element; images trigger an assertion error in this mode.
+
+        Returns:
+            `list[dict[str, Any]]`: Dicts with `role`, `content` and, when the message has tool calls, `tool_calls`.
+
+        Raises:
+            ValueError: If a message role is not in `MessageRole.roles()`.
+
+        NOTE(review): elements keep chat-completions style keys (`text`, `image_url`, `tool_calls`); confirm the
+        consumer of the "responses" `api_type` accepts them, or maps them to the item types that API documents.
+        """
+        output_message_list: list[dict[str, Any]] = []
+        message_list = deepcopy(message_list)  # Avoid modifying the original list
+
+        for message in message_list:
+            role = message.role
+            if role not in MessageRole.roles():
+                raise ValueError(f"Incorrect role {role}, only {MessageRole.roles()} are supported for now.")
+
+            if role in role_conversions:
+                message.role = role_conversions[role]  # type: ignore
+
+            # Build `content`: a list of element dicts, or a string/None when flattening.
+            if isinstance(message.content, list):
+                content = []
+                for element in message.content:
+                    assert isinstance(element, dict), "Error: this element should be a dict:" + str(element)
+                    if element["type"] == "image":
+                        assert not flatten_messages_as_text, f"Cannot use images with {flatten_messages_as_text=}"
+                        encoded_image = encode_image_base64(element["image"])
+                        if convert_images_to_image_urls:
+                            element = {"type": "image_url", "image_url": {"url": make_image_url(encoded_image)}}
+                        else:
+                            element = {"type": "image", "image": encoded_image}
+                    # Text elements and any other element types are passed through unchanged.
+                    content.append(element)
+                if flatten_messages_as_text:
+                    # Always store a string in this mode: the merge below concatenates text, and `+=` on a
+                    # list would instead append the incoming string one character at a time.
+                    if not content:
+                        content = ""
+                    elif content[0]["type"] == "text":
+                        content = content[0]["text"]
+                    else:
+                        content = str(content)
+            elif flatten_messages_as_text:
+                content = message.content
+            else:
+                content = [{"type": "text", "text": message.content}] if message.content else []
+
+            message_dict: dict[str, Any] = {"role": message.role, "content": content}
+            # Copy each tool call into a plain dict.
+            tool_calls = [
+                {
+                    "id": tool_call.id,
+                    "type": tool_call.type,
+                    "function": {
+                        "name": tool_call.function.name,
+                        "arguments": tool_call.function.arguments,
+                        "description": tool_call.function.description,
+                    },
+                }
+                for tool_call in message.tool_calls or []
+            ]
+            if tool_calls:
+                message_dict["tool_calls"] = tool_calls
+
+            previous = output_message_list[-1] if output_message_list else None
+            if previous is None or previous["role"] != message.role:
+                output_message_list.append(message_dict)
+                continue
+
+            # Same role as the previous entry: fold this message into it.
+            if flatten_messages_as_text:
+                # Either side is None when that message had no content; treat it as empty text.
+                previous["content"] = (previous["content"] or "") + "\n" + (content or "")
+            else:
+                # Outside flatten mode every `content` built above is a list.
+                previous["content"].extend(content)
+            if tool_calls:
+                previous.setdefault("tool_calls", []).extend(tool_calls)
+
+        return output_message_list
+
     def get_tool_json_schema(self,
                              tool: Any,
                              model_id: Optional[str] = None
diff --git a/src/models/models.py b/src/models/models.py
@@ -154,26 +154,31 @@ def _register_openai_models(self, use_local_proxy: bool = False):
             # deep research
             model_name = "o3-deep-research"
             model_id = "o3-deep-research"
-            model = RestfulResponseModel(
-                api_base=self._check_local_api_base(local_api_base_name="SKYWORK_SHUBIAOBIAO_API_BASE",
-                                                    remote_api_base_name="OPENAI_API_BASE"),
-                api_type="responses",
+            client = AsyncOpenAI(
                 api_key=api_key,
+                base_url=self._check_local_api_base(local_api_base_name="SKYWORK_API_BASE",
+                                                    remote_api_base_name="SKYWORK_API_BASE"),
+                http_client=ASYNC_HTTP_CLIENT,
+            )
+            model = LiteLLMModel(
                 model_id=model_id,
-                http_client=HTTP_CLIENT,
+                http_client=client,
                 custom_role_conversions=custom_role_conversions,
             )
             self.registed_models[model_name] = model
-
-            model_name = "o4-mini-deep-research"
-            model_id = "o4-mini-deep-research"
-            model = RestfulResponseModel(
-                api_base=self._check_local_api_base(local_api_base_name="SKYWORK_SHUBIAOBIAO_API_BASE",
-                                                    remote_api_base_name="OPENAI_API_BASE"),
-                api_type="responses",
+            
+            # gpt-5
+            model_name = "gpt-5"
+            model_id = "openai/gpt-5"
+            client = AsyncOpenAI(
                 api_key=api_key,
+                base_url=self._check_local_api_base(local_api_base_name="SKYWORK_AZURE_US_API_BASE",
+                                                    remote_api_base_name="OPENAI_API_BASE"),
+                http_client=ASYNC_HTTP_CLIENT,
+            )
+            model = LiteLLMModel(
                 model_id=model_id,
-                http_client=HTTP_CLIENT,
+                http_client=client,
                 custom_role_conversions=custom_role_conversions,
             )
             self.registed_models[model_name] = model
diff --git a/src/models/openaillm.py b/src/models/openaillm.py
@@ -1,6 +1,4 @@
-import warnings
-from typing import Dict, List, Optional, Any
-from copy import deepcopy
+from typing import Any
 from collections.abc import Generator
 
 from src.models.base import (ApiModel,
diff --git a/src/models/restful.py b/src/models/restful.py
@@ -984,8 +984,6 @@ async def generate(
 
         # Async call to the LiteLLM client for completion
         response = self.client.completion(**completion_kwargs)
-        print(response)
-        exit()
 
         response = ChatCompletion.model_validate(response)
 
diff --git a/tests/test_models.py b/tests/test_models.py
@@ -89,6 +89,10 @@ async def video_generation():
     messages = [
         ChatMessage(role="user", content="What is the capital of France?"),
     ]
+    
+    response = asyncio.run(model_manager.registed_models["gpt-5"](
+        messages=messages,
+    ))
 
     response = asyncio.run(model_manager.registed_models["o3-deep-research"](
         messages=messages,