1+ import json
12from typing import Dict , List , Optional , Any
23from collections .abc import Generator
34from openai .types .chat import ChatCompletion
@@ -58,6 +59,62 @@ def completion(self,
5859
5960 return response .json ()
6061
class RestfulResponseClient():
    """Minimal REST client for an OpenAI Responses-style endpoint.

    Posts a JSON payload to ``{api_base}/{api_type}`` and extracts the
    final ``response.completed`` event from the SSE-formatted reply body.
    """

    def __init__(self,
                 api_base: str,
                 api_key: str,
                 api_type: str = "responses",
                 model_id: str = "o3",
                 http_client=None):
        self.api_base = api_base
        self.api_key = api_key
        self.api_type = api_type
        self.model_id = model_id

        self.http_client = http_client

    def completion(self,
                   model,
                   input,
                   tools,
                   **kwargs):
        """Send a completion request and return the final parsed event.

        Args:
            model: Model identifier; only the part after the last "/" is sent.
            input: Request input payload (messages / prompt structure).
            tools: Tool definitions forwarded to the server.
            **kwargs: Extra fields merged into the request body.

        Returns:
            The JSON-decoded ``response.completed`` event, or ``None`` if no
            such event appears in the response body.
        """
        headers = {
            "app_key": self.api_key,
            "Content-Type": "application/json"
        }

        # The server expects the bare model name, not a provider-prefixed id.
        model = model.split("/")[-1]
        data = {
            "model": model,
            "input": input,
            "tools": tools,
            "stream": False,
        }

        # Add any additional kwargs to the data
        if kwargs:
            data.update(kwargs)

        response = requests.post(
            f"{self.api_base}/{self.api_type}",
            json=data,
            headers=headers,
        )

        # The body is SSE-like: scan for the terminal "response.completed"
        # data line and return its JSON payload. (Removed leftover debug
        # print of every line.)
        for line in response.text.split('\n'):
            if not line.strip():
                continue
            try:
                json_line = line.strip()
                if json_line.startswith("data: ") and "response.completed" in json_line:
                    json_line = json_line.replace("data: ", "").strip()
                    return json.loads(json_line)
            except Exception as e:
                # Best-effort parsing: log malformed lines and keep scanning.
                logger.error(f"Error parsing line: {line}, error: {e}")
61118
62119class RestfulTranscribeClient ():
63120 def __init__ (self ,
@@ -726,4 +783,226 @@ def __call__(self, *args, **kwargs) -> str:
726783 Call the model with the given arguments.
727784 This is a convenience method that calls `generate` with the same arguments.
728785 """
729- return self .generate (* args , ** kwargs )
786+ return self .generate (* args , ** kwargs )
787+
788+
class RestfulResponseModel(ApiModel):
    """This model connects to an OpenAI-compatible Responses API server.

    Parameters:
        model_id (`str`):
            The model identifier to use on the server (e.g. "gpt-3.5-turbo").
        api_base (`str`, *optional*):
            The base URL of the OpenAI-compatible API server.
        api_type (`str`, default `"chat/completions"`):
            The endpoint path appended to `api_base`.
        api_key (`str`, *optional*):
            The API key to use for authentication.
        custom_role_conversions (`dict[str, str]`, *optional*):
            Custom role conversion mapping to convert message roles in others.
            Useful for specific models that do not support specific message roles like "system".
        flatten_messages_as_text (`bool | None`, default `None`):
            Whether to flatten messages as text. When `None`, it is inferred
            from the model id (enabled for "ollama", "groq", "cerebras").
        http_client (*optional*):
            HTTP client forwarded to the underlying REST client.
        **kwargs:
            Additional keyword arguments to pass to the API.
    """

    def __init__(
        self,
        model_id: str,
        api_base: Optional[str] = None,
        api_type: str = "chat/completions",
        api_key: Optional[str] = None,
        custom_role_conversions: dict[str, str] | None = None,
        flatten_messages_as_text: bool | None = None,
        http_client=None,
        **kwargs,
    ):
        self.model_id = model_id
        self.api_base = api_base
        self.api_key = api_key
        self.api_type = api_type
        # FIX: the previous default of False made the `is not None` check
        # always true, so the model-id-based fallback below was dead code.
        # A default of None lets the fallback actually apply.
        flatten_messages_as_text = (
            flatten_messages_as_text
            if flatten_messages_as_text is not None
            else model_id.startswith(("ollama", "groq", "cerebras"))
        )

        self.http_client = http_client

        self.message_manager = MessageManager(model_id=model_id)

        super().__init__(
            model_id=model_id,
            custom_role_conversions=custom_role_conversions,
            flatten_messages_as_text=flatten_messages_as_text,
            **kwargs,
        )

    def create_client(self):
        """Build the REST client used for all completion calls."""
        return RestfulResponseClient(
            api_base=self.api_base,
            api_key=self.api_key,
            api_type=self.api_type,
            model_id=self.model_id,
            http_client=self.http_client,
        )

    def _prepare_completion_kwargs(
        self,
        messages: list[ChatMessage],
        stop_sequences: list[str] | None = None,
        response_format: dict[str, str] | None = None,
        tools_to_call_from: list[Any] | None = None,
        custom_role_conversions: dict[str, str] | None = None,
        convert_images_to_image_urls: bool = False,
        tool_choice: str | dict | None = "required",  # Configurable tool_choice parameter
        **kwargs,
    ) -> dict[str, Any]:
        """
        Prepare parameters required for model invocation, handling parameter priorities.

        Parameter priority from high to low:
        1. Explicitly passed kwargs
        2. Specific parameters (stop_sequences, response_format, etc.)
        3. Default values in self.kwargs
        """
        # Clean and standardize the message list
        flatten_messages_as_text = kwargs.pop("flatten_messages_as_text", self.flatten_messages_as_text)
        messages_as_dicts = self.message_manager.get_clean_message_list(
            messages,
            role_conversions=custom_role_conversions or tool_role_conversions,
            convert_images_to_image_urls=convert_images_to_image_urls,
            flatten_messages_as_text=flatten_messages_as_text,
        )
        # Use self.kwargs as the base configuration; the Responses API takes
        # the message list under "input" rather than "messages".
        completion_kwargs = {
            **self.kwargs,
            "input": messages_as_dicts,
        }

        # Handle specific parameters
        if stop_sequences is not None:
            completion_kwargs["stop"] = stop_sequences
        if response_format is not None:
            completion_kwargs["response_format"] = response_format

        # Handle tools parameter
        if tools_to_call_from:
            tools_config = {
                "tools": [
                    self.message_manager.get_tool_json_schema(tool, model_id=self.model_id)
                    for tool in tools_to_call_from
                ],
            }
            if tool_choice is not None:
                tools_config["tool_choice"] = tool_choice
            completion_kwargs.update(tools_config)

        # Finally, use the passed-in kwargs to override all settings
        completion_kwargs.update(kwargs)

        completion_kwargs = self.message_manager.get_clean_completion_kwargs(completion_kwargs)

        return completion_kwargs

    def generate_stream(self,
                        messages: list[ChatMessage],
                        stop_sequences: list[str] | None = None,
                        response_format: dict[str, str] | None = None,
                        tools_to_call_from: list[Any] | None = None,
                        **kwargs,
                        ) -> Generator[ChatMessageStreamDelta]:
        """Stream deltas from the server, yielding content/tool-call chunks.

        NOTE(review): this iterates objects with `.usage`/`.choices`
        attributes, but RestfulResponseClient.completion returns a plain
        dict — confirm the streaming path is wired to a compatible client.
        """
        completion_kwargs = self._prepare_completion_kwargs(
            messages=messages,
            stop_sequences=stop_sequences,
            response_format=response_format,
            tools_to_call_from=tools_to_call_from,
            model=self.model_id,
            custom_role_conversions=self.custom_role_conversions,
            convert_images_to_image_urls=True,
            **kwargs,
        )

        for event in self.client.completion(**completion_kwargs, stream=True, stream_options={"include_usage": True}):
            if getattr(event, "usage", None):
                self._last_input_token_count = event.usage.prompt_tokens
                self._last_output_token_count = event.usage.completion_tokens
                yield ChatMessageStreamDelta(
                    content="",
                    token_usage=TokenUsage(
                        input_tokens=event.usage.prompt_tokens,
                        output_tokens=event.usage.completion_tokens,
                    ),
                )
            if event.choices:
                choice = event.choices[0]
                if choice.delta:
                    yield ChatMessageStreamDelta(
                        content=choice.delta.content,
                        tool_calls=[
                            ChatMessageToolCallStreamDelta(
                                index=delta.index,
                                id=delta.id,
                                type=delta.type,
                                function=delta.function,
                            )
                            for delta in choice.delta.tool_calls
                        ]
                        if choice.delta.tool_calls
                        else None,
                    )
                else:
                    if not getattr(choice, "finish_reason", None):
                        raise ValueError(f"No content or tool calls in event: {event}")

    async def generate(
        self,
        messages: list[ChatMessage],
        stop_sequences: list[str] | None = None,
        response_format: dict[str, str] | None = None,
        tools_to_call_from: list[Any] | None = None,
        **kwargs,
    ) -> ChatMessage:
        """Run a non-streaming completion and return the resulting ChatMessage.

        FIX: removed leftover debug `print(response)` and `exit()` that
        terminated the whole process before any value could be returned.
        """
        completion_kwargs = self._prepare_completion_kwargs(
            messages=messages,
            stop_sequences=stop_sequences,
            response_format=response_format,
            tools_to_call_from=tools_to_call_from,
            model=self.model_id,
            convert_images_to_image_urls=True,
            custom_role_conversions=self.custom_role_conversions,
            **kwargs,
        )

        # NOTE(review): this unconditionally overrides any tools prepared
        # above with the built-in web-search tool — confirm this is intended.
        completion_kwargs['tools'] = [
            {"type": "web_search_preview"},
        ]

        # Synchronous call to the REST client for completion.
        response = self.client.completion(**completion_kwargs)

        # NOTE(review): validating a Responses API payload against the
        # ChatCompletion schema — verify the server returns that shape.
        response = ChatCompletion.model_validate(response)

        self._last_input_token_count = response.usage.prompt_tokens
        self._last_output_token_count = response.usage.completion_tokens
        return ChatMessage.from_dict(
            response.choices[0].message.model_dump(include={"role", "content", "tool_calls"}),
            raw=response,
            token_usage=TokenUsage(
                input_tokens=response.usage.prompt_tokens,
                output_tokens=response.usage.completion_tokens,
            ),
        )

    async def __call__(self, *args, **kwargs) -> ChatMessage:
        """
        Call the model with the given arguments.
        This is a convenience method that calls `generate` with the same arguments.
        """
        return await self.generate(*args, **kwargs)
0 commit comments