diff --git a/pyproject.toml b/pyproject.toml index 1e1ae8a0..1925f305 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,8 +35,7 @@ dependencies = [ "google-genai", "opentelemetry-instrumentation-google-genai>=0.2b0", "tensorzero>=2025.4.7", - "google-genai", - "opentelemetry-instrumentation-google-genai>=0.2b0", + "deprecated" ] [project.optional-dependencies] diff --git a/src/mcp_agent/agents/base_agent.py b/src/mcp_agent/agents/base_agent.py index 68c43bf3..630499f0 100644 --- a/src/mcp_agent/agents/base_agent.py +++ b/src/mcp_agent/agents/base_agent.py @@ -106,6 +106,8 @@ def __init__( # Initialize the LLM to None (will be set by attach_llm) self._llm: Optional[AugmentedLLMProtocol] = None + self._last_call_timestamp: float | None = None + # Map function names to tools self._function_tool_map: Dict[str, Any] = {} diff --git a/src/mcp_agent/core/request_params.py b/src/mcp_agent/core/request_params.py index 7b087829..3971f5f4 100644 --- a/src/mcp_agent/core/request_params.py +++ b/src/mcp_agent/core/request_params.py @@ -52,3 +52,11 @@ class RequestParams(CreateMessageRequestParams): """ Optional dictionary of template variables for dynamic templates. Currently only works for TensorZero inference backend """ + + delay_between_calls: float | None = None + """ + Optional delay between tool calls in seconds. This is useful for rate limiting as well as for working with tool calls that have delayed effects. + + Example tool calls where this is helpful are tools with asynchronous effects, like sending emails or using a web browser. Web browser tools + may finish the tool calls already before all Ajax calls are finished, leading to problems if the LLM is too quick to continue processing. 
+ """ \ No newline at end of file diff --git a/src/mcp_agent/llm/augmented_llm.py b/src/mcp_agent/llm/augmented_llm.py index 6ae9b646..e7bae837 100644 --- a/src/mcp_agent/llm/augmented_llm.py +++ b/src/mcp_agent/llm/augmented_llm.py @@ -1,3 +1,5 @@ +import asyncio +import time from abc import abstractmethod from typing import ( TYPE_CHECKING, @@ -158,6 +160,8 @@ def __init__( # Initialize default parameters self.default_request_params = self._initialize_default_params(kwargs) + self._last_call_timestamp: float = 0.0 + # Apply model override if provided if model: self.default_request_params.model = model @@ -171,6 +175,7 @@ def __init__( self.type_converter = type_converter self.verb = kwargs.get("verb") + def _initialize_default_params(self, kwargs: dict) -> RequestParams: """Initialize default parameters for the LLM. Should be overridden by provider implementations to set provider-specific defaults.""" @@ -195,6 +200,9 @@ async def generate( # We never expect this for structured() calls - this is for interactive use - developers # can do this programatically # TODO -- create a "fast-agent" control role rather than magic strings + + final_params = self.get_request_params(request_params) + await self._apply_delay(final_params) if multipart_messages[-1].first_text().startswith("***SAVE_HISTORY"): parts: list[str] = multipart_messages[-1].first_text().split(" ", 1) @@ -246,9 +254,12 @@ async def structured( ) -> Tuple[ModelT | None, PromptMessageMultipart]: """Return a structured response from the LLM using the provided messages."""  final_params = self.get_request_params(request_params) + await self._apply_delay(final_params) + self._precall(multipart_messages) result, assistant_response = await self._apply_prompt_provider_specific_structured( - multipart_messages, model, final_params ) self._message_history.append(assistant_response) @@ -337,6 +348,20 @@ def _precall(self, multipart_messages:
List[PromptMessageMultipart]) -> None: chat_turn=self.chat_turn(), ) + async def _apply_delay(self, request_params: RequestParams) -> None: + """Checks and applies a delay if configured in request_params.""" + if request_params.delay_between_calls and self._last_call_timestamp: + required_delay = request_params.delay_between_calls + time_since_last_call = time.monotonic() - self._last_call_timestamp + + if time_since_last_call < required_delay: + wait_time = required_delay - time_since_last_call + self.logger.debug(f"Applying delay: waiting for {wait_time:.2f} seconds.") + await asyncio.sleep(wait_time) + + # Always update the timestamp for the next call + self._last_call_timestamp = time.monotonic() + def chat_turn(self) -> int: """Return the current chat turn number""" return 1 + sum(1 for message in self._message_history if message.role == "assistant") diff --git a/src/mcp_agent/llm/providers/google_converter.py b/src/mcp_agent/llm/providers/google_converter.py index ce5a4581..b6e8a208 100644 --- a/src/mcp_agent/llm/providers/google_converter.py +++ b/src/mcp_agent/llm/providers/google_converter.py @@ -166,6 +166,10 @@ def convert_from_google_content( fast_agent_parts: List[ TextContent | ImageContent | EmbeddedResource | CallToolRequestParams ] = [] + + if content is None or not hasattr(content, 'parts') or content.parts is None: + return [] # Google API response 'content' object is None. Cannot extract parts. + for part in content.parts: if part.text: fast_agent_parts.append(TextContent(type="text", text=part.text))