Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c5e4e51
fix(event_loop): raise dedicated exception when encountering max toke…
dbschmigelski Jul 30, 2025
6703819
fix: update integ tests
dbschmigelski Jul 30, 2025
c94b74e
fix: rename exception message, add to exception, move earlier in cycle
dbschmigelski Jul 31, 2025
36dd0f9
Update tests_integ/test_max_tokens_reached.py
dbschmigelski Jul 31, 2025
e04c73d
Update tests_integ/test_max_tokens_reached.py
dbschmigelski Jul 31, 2025
cca2f86
linting
dbschmigelski Jul 31, 2025
f647baa
Merge branch 'strands-agents:main' into fix-max-tokens
dbschmigelski Jul 31, 2025
78c5a91
Merge branch 'strands-agents:main' into fix-max-tokens
dbschmigelski Aug 1, 2025
a208496
Merge branch 'strands-agents:main' into fix-max-tokens
dbschmigelski Aug 4, 2025
2e2d4df
feat: add builtin hook provider to address max tokens reached truncation
dbschmigelski Aug 4, 2025
447d147
tests: modify integ test to inspect message history
dbschmigelski Aug 4, 2025
564895d
fix: fix linting errors
dbschmigelski Aug 4, 2025
2f118fb
fix: linting
dbschmigelski Aug 4, 2025
e5fc51a
refactor: switch from hook approach to conversation manager
dbschmigelski Aug 5, 2025
5906fc2
linting
dbschmigelski Aug 5, 2025
87445a3
fix: test contained incorrect assertions
dbschmigelski Aug 6, 2025
924fea9
fix: add event emission
dbschmigelski Aug 6, 2025
104f6b4
feat: move to async
dbschmigelski Aug 6, 2025
11b91f4
feat: add additional error case where no tool uses were fixed
dbschmigelski Aug 6, 2025
1da9ba7
feat: add max tokens reached test
dbschmigelski Aug 6, 2025
623f3c7
linting
dbschmigelski Aug 6, 2025
66c4c07
feat: add max tokens reached test
dbschmigelski Aug 6, 2025
4b5c5a7
feat: switch to a default behavior to recover from max tokens reached
dbschmigelski Aug 7, 2025
83ad822
fix: all tool uses now must be replaced
dbschmigelski Aug 8, 2025
faa4618
fix: boolean
dbschmigelski Aug 8, 2025
fa8195f
remove todo
dbschmigelski Aug 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions src/strands/event_loop/_recover_message_on_max_tokens_reached.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Message recovery utilities for handling max token limit scenarios.

This module provides functionality to recover and clean up incomplete messages that occur
when model responses are truncated due to maximum token limits being reached. It specifically
handles cases where tool use blocks are incomplete or malformed due to truncation.
"""

import logging

from ..types.content import ContentBlock, Message
from ..types.tools import ToolUse

logger = logging.getLogger(__name__)


def recover_message_on_max_tokens_reached(message: Message) -> Message:
    """Sanitize a model message that was truncated by a max-token limit.

    A response cut off at the token limit cannot be trusted to contain complete
    tool use blocks, so every tool use — whether it looks well-formed or not —
    is swapped for a short explanatory text block. All other content blocks
    (text, images, etc.) pass through untouched, and the message role is kept,
    yielding a message safe to append to the conversation history.

    Args:
        message: The potentially truncated message produced by the model when
            the max token limit was hit.

    Returns:
        A new Message with the same role whose tool use blocks have each been
        replaced by an explanatory text block.

    Example:
        A content block such as:
        ```
        {"toolUse": {"name": "calculator", "input": {"expression": "2+2"}, "toolUseId": "123"}}
        ```

        becomes:
        ```
        {"text": "The selected tool calculator's tool use was incomplete due to maximum token limits being reached."}
        ```
    """
    logger.info("handling max_tokens stop reason - replacing all tool uses with error messages")

    cleaned_blocks: list[ContentBlock] = []
    for block in message["content"] or []:
        tool_use: ToolUse | None = block.get("toolUse")
        if tool_use:
            # Any tool use in a truncated response is suspect; substitute an
            # explanatory text block instead of executing it.
            tool_name = tool_use.get("name") or "<unknown>"
            logger.warning("tool_name=<%s> | replacing with error message due to max_tokens truncation.", tool_name)
            cleaned_blocks.append(
                {
                    "text": f"The selected tool {tool_name}'s tool use was incomplete due "
                    f"to maximum token limits being reached."
                }
            )
        else:
            # Non-tool content (text, images, ...) is preserved verbatim.
            cleaned_blocks.append(block)

    return {"content": cleaned_blocks, "role": message["role"]}
32 changes: 18 additions & 14 deletions src/strands/event_loop/event_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from ..types.streaming import Metrics, StopReason
from ..types.tools import ToolChoice, ToolChoiceAuto, ToolConfig, ToolGenerator, ToolResult, ToolUse
from ._recover_message_on_max_tokens_reached import recover_message_on_max_tokens_reached
from .streaming import stream_messages

if TYPE_CHECKING:
Expand Down Expand Up @@ -156,6 +157,9 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
)
)

if stop_reason == "max_tokens":
message = recover_message_on_max_tokens_reached(message)

if model_invoke_span:
tracer.end_model_invoke_span(model_invoke_span, message, usage, stop_reason)
break # Success! Break out of retry loop
Expand Down Expand Up @@ -192,6 +196,19 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
raise e

try:
# Add message in trace and mark the end of the stream messages trace
stream_trace.add_message(message)
stream_trace.end()

# Add the response message to the conversation
agent.messages.append(message)
agent.hooks.invoke_callbacks(MessageAddedEvent(agent=agent, message=message))
yield {"callback": {"message": message}}

# Update metrics
agent.event_loop_metrics.update_usage(usage)
agent.event_loop_metrics.update_metrics(metrics)

if stop_reason == "max_tokens":
"""
Handle max_tokens limit reached by the model.
Expand All @@ -205,21 +222,8 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
"Agent has reached an unrecoverable state due to max_tokens limit. "
"For more information see: "
"https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
),
incomplete_message=message,
)
)
# Add message in trace and mark the end of the stream messages trace
stream_trace.add_message(message)
stream_trace.end()

# Add the response message to the conversation
agent.messages.append(message)
agent.hooks.invoke_callbacks(MessageAddedEvent(agent=agent, message=message))
yield {"callback": {"message": message}}

# Update metrics
agent.event_loop_metrics.update_usage(usage)
agent.event_loop_metrics.update_metrics(metrics)

# If the model is requesting to use tools
if stop_reason == "tool_use":
Expand Down
6 changes: 1 addition & 5 deletions src/strands/types/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from typing import Any

from strands.types.content import Message


class EventLoopException(Exception):
"""Exception raised by the event loop."""
Expand All @@ -28,14 +26,12 @@ class MaxTokensReachedException(Exception):
the complexity of the response, or when the model naturally reaches its configured output limit during generation.
"""

def __init__(self, message: str, incomplete_message: Message):
def __init__(self, message: str):
"""Initialize the exception with an error message.

Args:
message: The error message describing the token limit issue
incomplete_message: The valid Message object with incomplete content due to token limits
"""
self.incomplete_message = incomplete_message
super().__init__(message)


Expand Down
55 changes: 31 additions & 24 deletions tests/strands/event_loop/test_event_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,8 +305,10 @@ async def test_event_loop_cycle_text_response_error(
await alist(stream)


@patch("strands.event_loop.event_loop.recover_message_on_max_tokens_reached")
@pytest.mark.asyncio
async def test_event_loop_cycle_tool_result(
mock_recover_message,
agent,
model,
system_prompt,
Expand Down Expand Up @@ -339,6 +341,9 @@ async def test_event_loop_cycle_tool_result(

assert tru_stop_reason == exp_stop_reason and tru_message == exp_message and tru_request_state == exp_request_state

# Verify that recover_message_on_max_tokens_reached was NOT called for tool_use stop reason
mock_recover_message.assert_not_called()

model.stream.assert_called_with(
[
{"role": "user", "content": [{"text": "Hello"}]},
Expand Down Expand Up @@ -568,42 +573,44 @@ async def test_event_loop_cycle_max_tokens_exception(
agenerator,
alist,
):
"""Test that max_tokens stop reason raises MaxTokensReachedException."""
"""Test that max_tokens stop reason calls recover_message_on_max_tokens_reached and then raises MaxTokensReachedException."""

# Note the empty toolUse to handle case raised in https://github.com/strands-agents/sdk-python/issues/495
model.stream.return_value = agenerator(
[
{
"contentBlockStart": {
"start": {
"toolUse": {},
model.stream.side_effect = [
agenerator(
[
{
"contentBlockStart": {
"start": {
"toolUse": {
"toolUseId": "t1",
"name": "asdf",
"input": {}, # empty
},
},
},
},
},
{"contentBlockStop": {}},
{"messageStop": {"stopReason": "max_tokens"}},
]
)
{"contentBlockStop": {}},
{"messageStop": {"stopReason": "max_tokens"}},
]
),
]

# Call event_loop_cycle, expecting it to raise MaxTokensReachedException
with pytest.raises(MaxTokensReachedException) as exc_info:
expected_message = (
"Agent has reached an unrecoverable state due to max_tokens limit. "
"For more information see: "
"https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
)
with pytest.raises(MaxTokensReachedException, match=expected_message):
stream = strands.event_loop.event_loop.event_loop_cycle(
agent=agent,
invocation_state={},
)
await alist(stream)

# Verify the exception message contains the expected content
expected_message = (
"Agent has reached an unrecoverable state due to max_tokens limit. "
"For more information see: "
"https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
)
assert str(exc_info.value) == expected_message

# Verify that the message has not been appended to the messages array
assert len(agent.messages) == 1
assert exc_info.value.incomplete_message not in agent.messages
assert len(agent.messages) == 2
assert "tool use was incomplete due" in agent.messages[1]["content"][0]["text"]


@patch("strands.event_loop.event_loop.get_tracer")
Expand Down
Loading
Loading