Skip to content

fix(event_loop): raise dedicated exception when encountering max tokens #576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 1, 2025
26 changes: 24 additions & 2 deletions src/strands/event_loop/event_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@
from ..telemetry.tracer import get_tracer
from ..tools.executor import run_tools, validate_and_prepare_tools
from ..types.content import Message
from ..types.exceptions import ContextWindowOverflowException, EventLoopException, ModelThrottledException
from ..types.exceptions import (
ContextWindowOverflowException,
EventLoopException,
MaxTokensReachedException,
ModelThrottledException,
)
from ..types.streaming import Metrics, StopReason
from ..types.tools import ToolChoice, ToolChoiceAuto, ToolConfig, ToolGenerator, ToolResult, ToolUse
from .streaming import stream_messages
Expand Down Expand Up @@ -187,6 +192,22 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
raise e

try:
if stop_reason == "max_tokens":
"""
Handle max_tokens limit reached by the model.

When the model reaches its maximum token limit, this represents a potentially unrecoverable
state where the model's response was truncated. By default, Strands fails hard with an
MaxTokensReachedException to maintain consistency with other failure types.
"""
raise MaxTokensReachedException(
message=(
"Agent has reached an unrecoverable state due to max_tokens limit. "
"For more information see: "
"https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
),
incomplete_message=message,
)
# Add message in trace and mark the end of the stream messages trace
stream_trace.add_message(message)
stream_trace.end()
Expand Down Expand Up @@ -231,7 +252,8 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
# Don't yield or log the exception - we already did it when we
# raised the exception and we don't need that duplication.
raise
except ContextWindowOverflowException as e:
except (ContextWindowOverflowException, MaxTokensReachedException) as e:
# Special cased exceptions which we want to bubble up rather than get wrapped in an EventLoopException
if cycle_span:
tracer.end_span_with_error(cycle_span, str(e), e)
raise e
Expand Down
2 changes: 1 addition & 1 deletion src/strands/models/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ async def structured_output(
stop_reason, messages, _, _ = event["stop"]

if stop_reason != "tool_use":
raise ValueError(f"Model returned stop_reason: {stop_reason} instead of \"tool_use\".")
raise ValueError(f'Model returned stop_reason: {stop_reason} instead of "tool_use".')

content = messages["content"]
output_response: dict[str, Any] | None = None
Expand Down
2 changes: 1 addition & 1 deletion src/strands/models/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ async def structured_output(
stop_reason, messages, _, _ = event["stop"]

if stop_reason != "tool_use":
raise ValueError(f"Model returned stop_reason: {stop_reason} instead of \"tool_use\".")
raise ValueError(f'Model returned stop_reason: {stop_reason} instead of "tool_use".')

content = messages["content"]
output_response: dict[str, Any] | None = None
Expand Down
21 changes: 21 additions & 0 deletions src/strands/types/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from typing import Any

from strands.types.content import Message


class EventLoopException(Exception):
"""Exception raised by the event loop."""
Expand All @@ -18,6 +20,25 @@ def __init__(self, original_exception: Exception, request_state: Any = None) ->
super().__init__(str(original_exception))


class MaxTokensReachedException(Exception):
    """Raised when generation stops because the model hit its output-token limit.

    The model's response was cut off mid-generation — either the configured
    ``max_tokens`` parameter was too small for the response being produced, or the
    model reached its hard output ceiling. The truncated message is kept on the
    exception so callers can inspect what was generated before the cutoff.
    """

    def __init__(self, message: str, incomplete_message: Message):
        """Record the truncated message and initialize the base exception.

        Args:
            message: Human-readable description of the token-limit failure.
            incomplete_message: The valid but partially generated Message object
                whose content was truncated by the token limit.
        """
        super().__init__(message)
        # Expose the truncated content for callers that want to recover it.
        self.incomplete_message = incomplete_message


class ContextWindowOverflowException(Exception):
"""Exception raised when the context window is exceeded.

Expand Down
52 changes: 51 additions & 1 deletion tests/strands/event_loop/test_event_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
)
from strands.telemetry.metrics import EventLoopMetrics
from strands.tools.registry import ToolRegistry
from strands.types.exceptions import ContextWindowOverflowException, EventLoopException, ModelThrottledException
from strands.types.exceptions import (
ContextWindowOverflowException,
EventLoopException,
MaxTokensReachedException,
ModelThrottledException,
)
from tests.fixtures.mock_hook_provider import MockHookProvider


Expand Down Expand Up @@ -556,6 +561,51 @@ async def test_event_loop_tracing_with_model_error(
mock_tracer.end_span_with_error.assert_called_once_with(model_span, "Input too long", model.stream.side_effect)


@pytest.mark.asyncio
async def test_event_loop_cycle_max_tokens_exception(
    agent,
    model,
    agenerator,
    alist,
):
    """Verify a max_tokens stop reason surfaces as MaxTokensReachedException."""
    expected_message = (
        "Agent has reached an unrecoverable state due to max_tokens limit. "
        "For more information see: "
        "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
    )

    # The empty toolUse covers the case raised in
    # https://github.com/strands-agents/sdk-python/issues/495
    events = [
        {
            "contentBlockStart": {
                "start": {
                    "toolUse": {},
                },
            },
        },
        {"contentBlockStop": {}},
        {"messageStop": {"stopReason": "max_tokens"}},
    ]
    model.stream.return_value = agenerator(events)

    # Draining the cycle must raise rather than complete normally.
    with pytest.raises(MaxTokensReachedException) as exc_info:
        stream = strands.event_loop.event_loop.event_loop_cycle(
            agent=agent,
            invocation_state={},
        )
        await alist(stream)

    # The exception carries the documented guidance message verbatim.
    assert str(exc_info.value) == expected_message

    # The truncated message must not be committed to the conversation history.
    assert len(agent.messages) == 1
    assert exc_info.value.incomplete_message not in agent.messages


@patch("strands.event_loop.event_loop.get_tracer")
@pytest.mark.asyncio
async def test_event_loop_tracing_with_tool_execution(
Expand Down
20 changes: 20 additions & 0 deletions tests_integ/test_max_tokens_reached.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest

from strands import Agent, tool
from strands.models.bedrock import BedrockModel
from strands.types.exceptions import MaxTokensReachedException


@tool
def story_tool(story: str) -> str:
    """Echo back the provided story text.

    Gives the agent a tool to invoke so the run produces lengthy tool-use output,
    which helps exhaust the deliberately small max_tokens budget in this test.
    """
    return story


def test_max_tokens_reached():
    """A tiny max_tokens budget must raise MaxTokensReachedException.

    Renamed from ``test_context_window_overflow`` (a copy-paste from the
    context-window tests): this file and this test exercise the max-tokens
    truncation path, not context-window overflow.

    The 100-token limit forces the model to truncate its response; the agent
    should fail hard with MaxTokensReachedException and leave only the original
    user prompt in the conversation history.
    """
    model = BedrockModel(max_tokens=100)
    agent = Agent(model=model, tools=[story_tool])

    with pytest.raises(MaxTokensReachedException):
        agent("Tell me a story!")

    # Only the initial user message should remain; the truncated model
    # response must not be appended to the history.
    assert len(agent.messages) == 1
Loading