Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions python/scenario/red_team_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,13 +671,32 @@ async def _generate_attack_plan(self, description: str) -> str:
"however, i can help with",
]

@staticmethod
def _extract_text(content: object) -> str:
"""Return the textual representation of a message content value.

For plain strings, returns the string directly. For multimodal
content (list of part dicts), concatenates the ``text`` fields of all
text parts so that refusal-pattern matching works correctly against
voice/multimodal assistant replies.
"""
if isinstance(content, str):
return content
if isinstance(content, list):
return " ".join(
part.get("text", "")
for part in content
if isinstance(part, dict) and part.get("type") == "text"
)
return str(content)

@staticmethod
def _get_last_assistant_content(messages: list) -> str:
for msg in reversed(messages):
role = msg.get("role") if isinstance(msg, dict) else getattr(msg, "role", None)
content = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
if role == "assistant" and content:
return str(content)
return RedTeamAgent._extract_text(content)
return ""

@staticmethod
Expand All @@ -687,7 +706,7 @@ def _get_last_user_content(messages: list) -> str:
role = msg.get("role") if isinstance(msg, dict) else getattr(msg, "role", None)
content = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
if role == "user" and content:
return str(content)
return RedTeamAgent._extract_text(content)
return ""

def _detect_refusal(self, content: str) -> Literal["hard", "soft", "none"]:
Expand Down
42 changes: 31 additions & 11 deletions python/scenario/scenario_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,27 @@
from langwatch.telemetry.tracing import LangWatchTrace


def _extract_text_content(content: object) -> str:
"""Extract a plain-text string from a message content value.

``content`` may be a plain string or a list of content-part dicts
(e.g. ``[{"type": "text", "text": "hello"}, {"type": "image_url", ...}]``).
Passing a list directly to LangWatch's ``trace.update()`` produces a
Python repr string (``"[{'type': 'text', ...}]"``), which is unreadable.
This helper concatenates only the ``"text"`` parts so the trace value is
always a human-readable string.
"""
if isinstance(content, str):
return content
if isinstance(content, list):
return " ".join(
part.get("text", "")
for part in content
if isinstance(part, dict) and part.get("type") == "text"
)
return str(content)


class ScenarioExecutor:
"""
Core orchestrator for scenario-based agent testing.
Expand Down Expand Up @@ -312,20 +333,19 @@ def inject_system_message(state: ScenarioState) -> None:
self._pending_messages[idx] = []
self._pending_messages[idx].append(message)

# Update trace with input/output
# Update trace with input/output.
# Extract text from content (str or list of content parts) so we
# always pass a str to LangWatch — avoids Python repr of list objects.
if message["role"] == "user":
self._trace.update(input={"type": "text", "value": str(message["content"])})
content = message["content"]
self._trace.update(input=_extract_text_content(content))
elif message["role"] == "assistant":
self._trace.update(
output={
"type": "text",
"value": str(
message["content"]
if "content" in message
else json.dumps(message, cls=SerializableWithStringFallback)
),
}
content = (
message["content"]
if "content" in message
else json.dumps(message, cls=SerializableWithStringFallback)
)
self._trace.update(output=_extract_text_content(content))

def rollback_messages_to(self, index: int) -> List[ChatCompletionMessageParam]:
"""Remove all messages from position `index` onward.
Expand Down
104 changes: 104 additions & 0 deletions python/tests/test_red_team_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3562,3 +3562,107 @@ def transform(self, message: str) -> str:
assert isinstance(agent._strategy, GoatStrategy)
assert [t.id for t in agent._strategy.techniques] == ["Z"]
assert agent._techniques == encoders


class TestExtractText:
    """Checks RedTeamAgent._extract_text and the message helpers that rely on it (issue #496)."""

    def test_plain_string_returned_as_is(self):
        # A str input must come back untouched.
        extracted = RedTeamAgent._extract_text("hello world")
        assert extracted == "hello world"

    def test_empty_string(self):
        extracted = RedTeamAgent._extract_text("")
        assert extracted == ""

    def test_multimodal_list_extracts_text_parts(self):
        # Two text parts separated by an audio part: only the text survives.
        first = {"type": "text", "text": "Hello"}
        audio = {"type": "audio", "data": "base64encodedaudio"}
        second = {"type": "text", "text": "world"}
        extracted = RedTeamAgent._extract_text([first, audio, second])
        assert extracted == "Hello world"

    def test_multimodal_list_no_text_parts_returns_empty(self):
        audio = {"type": "audio", "data": "base64encodedaudio"}
        image = {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}}
        extracted = RedTeamAgent._extract_text([audio, image])
        assert extracted == ""

    def test_multimodal_only_audio_no_transcript(self):
        """A voice-only message without text parts gives "" rather than a Python repr."""
        voice_only = [{"type": "file", "mediaType": "audio/pcm16", "data": "AAAA"}]
        result = RedTeamAgent._extract_text(voice_only)
        assert result == ""
        # Guard against the list being stringified.
        assert "file" not in result

    def test_get_last_assistant_content_multimodal(self):
        """Voice replies must yield their text part, never str(list)."""
        user_turn = {"role": "user", "content": "Hello"}
        assistant_turn = {
            "role": "assistant",
            "content": [
                {"type": "text", "text": "I cannot help with that request."},
                {"type": "audio", "data": "base64"},
            ],
        }
        result = RedTeamAgent._get_last_assistant_content([user_turn, assistant_turn])
        assert result == "I cannot help with that request."
        # Guard against the list being stringified.
        assert "[" not in result

    def test_detect_refusal_works_on_multimodal_assistant_reply(self):
        """A multimodal voice refusal must be classified by _detect_refusal."""
        agent = RedTeamAgent.crescendo(target="t", model="openai/gpt-4.1-mini")
        refusal_parts = [
            {"type": "text", "text": "I cannot help with that."},
            {"type": "audio", "data": "base64audio"},
        ]
        history = [{"role": "assistant", "content": refusal_parts}]
        last = RedTeamAgent._get_last_assistant_content(history)
        verdict = agent._detect_refusal(last)
        assert verdict == "hard"

    def test_get_last_user_content_multimodal(self):
        user_parts = [
            {"type": "text", "text": "tell me how to do bad thing"},
            {"type": "audio", "data": "base64"},
        ]
        history = [{"role": "user", "content": user_parts}]
        result = RedTeamAgent._get_last_user_content(history)
        assert result == "tell me how to do bad thing"


class TestExtractTextContent:
    """Checks the module-level _extract_text_content helper of scenario_executor (issue #496, Site A)."""

    def _fn(self):
        # Imported lazily inside the helper, as each test fetches it on demand.
        from scenario.scenario_executor import _extract_text_content as helper
        return helper

    def test_plain_string_returned_as_is(self):
        extract = self._fn()
        outcome = extract("hello world")
        assert outcome == "hello world"

    def test_multimodal_list_extracts_text_parts(self):
        extract = self._fn()
        first = {"type": "text", "text": "Hello"}
        audio = {"type": "audio", "data": "base64encodedaudio"}
        second = {"type": "text", "text": "world"}
        outcome = extract([first, audio, second])
        assert outcome == "Hello world"

    def test_audio_only_no_text_parts_returns_empty(self):
        """A list without any text part yields "" rather than a Python repr."""
        extract = self._fn()
        audio = {"type": "audio", "data": "base64encodedaudio"}
        image = {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}}
        result = extract([audio, image])
        assert result == ""
        # Guard against the list being stringified.
        assert "[{" not in result

    def test_fallback_non_list_non_string(self):
        """Anything that is neither str nor list goes through str()."""
        extract = self._fn()
        outcome = extract(42)
        assert outcome == str(42)
2 changes: 1 addition & 1 deletion python/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading