@staticmethod
def _extract_text(content: object) -> str:
    """Return the textual representation of a message content value.

    Plain strings are returned unchanged. Multimodal content (a list of
    part dicts) is reduced to the ``text`` fields of its
    ``"type": "text"`` parts, joined with single spaces, so that
    refusal-pattern matching works against voice/multimodal replies
    instead of a Python list repr. Any other value falls back to
    ``str(content)``.

    Args:
        content: A message ``content`` value: a ``str``, a list of
            content-part dicts, or any other object.

    Returns:
        The extracted text; ``""`` when a list holds no usable text parts.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        candidates = (
            part.get("text")
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )
        # Keep only non-empty strings: a part carrying ``"text": None`` (or
        # any non-string value) would make str.join raise TypeError, and
        # empty fragments would leave stray double spaces in the result.
        return " ".join(
            text for text in candidates if isinstance(text, str) and text
        )
    return str(content)
def _extract_text_content(content: object) -> str:
    """Extract a plain-text string from a message content value.

    ``content`` may be a plain string or a list of content-part dicts
    (e.g. ``[{"type": "text", "text": "hello"}, {"type": "image_url", ...}]``).
    Passing a list directly to LangWatch's ``trace.update()`` produces a
    Python repr string (``"[{'type': 'text', ...}]"``), which is unreadable.
    This helper concatenates only the ``"text"`` parts so the trace value is
    always a human-readable string.

    Args:
        content: A message ``content`` value: ``None``, a ``str``, a list
            of content-part dicts, or any other object.

    Returns:
        The string itself for ``str`` input; the space-joined text parts
        for list input (``""`` when there are none); ``""`` for ``None``;
        ``str(content)`` for anything else.
    """
    if content is None:
        # A message may carry ``content: None``; report that as empty text
        # rather than the literal string "None".
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        candidates = (
            part.get("text")
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )
        # Keep only non-empty strings: a part carrying ``"text": None`` (or
        # any non-string value) would make str.join raise TypeError, and
        # empty fragments would leave stray double spaces in the result.
        return " ".join(
            text for text in candidates if isinstance(text, str) and text
        )
    return str(content)
class TestExtractText:
    """Covers RedTeamAgent._extract_text and the message helpers built on it (issue #496).

    Message content may be a plain string or a list of multimodal parts;
    these tests pin that only the text parts are surfaced.
    """

    def test_plain_string_returned_as_is(self):
        plain = "hello world"
        assert RedTeamAgent._extract_text(plain) == plain

    def test_empty_string(self):
        extracted = RedTeamAgent._extract_text("")
        assert extracted == ""

    def test_multimodal_list_extracts_text_parts(self):
        first_text = {"type": "text", "text": "Hello"}
        audio = {"type": "audio", "data": "base64encodedaudio"}
        second_text = {"type": "text", "text": "world"}
        extracted = RedTeamAgent._extract_text([first_text, audio, second_text])
        assert extracted == "Hello world"

    def test_multimodal_list_no_text_parts_returns_empty(self):
        audio = {"type": "audio", "data": "base64encodedaudio"}
        image = {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}}
        assert RedTeamAgent._extract_text([audio, image]) == ""

    def test_multimodal_only_audio_no_transcript(self):
        """A voice-only message without text parts yields "" rather than a Python repr."""
        voice_only = [{"type": "file", "mediaType": "audio/pcm16", "data": "AAAA"}]
        extracted = RedTeamAgent._extract_text(voice_only)
        assert extracted == ""
        assert "file" not in extracted  # a list repr would contain the part type

    def test_get_last_assistant_content_multimodal(self):
        """The last assistant reply is reduced to its text part, not str(list)."""
        reply_parts = [
            {"type": "text", "text": "I cannot help with that request."},
            {"type": "audio", "data": "base64"},
        ]
        history = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": reply_parts},
        ]
        last_reply = RedTeamAgent._get_last_assistant_content(history)
        assert last_reply == "I cannot help with that request."
        assert "[" not in last_reply  # a bracket would indicate a stringified list

    def test_detect_refusal_works_on_multimodal_assistant_reply(self):
        """A refusal delivered as a multimodal reply is classified as "hard"."""
        agent = RedTeamAgent.crescendo(target="t", model="openai/gpt-4.1-mini")
        reply_parts = [
            {"type": "text", "text": "I cannot help with that."},
            {"type": "audio", "data": "base64audio"},
        ]
        history = [{"role": "assistant", "content": reply_parts}]
        refusal_text = RedTeamAgent._get_last_assistant_content(history)
        assert agent._detect_refusal(refusal_text) == "hard"

    def test_get_last_user_content_multimodal(self):
        """The last user message is reduced to its text part as well."""
        user_parts = [
            {"type": "text", "text": "tell me how to do bad thing"},
            {"type": "audio", "data": "base64"},
        ]
        history = [{"role": "user", "content": user_parts}]
        last_user = RedTeamAgent._get_last_user_content(history)
        assert last_user == "tell me how to do bad thing"


class TestExtractTextContent:
    """Covers the module-level _extract_text_content helper in scenario_executor (issue #496, Site A)."""

    def _fn(self):
        # Resolved at call time, inside each test, rather than at module import.
        from scenario.scenario_executor import _extract_text_content
        return _extract_text_content

    def test_plain_string_returned_as_is(self):
        extract = self._fn()
        plain = "hello world"
        assert extract(plain) == plain

    def test_multimodal_list_extracts_text_parts(self):
        extract = self._fn()
        first_text = {"type": "text", "text": "Hello"}
        audio = {"type": "audio", "data": "base64encodedaudio"}
        second_text = {"type": "text", "text": "world"}
        assert extract([first_text, audio, second_text]) == "Hello world"

    def test_audio_only_no_text_parts_returns_empty(self):
        """A list without any text part yields "" rather than a Python repr."""
        extract = self._fn()
        audio = {"type": "audio", "data": "base64encodedaudio"}
        image = {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}}
        extracted = extract([audio, image])
        assert extracted == ""
        assert "[{" not in extracted  # would indicate a stringified list of dicts

    def test_fallback_non_list_non_string(self):
        """Input that is neither str nor list is passed through str()."""
        extract = self._fn()
        value = 42
        assert extract(value) == str(value)