From 4324cdfd42a69c82d5fbad8c7895a0f64b1ab786 Mon Sep 17 00:00:00 2001
From: AlinsRan <alinsran@apache.org>
Date: Thu, 2 Jul 2026 16:08:42 +0800
Subject: [PATCH] feat(ai-aliyun-content-moderation): moderate system and tool
 role content

Extend request-side moderation beyond the user role via a new
request_check_roles option (array, default ["user"], backward compatible):

- user/tool follow request_check_mode; "last" walks the trailing block of
  selected-role messages, so a fresh user turn or the current round's tool
  results are moderated without re-checking history.
- system ignores request_check_mode and is moderated on every request (all
  system messages), because it can be poisoned by malicious ToolCall
  arguments overwriting the system prompt.

Protocol layer replaces extract_user_content(body, mode) with
extract_turn_content(body, mode, roles) and adds
extract_system_content(body) across openai-chat/anthropic-messages/
openai-responses/bedrock-converse/openai-embeddings. A configured role with
no extractor on the current protocol is routed through binding.on_unsupported
so fail_mode decides, instead of silently passing unmoderated.

Note: tool-result moderation applies to OpenAI-compatible formats where the
tool output is a distinct tool role/item (a "tool" message, or a Responses
function_call_output item); Anthropic/Bedrock nest tool results in user
messages and are not extracted (documented).
---
 .../plugins/ai-aliyun-content-moderation.lua  |  83 +++-
 .../ai-protocols/anthropic-messages.lua       |  39 +-
 .../plugins/ai-protocols/bedrock-converse.lua |  51 ++-
 apisix/plugins/ai-protocols/openai-chat.lua   |  38 +-
 .../ai-protocols/openai-embeddings.lua        |  15 +-
 .../plugins/ai-protocols/openai-responses.lua |  96 ++++-
 .../plugins/ai-aliyun-content-moderation.md   |   3 +-
 .../plugins/ai-aliyun-content-moderation.md   |   3 +-
 t/plugin/ai-aliyun-content-moderation.t       | 359 ++++++++++++++++++
 9 files changed, 622 insertions(+), 65 deletions(-)

diff --git a/apisix/plugins/ai-aliyun-content-moderation.lua b/apisix/plugins/ai-aliyun-content-moderation.lua
index 6e41364d9a0a..a796f3229333 100644
--- a/apisix/plugins/ai-aliyun-content-moderation.lua
+++ b/apisix/plugins/ai-aliyun-content-moderation.lua
@@ -19,6 +19,7 @@ local ngx_ok    = ngx.OK
 local os        = os
 local pairs     = pairs
 local ipairs    = ipairs
+local next      = next
 local table     = table
 local string    = string
 local type      = type
@@ -66,8 +67,24 @@ local schema = {
             enum = {"last", "all"},
             default = "last",
             description = [[
-            which user messages to moderate: last (only the latest consecutive user
-            message block) | all (every user message). Both ignore non-user roles.
+            which user/tool messages to moderate: last (only the latest consecutive
+            block of selected-role messages) | all (every selected-role message).
+            Does not apply to the system role, which is always checked.
+            ]]
+        },
+        request_check_roles = {
+            type = "array",
+            items = {type = "string", enum = {"user", "tool", "system"}},
+            minItems = 1,
+            uniqueItems = true,
+            default = {"user"},
+            description = [[
+            which message roles to moderate on the request side. user/tool follow
+            request_check_mode; system is checked on every request because it can
+            be poisoned by malicious ToolCall arguments. Note: tool-result
+            moderation applies to OpenAI-compatible formats where the tool output
+            is a distinct "tool" role/item; for Anthropic/Bedrock (tool results
+            are nested blocks inside user messages) tool content is not extracted.
             ]]
         },
         request_check_service = {type = "string", minLength = 1, default = "llm_query_moderation"},
@@ -390,23 +407,63 @@ function _M.access(conf, ctx)
         return
     end
 
-    -- Request moderation targets user input only (request_check_mode: "last" =
-    -- latest user turn, "all" = every user message). Protocols that can't surface
-    -- user-role content have nothing to moderate, so the request passes through.
-    local contents = proto.extract_user_content
-        and proto.extract_user_content(request_tab, conf.request_check_mode)
+    -- Content-Type for a denied request: SSE when request_type is "ai_stream", else JSON.
+    local function set_deny_content_type()
+        local is_stream = ctx.var.request_type == "ai_stream"
+        core.response.set_header("Content-Type",
+                                 is_stream and "text/event-stream"
+                                 or "application/json")
+    end
+
+    local roles = {}
+    for _, r in ipairs(conf.request_check_roles) do
+        roles[r] = true
+    end
+    local turn_roles = {}
+    if roles.user then turn_roles.user = true end
+    if roles.tool then turn_roles.tool = true end
+
+    -- A configured role whose extractor this protocol doesn't implement would
+    -- otherwise pass unmoderated. Route that through fail_mode instead of
+    -- silently skipping the configured moderation.
+    if (roles.system and not proto.extract_system_content)
+            or (next(turn_roles) and not proto.extract_turn_content) then
+        local handled, code, body = binding.on_unsupported(
+            conf.fail_mode, _M.name, ctx,
+            "protocol cannot extract configured request_check_roles",
+            500, "protocol " .. (ctx.ai_client_protocol or "unknown")
+                .. " cannot moderate the configured request_check_roles")
+        if handled then
+            return code, body
+        end
+        return
+    end
+
+    -- The system prompt is checked on every request (not subject to
+    -- request_check_mode) because it can be poisoned by malicious ToolCall
+    -- arguments. All system messages are moderated; deduping unchanged system
+    -- content via a cache is deferred to a later iteration.
+    if roles.system then
+        local system_text = table.concat(proto.extract_system_content(request_tab), " ")
+        local code, message = request_content_moderation(ctx, conf, system_text)
+        release_cm_httpc(ctx, conf)
+        if code then
+            set_deny_content_type()
+            return code, message
+        end
+    end
+
+    -- user/tool turn moderation follows request_check_mode ("last" = latest turn,
+    -- "all" = every selected-role message).
+    local contents = next(turn_roles)
+        and proto.extract_turn_content(request_tab, conf.request_check_mode, turn_roles)
         or {}
     local content_to_check = table.concat(contents, " ")
 
     local code, message = request_content_moderation(ctx, conf, content_to_check)
     release_cm_httpc(ctx, conf)
     if code then
-        local stream = ctx.var.request_type == "ai_stream"
-        if stream then
-            core.response.set_header("Content-Type", "text/event-stream")
-        else
-            core.response.set_header("Content-Type", "application/json")
-        end
+        set_deny_content_type()
         return code, message
     end
 end
diff --git a/apisix/plugins/ai-protocols/anthropic-messages.lua b/apisix/plugins/ai-protocols/anthropic-messages.lua
index 5c9c286a922c..a535ef4fa937 100644
--- a/apisix/plugins/ai-protocols/anthropic-messages.lua
+++ b/apisix/plugins/ai-protocols/anthropic-messages.lua
@@ -244,11 +244,18 @@ function _M.extract_request_content(body)
 end
 
 
--- Extract text from user-role messages for request moderation.
--- mode "last" (default): only the last consecutive block of user messages (the
--- latest user turn); mode "all": every user message. Non-user roles are ignored
--- (the Anthropic system prompt lives in body.system, not in messages).
-function _M.extract_user_content(body, mode)
+local function is_turn_role(message, roles)
+    return type(message) == "table" and message.role ~= nil and roles[message.role]
+end
+
+
+-- Extract text from turn-role messages (user/tool) for request moderation.
+-- `roles` is a set such as {user = true, tool = true} selecting which roles to
+-- collect. mode "last" (default): only the last consecutive block of messages
+-- whose role is in `roles` (the latest turn); mode "all": every such message.
+-- The Anthropic system prompt lives in body.system and is handled separately by
+-- extract_system_content.
+function _M.extract_turn_content(body, mode, roles)
     local contents = {}
     if type(body.messages) ~= "table" then
         return contents
@@ -258,7 +265,7 @@ function _M.extract_user_content(body, mode)
     if mode ~= "all" then
         start_idx = nil
         for i = #messages, 1, -1 do
-            if type(messages[i]) == "table" and messages[i].role == "user" then
+            if is_turn_role(messages[i], roles) then
                 start_idx = i
             else
                 break
@@ -269,7 +276,7 @@ function _M.extract_user_content(body, mode)
         end
     end
     for i = start_idx, #messages do
-        if type(messages[i]) == "table" and messages[i].role == "user" then
+        if is_turn_role(messages[i], roles) then
             append_message_text(contents, messages[i])
         end
     end
@@ -277,6 +284,24 @@ function _M.extract_user_content(body, mode)
 end
 
 
+-- Extract system-role text for request moderation. Anthropic carries the system
+-- prompt in body.system (a string or an array of text blocks), not in messages.
+function _M.extract_system_content(body)
+    local system = body.system
+    if type(system) == "string" then
+        return {system}
+    end
+    local texts = {}
+    for _, block in ipairs(type(system) == "table" and system or {}) do
+        if type(block) == "table" and block.type == "text"
+                and type(block.text) == "string" then
+            core.table.insert(texts, block.text)
+        end
+    end
+    return texts
+end
+
+
 --- Get messages in canonical {role, content} format.
 -- Anthropic content blocks are flattened to plain text.
 function _M.get_messages(body)
diff --git a/apisix/plugins/ai-protocols/bedrock-converse.lua b/apisix/plugins/ai-protocols/bedrock-converse.lua
index 7fecd3efcd2d..8c6ca60ce77f 100644
--- a/apisix/plugins/ai-protocols/bedrock-converse.lua
+++ b/apisix/plugins/ai-protocols/bedrock-converse.lua
@@ -201,10 +201,28 @@ function _M.extract_request_content(body)
 end
 
 
--- Extract text from user-role messages for request moderation (mode "last" =
--- latest user turn, "all" = every user message). The `system` blocks and
--- non-user messages are ignored.
-function _M.extract_user_content(body, mode)
+local function is_turn_role(message, roles)
+    return type(message) == "table" and message.role ~= nil and roles[message.role]
+end
+
+
+-- Append to `contents` the `text` of every block of message.content that is a
+-- table carrying a string `text`; a non-table content contributes nothing.
+local function append_message_blocks(contents, message)
+    if type(message.content) ~= "table" then return end
+    for _, block in ipairs(message.content) do
+        local text = type(block) == "table" and block.text
+        if type(text) == "string" then core.table.insert(contents, text) end
+    end
+end
+
+
+-- Extract text from turn-role messages (user/tool) for request moderation.
+-- `roles` is a set such as {user = true, tool = true} selecting which roles to
+-- collect. mode "last" = the latest consecutive block of selected-role messages,
+-- "all" = every such message. Bedrock `system` blocks are handled separately by
+-- extract_system_content.
+function _M.extract_turn_content(body, mode, roles)
     local contents = {}
     if type(body.messages) ~= "table" then
         return contents
@@ -214,7 +232,7 @@ function _M.extract_user_content(body, mode)
     if mode ~= "all" then
         start_idx = nil
         for i = #messages, 1, -1 do
-            if type(messages[i]) == "table" and messages[i].role == "user" then
+            if is_turn_role(messages[i], roles) then
                 start_idx = i
             else
                 break
@@ -225,13 +243,22 @@ function _M.extract_user_content(body, mode)
         end
     end
     for i = start_idx, #messages do
-        local message = messages[i]
-        if type(message) == "table" and message.role == "user"
-                and type(message.content) == "table" then
-            for _, block in ipairs(message.content) do
-                if type(block) == "table" and type(block.text) == "string" then
-                    core.table.insert(contents, block.text)
-                end
+        if is_turn_role(messages[i], roles) then
+            append_message_blocks(contents, messages[i])
+        end
+    end
+    return contents
+end
+
+
+-- Extract system-role text for request moderation. Bedrock carries the system
+-- prompt in body.system (an array of text blocks), not in messages.
+function _M.extract_system_content(body)
+    local contents = {}
+    if type(body.system) == "table" then
+        for _, block in ipairs(body.system) do
+            if type(block) == "table" and type(block.text) == "string" then
+                core.table.insert(contents, block.text)
             end
         end
     end
diff --git a/apisix/plugins/ai-protocols/openai-chat.lua b/apisix/plugins/ai-protocols/openai-chat.lua
index ce54d736ad32..224be1167b05 100644
--- a/apisix/plugins/ai-protocols/openai-chat.lua
+++ b/apisix/plugins/ai-protocols/openai-chat.lua
@@ -245,11 +245,19 @@ function _M.extract_request_content(body)
 end
 
 
--- Extract text from user-role messages for request moderation.
--- mode "last" (default): only the last consecutive block of user messages (the
--- latest user turn); mode "all": every user message. Non-user roles are ignored
--- because the query moderation service is meant for user input.
-function _M.extract_user_content(body, mode)
+local function is_turn_role(message, roles)
+    return type(message) == "table" and message.role ~= nil and roles[message.role]
+end
+
+
+-- Extract text from turn-role messages (user/tool) for request moderation.
+-- `roles` is a set such as {user = true, tool = true} selecting which roles to
+-- collect. mode "last" (default): only the last consecutive block of messages
+-- whose role is in `roles` -- the latest turn, i.e. a fresh user message or the
+-- tool results appended in the current agent round, so history is not re-checked.
+-- mode "all": every such message. The system role is handled separately by
+-- extract_system_content because it is not subject to the last-turn rule.
+function _M.extract_turn_content(body, mode, roles)
     local contents = {}
     if type(body.messages) ~= "table" then
         return contents
@@ -259,7 +267,7 @@ function _M.extract_user_content(body, mode)
     if mode ~= "all" then
         start_idx = nil
         for i = #messages, 1, -1 do
-            if type(messages[i]) == "table" and messages[i].role == "user" then
+            if is_turn_role(messages[i], roles) then
                 start_idx = i
             else
                 break
@@ -270,7 +278,7 @@ function _M.extract_user_content(body, mode)
         end
     end
     for i = start_idx, #messages do
-        if type(messages[i]) == "table" and messages[i].role == "user" then
+        if is_turn_role(messages[i], roles) then
             append_message_text(contents, messages[i])
         end
     end
@@ -278,6 +286,22 @@ function _M.extract_user_content(body, mode)
 end
 
 
+-- Extract system-role text for request moderation. Unlike turn content, the
+-- system prompt is checked on every request (it can be poisoned by malicious
+-- ToolCall arguments), so the last-turn rule does not apply here.
+function _M.extract_system_content(body)
+    local system_texts = {}
+    local messages = body.messages
+    if type(messages) ~= "table" then return system_texts end
+    for _, msg in ipairs(messages) do
+        if type(msg) == "table" and msg.role == "system" then
+            append_message_text(system_texts, msg)
+        end
+    end
+    return system_texts
+end
+
+
 --- Get messages in canonical {role, content} format.
 function _M.get_messages(body)
     return body.messages or {}
diff --git a/apisix/plugins/ai-protocols/openai-embeddings.lua b/apisix/plugins/ai-protocols/openai-embeddings.lua
index 96f8faa2a963..d1e6a5a283eb 100644
--- a/apisix/plugins/ai-protocols/openai-embeddings.lua
+++ b/apisix/plugins/ai-protocols/openai-embeddings.lua
@@ -90,13 +90,22 @@ function _M.extract_request_content(body)
 end
 
 
--- Embeddings has no message roles; the `input` text is the user content. The
--- mode argument does not apply (no conversation turns).
-function _M.extract_user_content(body, _)
+-- Embeddings has no message roles; the `input` text is user content. mode and
+-- roles do not apply, but the input is only surfaced when user is selected.
+function _M.extract_turn_content(body, _, roles)
+    if roles and not roles.user then
+        return {}
+    end
     return _M.extract_request_content(body)
 end
 
 
+-- Embeddings requests have no system prompt, so no system text is ever returned.
+function _M.extract_system_content(_)
+    return {}
+end
+
+
 function _M.get_messages(body)
     local messages = {}
     if body and body.input then
diff --git a/apisix/plugins/ai-protocols/openai-responses.lua b/apisix/plugins/ai-protocols/openai-responses.lua
index 8c263a0fbbc9..eecda969c737 100644
--- a/apisix/plugins/ai-protocols/openai-responses.lua
+++ b/apisix/plugins/ai-protocols/openai-responses.lua
@@ -223,18 +223,65 @@ function _M.extract_request_content(body)
 end
 
 
--- Extract user input text for request moderation. A plain-string `input` is the
--- user's content. For an `input` array, only user-role items are considered
--- (mode "last" = latest user turn, "all" = every user item); `instructions` (the
--- system prompt) and non-user items are ignored.
-local function is_user_item(item)
-    return type(item) == "string" or (type(item) == "table" and item.role == "user")
+-- Append an input item's text into `contents`: a bare string, a role item's
+-- string/parts content, or a tool output item's `output` text.
+local function append_item_text(contents, item)
+    if type(item) == "string" then
+        core.table.insert(contents, item)
+        return
+    end
+    if type(item) ~= "table" then
+        return
+    end
+    -- `content` (role items) takes precedence when it is a string or a table;
+    -- `output` (tool results, string or array of parts) is read only otherwise.
+    local payload = item.content
+    if type(payload) ~= "string" and type(payload) ~= "table" then
+        payload = item.output
+    end
+    if type(payload) == "string" then
+        core.table.insert(contents, payload)
+    elseif type(payload) == "table" then
+        for _, part in ipairs(payload) do
+            if type(part) == "table" and type(part.text) == "string" then
+                core.table.insert(contents, part.text)
+            end
+        end
+    end
+end
+
+
+-- Whether an input item belongs to a selected turn role. A bare string is user
+-- text; a role item matches when its role is in `roles`; a Responses-API tool
+-- result (`function_call_output`, which has no role) matches when tool is selected.
+local function turn_item_matches(item, roles)
+    local kind = type(item)
+    local selected
+    if kind == "string" then
+        selected = roles.user
+    elseif kind ~= "table" then
+        selected = false
+    elseif item.role ~= nil then
+        selected = roles[item.role]
+    else
+        -- role-less item: only a function_call_output counts, and only for tool
+        selected = roles.tool and item.type == "function_call_output"
+    end
+    return not not selected
+end
-function _M.extract_user_content(body, mode)
+
+
+-- Extract turn-role (user/tool) input text for request moderation. A plain-string
+-- `input` is user content. For an `input` array, mode "last" = the latest
+-- consecutive block of selected-role items, "all" = every selected-role item.
+-- `instructions` (the system prompt) is handled by extract_system_content.
+function _M.extract_turn_content(body, mode, roles)
     local contents = {}
     local input = body.input
     if type(input) == "string" then
-        core.table.insert(contents, input)
+        if roles.user then
+            core.table.insert(contents, input)
+        end
         return contents
     end
     if type(input) ~= "table" then
@@ -244,7 +291,7 @@ function _M.extract_user_content(body, mode)
     if mode ~= "all" then
         start_idx = nil
         for i = #input, 1, -1 do
-            if is_user_item(input[i]) then
+            if turn_item_matches(input[i], roles) then
                 start_idx = i
             else
                 break
@@ -255,18 +302,25 @@ function _M.extract_user_content(body, mode)
         end
     end
     for i = start_idx, #input do
-        local item = input[i]
-        if type(item) == "string" then
-            core.table.insert(contents, item)
-        elseif type(item) == "table" and item.role == "user" and item.content then
-            if type(item.content) == "string" then
-                core.table.insert(contents, item.content)
-            elseif type(item.content) == "table" then
-                for _, part in ipairs(item.content) do
-                    if type(part) == "table" and part.text then
-                        core.table.insert(contents, part.text)
-                    end
-                end
+        if turn_item_matches(input[i], roles) then
+            append_item_text(contents, input[i])
+        end
+    end
+    return contents
+end
+
+
+-- Extract system-role text for request moderation. Responses API carries the
+-- system prompt in `instructions`; an `input` array may also hold system items.
+function _M.extract_system_content(body)
+    local contents = {}
+    if type(body.instructions) == "string" then
+        core.table.insert(contents, body.instructions)
+    end
+    if type(body.input) == "table" then
+        for _, item in ipairs(body.input) do
+            if type(item) == "table" and item.role == "system" then
+                append_item_text(contents, item)
             end
         end
     end
diff --git a/docs/en/latest/plugins/ai-aliyun-content-moderation.md b/docs/en/latest/plugins/ai-aliyun-content-moderation.md
index 98b2344fe402..2ca235ee0792 100644
--- a/docs/en/latest/plugins/ai-aliyun-content-moderation.md
+++ b/docs/en/latest/plugins/ai-aliyun-content-moderation.md
@@ -58,7 +58,8 @@ The `ai-aliyun-content-moderation` Plugin should be used with either [`ai-proxy`
 | stream_check_mode | string | False | `"final_packet"` | `realtime`, `final_packet` | Streaming moderation mode. `realtime`: batched checks during streaming. `final_packet`: append risk level at the end. |
 | stream_check_cache_size | integer | False | `128` | >= 1 | Maximum bytes per moderation batch in `realtime` mode. Length is measured using Lua string length, so for UTF-8 text non-ASCII characters may consume multiple bytes. |
 | stream_check_interval | number | False | `3` | >= 0.1 | Seconds between batch checks in `realtime` mode. |
-| request_check_mode | string | False | `"last"` | `last`, `all` | Which user messages to moderate. `last`: only the latest consecutive block of user messages (the newest user turn). `all`: every user message. Both modes consider only `user`-role messages; `system`, `assistant` and `tool` messages are ignored. |
+| request_check_roles | array[string] | False | `["user"]` | items are `user`, `tool`, `system` | Which message roles to moderate on the request side. `user` and `tool` follow `request_check_mode`; `system` is checked on every request (it can be poisoned by malicious ToolCall arguments overwriting the system prompt). The default `["user"]` preserves the previous behavior. Note: tool-result moderation applies to OpenAI-compatible formats where the tool output is a distinct `tool` role/item; for Anthropic and Bedrock (tool results are nested blocks inside user messages) tool content is not extracted. |
+| request_check_mode | string | False | `"last"` | `last`, `all` | Which user/tool messages to moderate. `last`: only the latest consecutive block of selected-role messages (the newest turn). `all`: every selected-role message. Does not apply to `system`, which is always moderated when enabled via `request_check_roles`. |
 | request_check_service | string | False | `"llm_query_moderation"` | | Aliyun service for request moderation. |
 | request_check_length_limit | number | False | `2000` | >= 1 | Request content length limit. If exceeded, the content is sent to Aliyun in chunks. For instance, if the request content is 250 characters and `request_check_length_limit` is set to `100`, the content is sent in 3 requests to Aliyun. |
 | response_check_service | string | False | `"llm_response_moderation"` | | Aliyun service for response moderation. |
diff --git a/docs/zh/latest/plugins/ai-aliyun-content-moderation.md b/docs/zh/latest/plugins/ai-aliyun-content-moderation.md
index 04489e36d28a..57d48a9a64d9 100644
--- a/docs/zh/latest/plugins/ai-aliyun-content-moderation.md
+++ b/docs/zh/latest/plugins/ai-aliyun-content-moderation.md
@@ -58,7 +58,8 @@ import TabItem from '@theme/TabItem';
 | stream_check_mode | string | 否 | `"final_packet"` | `realtime`、`final_packet` | 流式审核模式。`realtime`：流式传输期间批量检查。`final_packet`：在最后附加风险等级。 |
 | stream_check_cache_size | integer | 否 | `128` | >= 1 | `realtime` 模式下每次审核批次的最大字节数（按 UTF-8 编码后的字节长度计算）。 |
 | stream_check_interval | number | 否 | `3` | >= 0.1 | `realtime` 模式下批次检查之间的间隔秒数。 |
-| request_check_mode | string | 否 | `"last"` | `last`, `all` | 审核哪些 user 消息。`last`：仅审核最后一段连续的 user 消息（最新的用户轮次）；`all`：审核所有 user 消息。两种模式都只处理 `user` 角色的消息，`system`、`assistant`、`tool` 消息会被忽略。 |
+| request_check_roles | array[string] | 否 | `["user"]` | 取值为 `user`、`tool`、`system` | 请求侧审核哪些消息角色。`user` 与 `tool` 遵循 `request_check_mode`；`system` 每次请求都审核（其可能被恶意 ToolCall 参数覆盖篡改）。默认 `["user"]` 保持既有行为。注意：tool 结果审核适用于 OpenAI 兼容格式（tool 输出为独立的 `tool` 角色/项）；Anthropic、Bedrock 的 tool 结果以嵌套 block 形式存在于 user 消息中，其内容不会被抽取。 |
+| request_check_mode | string | 否 | `"last"` | `last`, `all` | 审核哪些 user/tool 消息。`last`：仅审核最后一段连续的所选角色消息（最新一轮）；`all`：审核所有所选角色消息。不作用于 `system`——只要通过 `request_check_roles` 启用，`system` 每次都审核。 |
 | request_check_service | string | 否 | `"llm_query_moderation"` | | 用于请求审核的阿里云服务。 |
 | request_check_length_limit | number | 否 | `2000` | >= 1 | 请求内容长度上限。如果超过该限制，内容将分块发送到阿里云。例如，如果请求内容为 250 个字符，且 `request_check_length_limit` 设置为 `100`，则内容将分 3 次请求发送到阿里云。 |
 | response_check_service | string | 否 | `"llm_response_moderation"` | | 用于响应审核的阿里云服务。 |
diff --git a/t/plugin/ai-aliyun-content-moderation.t b/t/plugin/ai-aliyun-content-moderation.t
index 9588b9a3947c..603e674bc184 100644
--- a/t/plugin/ai-aliyun-content-moderation.t
+++ b/t/plugin/ai-aliyun-content-moderation.t
@@ -1922,3 +1922,362 @@ qr/cannot write unethical/
     }
 --- response_body
 rejected
+
+
+
+=== TEST 57: extract_turn_content - last mode uses the trailing user/tool block
+--- config
+    location /t {
+        content_by_lua_block {
+            -- last keeps only the trailing run of selected roles, all keeps every selected message
+            local chat = require("apisix.plugins.ai-protocols.openai-chat")
+            local turn = chat.extract_turn_content
+            local req = { messages = {
+                { role = "system", content = "sys" },
+                { role = "user", content = "u1" },
+                { role = "assistant", tool_calls = {} },
+                { role = "tool", content = "t1" },
+                { role = "tool", content = "t2" },
+            }}
+            local user_and_tool = { user = true, tool = true }
+            ngx.say("last:", table.concat(turn(req, "last", user_and_tool), ","))
+            ngx.say("all:", table.concat(turn(req, "all", user_and_tool), ","))
+            -- user alone: the conversation ends with tool messages, so last yields nothing
+            ngx.say("user_only_last_count:", #turn(req, "last", { user = true }))
+            ngx.say("system:", table.concat(chat.extract_system_content(req), ","))
+        }
+    }
+--- response_body
+last:t1,t2
+all:u1,t1,t2
+user_only_last_count:0
+system:sys
+
+
+
+=== TEST 58: extract_turn_content - a fresh user turn skips harmful history
+--- config
+    location /t {
+        content_by_lua_block {
+            local chat = require("apisix.plugins.ai-protocols.openai-chat")
+            -- the harmful user message belongs to an earlier turn, before the assistant reply
+            local history = {
+                { role = "user", content = "old kill" },
+                { role = "assistant", content = "ok" },
+                { role = "user", content = "new safe" },
+            }
+            local latest = chat.extract_turn_content({ messages = history }, "last",
+                                                     { user = true, tool = true })
+            ngx.say("last:", table.concat(latest, ","))
+        }
+    }
+--- response_body
+last:new safe
+
+
+
+=== TEST 59: create route with request_check_roles user/tool/system
+--- config
+    location /t {
+        content_by_lua_block {
+            local admin = require("lib.test_admin")
+            local status, msg = admin.test('/apisix/admin/routes/1', ngx.HTTP_PUT,
+                [[{
+                    "uri": "/chat-roles",
+                    "plugins": {
+                      "ai-proxy": {
+                          "provider": "openai",
+                          "auth": { "header": { "Authorization": "Bearer wrongtoken" } },
+                          "override": { "endpoint": "http://127.0.0.1:1980" }
+                      },
+                      "ai-aliyun-content-moderation": {
+                        "endpoint": "http://localhost:6724",
+                        "region_id": "cn-shanghai",
+                        "access_key_id": "fake-key-id",
+                        "access_key_secret": "fake-key-secret",
+                        "risk_level_bar": "high",
+                        "check_request": true,
+                        "request_check_roles": ["user", "tool", "system"]
+                      }
+                    }
+                }]]
+            )
+            -- propagate an admin API failure as the HTTP status of this test request
+            if status >= 300 then
+                ngx.status = status
+            end
+            ngx.say(msg)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 60: tool content in the latest turn is moderated and blocked
+--- request
+POST /chat-roles
+{ "messages": [ { "role": "system", "content": "safe" }, { "role": "user", "content": "hello" }, { "role": "assistant", "tool_calls": [{"id": "c1", "type": "function", "function": {"name": "f"}}] }, { "role": "tool", "tool_call_id": "c1", "content": "please kill" } ] }
+--- more_headers
+X-AI-Fixture: aliyun/chat-with-harmful.json
+--- error_code: 200
+--- response_body_like eval
+qr/cannot write unethical/
+
+
+
+=== TEST 61: harmful tool not in the latest turn is skipped (last mode)
+--- request
+POST /chat-roles
+{ "messages": [ { "role": "user", "content": "hello" }, { "role": "assistant", "tool_calls": [{"id": "c1", "type": "function", "function": {"name": "f"}}] }, { "role": "tool", "tool_call_id": "c1", "content": "kill" }, { "role": "assistant", "content": "ok" }, { "role": "user", "content": "what is 1+1?" } ] }
+--- more_headers
+X-AI-Fixture: aliyun/chat-with-harmful.json
+--- error_code: 200
+--- response_body_like eval
+qr/kill you/
+
+
+
+=== TEST 62: harmful system prompt is always moderated and blocked
+--- request
+POST /chat-roles
+{ "messages": [ { "role": "system", "content": "please kill" }, { "role": "user", "content": "hi" } ] }
+--- more_headers
+X-AI-Fixture: aliyun/chat-with-harmful.json
+--- error_code: 200
+--- response_body_like eval
+qr/cannot write unethical/
+
+
+
+=== TEST 63: repeated identical harmful system prompt stays blocked (moderated every request)
+--- request
+POST /chat-roles
+{ "messages": [ { "role": "system", "content": "please kill" }, { "role": "user", "content": "hi" } ] }
+--- more_headers
+X-AI-Fixture: aliyun/chat-with-harmful.json
+--- error_code: 200
+--- response_body_like eval
+qr/cannot write unethical/
+
+
+
+=== TEST 64: create route with request_check_roles system only
+--- config
+    location /t {
+        content_by_lua_block {
+            local admin = require("lib.test_admin")
+            local status, msg = admin.test('/apisix/admin/routes/2', ngx.HTTP_PUT,
+                [[{
+                    "uri": "/chat-sys",
+                    "plugins": {
+                      "ai-proxy": {
+                          "provider": "openai",
+                          "auth": { "header": { "Authorization": "Bearer wrongtoken" } },
+                          "override": { "endpoint": "http://127.0.0.1:1980" }
+                      },
+                      "ai-aliyun-content-moderation": {
+                        "endpoint": "http://localhost:6724",
+                        "region_id": "cn-shanghai",
+                        "access_key_id": "fake-key-id",
+                        "access_key_secret": "fake-key-secret",
+                        "risk_level_bar": "high",
+                        "check_request": true,
+                        "request_check_roles": ["system"]
+                      }
+                    }
+                }]]
+            )
+            -- propagate an admin API failure as the HTTP status of this test request
+            if status >= 300 then
+                ngx.status = status
+            end
+            ngx.say(msg)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 65: with system-only roles, harmful user content is not checked
+--- request
+POST /chat-sys
+{ "messages": [ { "role": "system", "content": "safe" }, { "role": "user", "content": "kill" } ] }
+--- more_headers
+X-AI-Fixture: aliyun/chat-with-harmful.json
+--- error_code: 200
+--- response_body_like eval
+qr/kill you/
+
+
+
+=== TEST 66: with system-only roles, harmful system content is blocked
+--- request
+POST /chat-sys
+{ "messages": [ { "role": "system", "content": "kill" }, { "role": "user", "content": "hi" } ] }
+--- more_headers
+X-AI-Fixture: aliyun/chat-with-harmful.json
+--- error_code: 200
+--- response_body_like eval
+qr/cannot write unethical/
+
+
+
+=== TEST 67: create route with roles user/tool and request_check_mode all
+--- config
+    location /t {
+        content_by_lua_block {
+            local admin = require("lib.test_admin")
+            local status, msg = admin.test('/apisix/admin/routes/3', ngx.HTTP_PUT,
+                [[{
+                    "uri": "/chat-tool-all",
+                    "plugins": {
+                      "ai-proxy": {
+                          "provider": "openai",
+                          "auth": { "header": { "Authorization": "Bearer wrongtoken" } },
+                          "override": { "endpoint": "http://127.0.0.1:1980" }
+                      },
+                      "ai-aliyun-content-moderation": {
+                        "endpoint": "http://localhost:6724",
+                        "region_id": "cn-shanghai",
+                        "access_key_id": "fake-key-id",
+                        "access_key_secret": "fake-key-secret",
+                        "risk_level_bar": "high",
+                        "check_request": true,
+                        "request_check_mode": "all",
+                        "request_check_roles": ["user", "tool"]
+                      }
+                    }
+                }]]
+            )
+            -- propagate an admin API failure as the HTTP status of this test request
+            if status >= 300 then
+                ngx.status = status
+            end
+            ngx.say(msg)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 68: all mode - harmful tool result in an earlier turn is detected
+--- request
+POST /chat-tool-all
+{ "messages": [ { "role": "user", "content": "hello" }, { "role": "assistant", "tool_calls": [{"id": "c1", "type": "function", "function": {"name": "f"}}] }, { "role": "tool", "tool_call_id": "c1", "content": "kill" }, { "role": "assistant", "content": "ok" }, { "role": "user", "content": "bye" } ] }
+--- more_headers
+X-AI-Fixture: aliyun/chat-with-harmful.json
+--- error_code: 200
+--- response_body_like eval
+qr/cannot write unethical/
+
+
+
+=== TEST 69: invalid request_check_roles value is rejected
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- assistant is not an allowed role; require the schema-validation 400, not any failure
+            local code = t('/apisix/admin/routes/4', ngx.HTTP_PUT,
+                [[{
+                    "uri": "/chat-bad-role",
+                    "plugins": {
+                      "ai-aliyun-content-moderation": {
+                        "endpoint": "http://localhost:6724",
+                        "region_id": "cn-shanghai",
+                        "access_key_id": "fake-key-id",
+                        "access_key_secret": "fake-key-secret",
+                        "request_check_roles": ["assistant"]
+                      }
+                    }
+                }]]
+            )
+            ngx.say(code == 400 and "rejected" or ("unexpected status: " .. tostring(code)))
+        }
+    }
+--- response_body
+rejected
+
+
+
+=== TEST 70: empty request_check_roles is rejected (minItems)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- an empty array violates minItems; require the schema-validation 400, not any failure
+            local code = t('/apisix/admin/routes/4', ngx.HTTP_PUT,
+                [[{
+                    "uri": "/chat-empty-role",
+                    "plugins": {
+                      "ai-aliyun-content-moderation": {
+                        "endpoint": "http://localhost:6724",
+                        "region_id": "cn-shanghai",
+                        "access_key_id": "fake-key-id",
+                        "access_key_secret": "fake-key-secret",
+                        "request_check_roles": []
+                      }
+                    }
+                }]]
+            )
+            ngx.say(code == 400 and "rejected" or ("unexpected status: " .. tostring(code)))
+        }
+    }
+--- response_body
+rejected
+
+
+
+=== TEST 71: openai-responses extract_turn_content / extract_system_content
+--- config
+    location /t {
+        content_by_lua_block {
+            local responses = require("apisix.plugins.ai-protocols.openai-responses")
+            local turn = responses.extract_turn_content
+            -- a plain string input is returned as user content
+            local plain = turn({ input = "hello user" }, "last", { user = true })
+            ngx.say("str_user:", table.concat(plain, ","))
+            -- list input: a user item, then a non-turn item (boundary), then a tool output
+            local mixed = { instructions = "sys instr", input = {
+                { role = "user", content = "u-old" },
+                { type = "reasoning", content = "r" },
+                { type = "function_call_output", output = "tool-out kill" },
+            }}
+            ngx.say("last_ut:", table.concat(turn(mixed, "last", { user = true, tool = true }), ","))
+            ngx.say("last_user_only:", table.concat(turn(mixed, "last", { user = true }), ","))
+            ngx.say("sys:", table.concat(responses.extract_system_content(mixed), ","))
+            -- function_call_output.output supplied as a list of parts
+            ngx.say("arr_out:", table.concat(turn({ input = {
+                { type = "function_call_output", output = { { text = "arr-out" } } },
+            }}, "all", { tool = true }), ","))
+        }
+    }
+--- response_body
+str_user:hello user
+last_ut:tool-out kill
+last_user_only:
+sys:sys instr
+arr_out:arr-out
+
+
+
+=== TEST 72: anthropic / bedrock extract_system_content (string and block-array)
+--- config
+    location /t {
+        content_by_lua_block {
+            local anthropic = require("apisix.plugins.ai-protocols.anthropic-messages")
+            local bedrock = require("apisix.plugins.ai-protocols.bedrock-converse")
+            local text_blocks = { {type="text", text="b1"}, {type="text", text="b2"} }  -- block-array form
+            ngx.say("anth_str:", table.concat(anthropic.extract_system_content({system = "anth sys"}), ","))
+            ngx.say("anth_blk:", table.concat(anthropic.extract_system_content({system = text_blocks}), ","))
+            ngx.say("bed_blk:", table.concat(bedrock.extract_system_content(
+                {system = { {text="bs1"}, {text="bs2"} }}), ","))
+        }
+    }
+--- response_body
+anth_str:anth sys
+anth_blk:b1,b2
+bed_blk:bs1,bs2