From 4324cdfd42a69c82d5fbad8c7895a0f64b1ab786 Mon Sep 17 00:00:00 2001 From: AlinsRan Date: Thu, 2 Jul 2026 16:08:42 +0800 Subject: [PATCH] feat(ai-aliyun-content-moderation): moderate system and tool role content Extend request-side moderation beyond the user role via a new request_check_roles option (array, default ["user"], backward compatible): - user/tool follow request_check_mode; "last" walks the trailing block of selected-role messages, so a fresh user turn or the current round's tool results are moderated without re-checking history. - system ignores request_check_mode and is moderated on every request (all system messages), because it can be poisoned by malicious ToolCall arguments overwriting the system prompt. Protocol layer gains extract_turn_content(body, mode, roles) and extract_system_content(body) across openai-chat/anthropic-messages/openai-responses/bedrock-converse/openai-embeddings. A configured role with no extractor on the current protocol is routed through binding.on_unsupported so fail_mode decides, instead of silently passing unmoderated. Note: tool-result moderation applies to OpenAI-compatible formats where the tool output is a distinct tool role; Anthropic/Bedrock nest tool results inside user messages, so their tool content is not extracted (documented). 
--- .../plugins/ai-aliyun-content-moderation.lua | 83 +++- .../ai-protocols/anthropic-messages.lua | 39 +- .../plugins/ai-protocols/bedrock-converse.lua | 51 ++- apisix/plugins/ai-protocols/openai-chat.lua | 38 +- .../ai-protocols/openai-embeddings.lua | 15 +- .../plugins/ai-protocols/openai-responses.lua | 96 ++++- .../plugins/ai-aliyun-content-moderation.md | 3 +- .../plugins/ai-aliyun-content-moderation.md | 3 +- t/plugin/ai-aliyun-content-moderation.t | 359 ++++++++++++++++++ 9 files changed, 622 insertions(+), 65 deletions(-) diff --git a/apisix/plugins/ai-aliyun-content-moderation.lua b/apisix/plugins/ai-aliyun-content-moderation.lua index 6e41364d9a0a..a796f3229333 100644 --- a/apisix/plugins/ai-aliyun-content-moderation.lua +++ b/apisix/plugins/ai-aliyun-content-moderation.lua @@ -19,6 +19,7 @@ local ngx_ok = ngx.OK local os = os local pairs = pairs local ipairs = ipairs +local next = next local table = table local string = string local type = type @@ -66,8 +67,24 @@ local schema = { enum = {"last", "all"}, default = "last", description = [[ - which user messages to moderate: last (only the latest consecutive user - message block) | all (every user message). Both ignore non-user roles. + which user/tool messages to moderate: last (only the latest consecutive + block of selected-role messages) | all (every selected-role message). + Does not apply to the system role, which is always checked. + ]] + }, + request_check_roles = { + type = "array", + items = {type = "string", enum = {"user", "tool", "system"}}, + minItems = 1, + uniqueItems = true, + default = {"user"}, + description = [[ + which message roles to moderate on the request side. user/tool follow + request_check_mode; system is checked on every request because it can + be poisoned by malicious ToolCall arguments. 
Note: tool-result + moderation applies to OpenAI-compatible formats where the tool output + is a distinct "tool" role/item; for Anthropic/Bedrock (tool results + are nested blocks inside user messages) tool content is not extracted. ]] }, request_check_service = {type = "string", minLength = 1, default = "llm_query_moderation"}, @@ -390,23 +407,63 @@ function _M.access(conf, ctx) return end - -- Request moderation targets user input only (request_check_mode: "last" = - -- latest user turn, "all" = every user message). Protocols that can't surface - -- user-role content have nothing to moderate, so the request passes through. - local contents = proto.extract_user_content - and proto.extract_user_content(request_tab, conf.request_check_mode) + local function set_deny_content_type() + if ctx.var.request_type == "ai_stream" then + core.response.set_header("Content-Type", "text/event-stream") + else + core.response.set_header("Content-Type", "application/json") + end + end + + local roles = {} + for _, r in ipairs(conf.request_check_roles) do + roles[r] = true + end + local turn_roles = {} + if roles.user then turn_roles.user = true end + if roles.tool then turn_roles.tool = true end + + -- A configured role whose extractor this protocol doesn't implement would + -- otherwise pass unmoderated. Route that through fail_mode instead of + -- silently skipping the configured moderation. + if (roles.system and not proto.extract_system_content) + or (next(turn_roles) and not proto.extract_turn_content) then + local handled, code, body = binding.on_unsupported( + conf.fail_mode, _M.name, ctx, + "protocol cannot extract configured request_check_roles", + 500, "protocol " .. (ctx.ai_client_protocol or "unknown") + .. 
" cannot moderate the configured request_check_roles") + if handled then + return code, body + end + return + end + + -- The system prompt is checked on every request (not subject to + -- request_check_mode) because it can be poisoned by malicious ToolCall + -- arguments. All system messages are moderated; deduping unchanged system + -- content via a cache is deferred to a later iteration. + if roles.system then + local system_text = table.concat(proto.extract_system_content(request_tab), " ") + local code, message = request_content_moderation(ctx, conf, system_text) + release_cm_httpc(ctx, conf) + if code then + set_deny_content_type() + return code, message + end + end + + -- user/tool turn moderation follows request_check_mode ("last" = latest turn, + -- "all" = every selected-role message). + local contents = next(turn_roles) + and proto.extract_turn_content(request_tab, conf.request_check_mode, turn_roles) or {} local content_to_check = table.concat(contents, " ") local code, message = request_content_moderation(ctx, conf, content_to_check) release_cm_httpc(ctx, conf) if code then - local stream = ctx.var.request_type == "ai_stream" - if stream then - core.response.set_header("Content-Type", "text/event-stream") - else - core.response.set_header("Content-Type", "application/json") - end + set_deny_content_type() return code, message end end diff --git a/apisix/plugins/ai-protocols/anthropic-messages.lua b/apisix/plugins/ai-protocols/anthropic-messages.lua index 5c9c286a922c..a535ef4fa937 100644 --- a/apisix/plugins/ai-protocols/anthropic-messages.lua +++ b/apisix/plugins/ai-protocols/anthropic-messages.lua @@ -244,11 +244,18 @@ function _M.extract_request_content(body) end --- Extract text from user-role messages for request moderation. --- mode "last" (default): only the last consecutive block of user messages (the --- latest user turn); mode "all": every user message. 
Non-user roles are ignored --- (the Anthropic system prompt lives in body.system, not in messages). -function _M.extract_user_content(body, mode) +local function is_turn_role(message, roles) + return type(message) == "table" and message.role ~= nil and roles[message.role] +end + + +-- Extract text from turn-role messages (user/tool) for request moderation. +-- `roles` is a set such as {user = true, tool = true} selecting which roles to +-- collect. mode "last" (default): only the last consecutive block of messages +-- whose role is in `roles` (the latest turn); mode "all": every such message. +-- The Anthropic system prompt lives in body.system and is handled separately by +-- extract_system_content. +function _M.extract_turn_content(body, mode, roles) local contents = {} if type(body.messages) ~= "table" then return contents @@ -258,7 +265,7 @@ function _M.extract_user_content(body, mode) if mode ~= "all" then start_idx = nil for i = #messages, 1, -1 do - if type(messages[i]) == "table" and messages[i].role == "user" then + if is_turn_role(messages[i], roles) then start_idx = i else break @@ -269,7 +276,7 @@ function _M.extract_user_content(body, mode) end end for i = start_idx, #messages do - if type(messages[i]) == "table" and messages[i].role == "user" then + if is_turn_role(messages[i], roles) then append_message_text(contents, messages[i]) end end @@ -277,6 +284,24 @@ function _M.extract_user_content(body, mode) end +-- Extract system-role text for request moderation. Anthropic carries the system +-- prompt in body.system (a string or an array of text blocks), not in messages. 
+function _M.extract_system_content(body) + local contents = {} + if type(body.system) == "string" then + core.table.insert(contents, body.system) + elseif type(body.system) == "table" then + for _, block in ipairs(body.system) do + if type(block) == "table" and block.type == "text" + and type(block.text) == "string" then + core.table.insert(contents, block.text) + end + end + end + return contents +end + + --- Get messages in canonical {role, content} format. -- Anthropic content blocks are flattened to plain text. function _M.get_messages(body) diff --git a/apisix/plugins/ai-protocols/bedrock-converse.lua b/apisix/plugins/ai-protocols/bedrock-converse.lua index 7fecd3efcd2d..8c6ca60ce77f 100644 --- a/apisix/plugins/ai-protocols/bedrock-converse.lua +++ b/apisix/plugins/ai-protocols/bedrock-converse.lua @@ -201,10 +201,28 @@ function _M.extract_request_content(body) end --- Extract text from user-role messages for request moderation (mode "last" = --- latest user turn, "all" = every user message). The `system` blocks and --- non-user messages are ignored. -function _M.extract_user_content(body, mode) +local function is_turn_role(message, roles) + return type(message) == "table" and message.role ~= nil and roles[message.role] +end + + +local function append_message_blocks(contents, message) + if type(message.content) == "table" then + for _, block in ipairs(message.content) do + if type(block) == "table" and type(block.text) == "string" then + core.table.insert(contents, block.text) + end + end + end +end + + +-- Extract text from turn-role messages (user/tool) for request moderation. +-- `roles` is a set such as {user = true, tool = true} selecting which roles to +-- collect. mode "last" = the latest consecutive block of selected-role messages, +-- "all" = every such message. Bedrock `system` blocks are handled separately by +-- extract_system_content. 
+function _M.extract_turn_content(body, mode, roles) local contents = {} if type(body.messages) ~= "table" then return contents @@ -214,7 +232,7 @@ function _M.extract_user_content(body, mode) if mode ~= "all" then start_idx = nil for i = #messages, 1, -1 do - if type(messages[i]) == "table" and messages[i].role == "user" then + if is_turn_role(messages[i], roles) then start_idx = i else break @@ -225,13 +243,22 @@ function _M.extract_user_content(body, mode) end end for i = start_idx, #messages do - local message = messages[i] - if type(message) == "table" and message.role == "user" - and type(message.content) == "table" then - for _, block in ipairs(message.content) do - if type(block) == "table" and type(block.text) == "string" then - core.table.insert(contents, block.text) - end + if is_turn_role(messages[i], roles) then + append_message_blocks(contents, messages[i]) + end + end + return contents +end + + +-- Extract system-role text for request moderation. Bedrock carries the system +-- prompt in body.system (an array of text blocks), not in messages. +function _M.extract_system_content(body) + local contents = {} + if type(body.system) == "table" then + for _, block in ipairs(body.system) do + if type(block) == "table" and type(block.text) == "string" then + core.table.insert(contents, block.text) end end end diff --git a/apisix/plugins/ai-protocols/openai-chat.lua b/apisix/plugins/ai-protocols/openai-chat.lua index ce54d736ad32..224be1167b05 100644 --- a/apisix/plugins/ai-protocols/openai-chat.lua +++ b/apisix/plugins/ai-protocols/openai-chat.lua @@ -245,11 +245,19 @@ function _M.extract_request_content(body) end --- Extract text from user-role messages for request moderation. --- mode "last" (default): only the last consecutive block of user messages (the --- latest user turn); mode "all": every user message. Non-user roles are ignored --- because the query moderation service is meant for user input. 
-function _M.extract_user_content(body, mode) +local function is_turn_role(message, roles) + return type(message) == "table" and message.role ~= nil and roles[message.role] +end + + +-- Extract text from turn-role messages (user/tool) for request moderation. +-- `roles` is a set such as {user = true, tool = true} selecting which roles to +-- collect. mode "last" (default): only the last consecutive block of messages +-- whose role is in `roles` -- the latest turn, i.e. a fresh user message or the +-- tool results appended in the current agent round, so history is not re-checked. +-- mode "all": every such message. The system role is handled separately by +-- extract_system_content because it is not subject to the last-turn rule. +function _M.extract_turn_content(body, mode, roles) local contents = {} if type(body.messages) ~= "table" then return contents @@ -259,7 +267,7 @@ function _M.extract_user_content(body, mode) if mode ~= "all" then start_idx = nil for i = #messages, 1, -1 do - if type(messages[i]) == "table" and messages[i].role == "user" then + if is_turn_role(messages[i], roles) then start_idx = i else break @@ -270,7 +278,7 @@ function _M.extract_user_content(body, mode) end end for i = start_idx, #messages do - if type(messages[i]) == "table" and messages[i].role == "user" then + if is_turn_role(messages[i], roles) then append_message_text(contents, messages[i]) end end @@ -278,6 +286,22 @@ function _M.extract_user_content(body, mode) end +-- Extract system-role text for request moderation. Unlike turn content, the +-- system prompt is checked on every request (it can be poisoned by malicious +-- ToolCall arguments), so the last-turn rule does not apply here. 
+function _M.extract_system_content(body) + local contents = {} + if type(body.messages) == "table" then + for _, message in ipairs(body.messages) do + if type(message) == "table" and message.role == "system" then + append_message_text(contents, message) + end + end + end + return contents +end + + --- Get messages in canonical {role, content} format. function _M.get_messages(body) return body.messages or {} diff --git a/apisix/plugins/ai-protocols/openai-embeddings.lua b/apisix/plugins/ai-protocols/openai-embeddings.lua index 96f8faa2a963..d1e6a5a283eb 100644 --- a/apisix/plugins/ai-protocols/openai-embeddings.lua +++ b/apisix/plugins/ai-protocols/openai-embeddings.lua @@ -90,13 +90,22 @@ function _M.extract_request_content(body) end --- Embeddings has no message roles; the `input` text is the user content. The --- mode argument does not apply (no conversation turns). -function _M.extract_user_content(body, _) +-- Embeddings has no message roles; the `input` text is user content. mode and +-- roles do not apply, but the input is only surfaced when user is selected. +function _M.extract_turn_content(body, _, roles) + if roles and not roles.user then + return {} + end return _M.extract_request_content(body) end +-- Embeddings has no system prompt. +function _M.extract_system_content(_) + return {} +end + + function _M.get_messages(body) local messages = {} if body and body.input then diff --git a/apisix/plugins/ai-protocols/openai-responses.lua b/apisix/plugins/ai-protocols/openai-responses.lua index 8c263a0fbbc9..eecda969c737 100644 --- a/apisix/plugins/ai-protocols/openai-responses.lua +++ b/apisix/plugins/ai-protocols/openai-responses.lua @@ -223,18 +223,65 @@ function _M.extract_request_content(body) end --- Extract user input text for request moderation. A plain-string `input` is the --- user's content. 
For an `input` array, only user-role items are considered --- (mode "last" = latest user turn, "all" = every user item); `instructions` (the --- system prompt) and non-user items are ignored. -local function is_user_item(item) - return type(item) == "string" or (type(item) == "table" and item.role == "user") +-- Append an input item's text into `contents`: a bare string, a role item's +-- string/parts content, or a tool output item's `output` text. +local function append_item_text(contents, item) + if type(item) == "string" then + core.table.insert(contents, item) + elseif type(item) == "table" then + if type(item.content) == "string" then + core.table.insert(contents, item.content) + elseif type(item.content) == "table" then + for _, part in ipairs(item.content) do + if type(part) == "table" and type(part.text) == "string" then + core.table.insert(contents, part.text) + end + end + elseif type(item.output) == "string" then + core.table.insert(contents, item.output) + elseif type(item.output) == "table" then + -- function_call_output.output may be an array of content parts + for _, part in ipairs(item.output) do + if type(part) == "table" and type(part.text) == "string" then + core.table.insert(contents, part.text) + end + end + end + end +end + + +-- Whether an input item belongs to a selected turn role. A bare string is user +-- text; a role item matches when its role is in `roles`; a Responses-API tool +-- result (`function_call_output`, which has no role) matches when tool is selected. +local function turn_item_matches(item, roles) + if type(item) == "string" then + return roles.user and true or false + end + if type(item) ~= "table" then + return false + end + if item.role ~= nil then + return roles[item.role] and true or false + end + if roles.tool and item.type == "function_call_output" then + return true + end + return false end -function _M.extract_user_content(body, mode) + + +-- Extract turn-role (user/tool) input text for request moderation. 
A plain-string +-- `input` is user content. For an `input` array, mode "last" = the latest +-- consecutive block of selected-role items, "all" = every selected-role item. +-- `instructions` (the system prompt) is handled by extract_system_content. +function _M.extract_turn_content(body, mode, roles) local contents = {} local input = body.input if type(input) == "string" then - core.table.insert(contents, input) + if roles.user then + core.table.insert(contents, input) + end return contents end if type(input) ~= "table" then @@ -244,7 +291,7 @@ function _M.extract_user_content(body, mode) if mode ~= "all" then start_idx = nil for i = #input, 1, -1 do - if is_user_item(input[i]) then + if turn_item_matches(input[i], roles) then start_idx = i else break @@ -255,18 +302,25 @@ function _M.extract_user_content(body, mode) end end for i = start_idx, #input do - local item = input[i] - if type(item) == "string" then - core.table.insert(contents, item) - elseif type(item) == "table" and item.role == "user" and item.content then - if type(item.content) == "string" then - core.table.insert(contents, item.content) - elseif type(item.content) == "table" then - for _, part in ipairs(item.content) do - if type(part) == "table" and part.text then - core.table.insert(contents, part.text) - end - end + if turn_item_matches(input[i], roles) then + append_item_text(contents, input[i]) + end + end + return contents +end + + +-- Extract system-role text for request moderation. Responses API carries the +-- system prompt in `instructions`; an `input` array may also hold system items. 
+function _M.extract_system_content(body) + local contents = {} + if type(body.instructions) == "string" then + core.table.insert(contents, body.instructions) + end + if type(body.input) == "table" then + for _, item in ipairs(body.input) do + if type(item) == "table" and item.role == "system" then + append_item_text(contents, item) end end end diff --git a/docs/en/latest/plugins/ai-aliyun-content-moderation.md b/docs/en/latest/plugins/ai-aliyun-content-moderation.md index 98b2344fe402..2ca235ee0792 100644 --- a/docs/en/latest/plugins/ai-aliyun-content-moderation.md +++ b/docs/en/latest/plugins/ai-aliyun-content-moderation.md @@ -58,7 +58,8 @@ The `ai-aliyun-content-moderation` Plugin should be used with either [`ai-proxy` | stream_check_mode | string | False | `"final_packet"` | `realtime`, `final_packet` | Streaming moderation mode. `realtime`: batched checks during streaming. `final_packet`: append risk level at the end. | | stream_check_cache_size | integer | False | `128` | >= 1 | Maximum bytes per moderation batch in `realtime` mode. Length is measured using Lua string length, so for UTF-8 text non-ASCII characters may consume multiple bytes. | | stream_check_interval | number | False | `3` | >= 0.1 | Seconds between batch checks in `realtime` mode. | -| request_check_mode | string | False | `"last"` | `last`, `all` | Which user messages to moderate. `last`: only the latest consecutive block of user messages (the newest user turn). `all`: every user message. Both modes consider only `user`-role messages; `system`, `assistant` and `tool` messages are ignored. | +| request_check_roles | array[string] | False | `["user"]` | items are `user`, `tool`, `system` | Which message roles to moderate on the request side. `user` and `tool` follow `request_check_mode`; `system` is checked on every request (it can be poisoned by malicious ToolCall arguments overwriting the system prompt). The default `["user"]` preserves the previous behavior. 
Note: tool-result moderation applies to OpenAI-compatible formats where the tool output is a distinct `tool` role/item; for Anthropic and Bedrock (tool results are nested blocks inside user messages) tool content is not extracted. | +| request_check_mode | string | False | `"last"` | `last`, `all` | Which user/tool messages to moderate. `last`: only the latest consecutive block of selected-role messages (the newest turn). `all`: every selected-role message. Does not apply to `system`, which is always moderated when enabled via `request_check_roles`. | | request_check_service | string | False | `"llm_query_moderation"` | | Aliyun service for request moderation. | | request_check_length_limit | number | False | `2000` | >= 1 | Request content length limit. If exceeded, the content is sent to Aliyun in chunks. For instance, if the request content is 250 characters and `request_check_length_limit` is set to `100`, the content is sent in 3 requests to Aliyun. | | response_check_service | string | False | `"llm_response_moderation"` | | Aliyun service for response moderation. 
| diff --git a/docs/zh/latest/plugins/ai-aliyun-content-moderation.md b/docs/zh/latest/plugins/ai-aliyun-content-moderation.md index 04489e36d28a..57d48a9a64d9 100644 --- a/docs/zh/latest/plugins/ai-aliyun-content-moderation.md +++ b/docs/zh/latest/plugins/ai-aliyun-content-moderation.md @@ -58,7 +58,8 @@ import TabItem from '@theme/TabItem'; | stream_check_mode | string | 否 | `"final_packet"` | `realtime`、`final_packet` | 流式审核模式。`realtime`:流式传输期间批量检查。`final_packet`:在最后附加风险等级。 | | stream_check_cache_size | integer | 否 | `128` | >= 1 | `realtime` 模式下每次审核批次的最大字节数(按 UTF-8 编码后的字节长度计算)。 | | stream_check_interval | number | 否 | `3` | >= 0.1 | `realtime` 模式下批次检查之间的间隔秒数。 | -| request_check_mode | string | 否 | `"last"` | `last`, `all` | 审核哪些 user 消息。`last`:仅审核最后一段连续的 user 消息(最新的用户轮次);`all`:审核所有 user 消息。两种模式都只处理 `user` 角色的消息,`system`、`assistant`、`tool` 消息会被忽略。 | +| request_check_roles | array[string] | 否 | `["user"]` | 取值为 `user`、`tool`、`system` | 请求侧审核哪些消息角色。`user` 与 `tool` 遵循 `request_check_mode`;`system` 每次请求都审核(其可能被恶意 ToolCall 参数覆盖篡改)。默认 `["user"]` 保持既有行为。注意:tool 结果审核适用于 OpenAI 兼容格式(tool 输出为独立的 `tool` 角色/项);Anthropic、Bedrock 的 tool 结果以嵌套 block 形式存在于 user 消息中,其内容不会被抽取。 | +| request_check_mode | string | 否 | `"last"` | `last`, `all` | 审核哪些 user/tool 消息。`last`:仅审核最后一段连续的所选角色消息(最新一轮);`all`:审核所有所选角色消息。不作用于 `system`——只要通过 `request_check_roles` 启用,`system` 每次都审核。 | | request_check_service | string | 否 | `"llm_query_moderation"` | | 用于请求审核的阿里云服务。 | | request_check_length_limit | number | 否 | `2000` | >= 1 | 请求内容长度上限。如果超过该限制,内容将分块发送到阿里云。例如,如果请求内容为 250 个字符,且 `request_check_length_limit` 设置为 `100`,则内容将分 3 次请求发送到阿里云。 | | response_check_service | string | 否 | `"llm_response_moderation"` | | 用于响应审核的阿里云服务。 | diff --git a/t/plugin/ai-aliyun-content-moderation.t b/t/plugin/ai-aliyun-content-moderation.t index 9588b9a3947c..603e674bc184 100644 --- a/t/plugin/ai-aliyun-content-moderation.t +++ b/t/plugin/ai-aliyun-content-moderation.t @@ -1922,3 +1922,362 @@ qr/cannot write unethical/ } 
--- response_body rejected + + + +=== TEST 57: extract_turn_content - last mode uses the trailing user/tool block +--- config + location /t { + content_by_lua_block { + local proto = require("apisix.plugins.ai-protocols.openai-chat") + local body = { + messages = { + { role = "system", content = "sys" }, + { role = "user", content = "u1" }, + { role = "assistant", tool_calls = {} }, + { role = "tool", content = "t1" }, + { role = "tool", content = "t2" }, + } + } + local roles = { user = true, tool = true } + ngx.say("last:", table.concat(proto.extract_turn_content(body, "last", roles), ",")) + ngx.say("all:", table.concat(proto.extract_turn_content(body, "all", roles), ",")) + ngx.say("user_only_last_count:", + #proto.extract_turn_content(body, "last", { user = true })) + ngx.say("system:", table.concat(proto.extract_system_content(body), ",")) + } + } +--- response_body +last:t1,t2 +all:u1,t1,t2 +user_only_last_count:0 +system:sys + + + +=== TEST 58: extract_turn_content - a fresh user turn skips harmful history +--- config + location /t { + content_by_lua_block { + local proto = require("apisix.plugins.ai-protocols.openai-chat") + local body = { + messages = { + { role = "user", content = "old kill" }, + { role = "assistant", content = "ok" }, + { role = "user", content = "new safe" }, + } + } + local roles = { user = true, tool = true } + ngx.say("last:", table.concat(proto.extract_turn_content(body, "last", roles), ",")) + } + } +--- response_body +last:new safe + + + +=== TEST 59: create route with request_check_roles user/tool/system +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/chat-roles", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer wrongtoken" } }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-aliyun-content-moderation": { + "endpoint": 
"http://localhost:6724", + "region_id": "cn-shanghai", + "access_key_id": "fake-key-id", + "access_key_secret": "fake-key-secret", + "risk_level_bar": "high", + "check_request": true, + "request_check_roles": ["user", "tool", "system"] + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 60: tool content in the latest turn is moderated and blocked +--- request +POST /chat-roles +{ "messages": [ { "role": "system", "content": "safe" }, { "role": "user", "content": "hello" }, { "role": "assistant", "tool_calls": [{"id": "c1", "type": "function", "function": {"name": "f"}}] }, { "role": "tool", "tool_call_id": "c1", "content": "please kill" } ] } +--- more_headers +X-AI-Fixture: aliyun/chat-with-harmful.json +--- error_code: 200 +--- response_body_like eval +qr/cannot write unethical/ + + + +=== TEST 61: harmful tool not in the latest turn is skipped (last mode) +--- request +POST /chat-roles +{ "messages": [ { "role": "user", "content": "hello" }, { "role": "assistant", "tool_calls": [{"id": "c1", "type": "function", "function": {"name": "f"}}] }, { "role": "tool", "tool_call_id": "c1", "content": "kill" }, { "role": "assistant", "content": "ok" }, { "role": "user", "content": "what is 1+1?" 
} ] } +--- more_headers +X-AI-Fixture: aliyun/chat-with-harmful.json +--- error_code: 200 +--- response_body_like eval +qr/kill you/ + + + +=== TEST 62: harmful system prompt is always moderated and blocked +--- request +POST /chat-roles +{ "messages": [ { "role": "system", "content": "please kill" }, { "role": "user", "content": "hi" } ] } +--- more_headers +X-AI-Fixture: aliyun/chat-with-harmful.json +--- error_code: 200 +--- response_body_like eval +qr/cannot write unethical/ + + + +=== TEST 63: repeated identical harmful system stays blocked (moderated every request) +--- request +POST /chat-roles +{ "messages": [ { "role": "system", "content": "please kill" }, { "role": "user", "content": "hi" } ] } +--- more_headers +X-AI-Fixture: aliyun/chat-with-harmful.json +--- error_code: 200 +--- response_body_like eval +qr/cannot write unethical/ + + + +=== TEST 64: create route with request_check_roles system only +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/2', + ngx.HTTP_PUT, + [[{ + "uri": "/chat-sys", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer wrongtoken" } }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-aliyun-content-moderation": { + "endpoint": "http://localhost:6724", + "region_id": "cn-shanghai", + "access_key_id": "fake-key-id", + "access_key_secret": "fake-key-secret", + "risk_level_bar": "high", + "check_request": true, + "request_check_roles": ["system"] + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 65: with system-only roles, harmful user content is not checked +--- request +POST /chat-sys +{ "messages": [ { "role": "system", "content": "safe" }, { "role": "user", "content": "kill" } ] } +--- more_headers +X-AI-Fixture: aliyun/chat-with-harmful.json +--- error_code: 200 +--- response_body_like 
eval +qr/kill you/ + + + +=== TEST 66: with system-only roles, harmful system content is blocked +--- request +POST /chat-sys +{ "messages": [ { "role": "system", "content": "kill" }, { "role": "user", "content": "hi" } ] } +--- more_headers +X-AI-Fixture: aliyun/chat-with-harmful.json +--- error_code: 200 +--- response_body_like eval +qr/cannot write unethical/ + + + +=== TEST 67: create route with roles user/tool and request_check_mode all +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/3', + ngx.HTTP_PUT, + [[{ + "uri": "/chat-tool-all", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer wrongtoken" } }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-aliyun-content-moderation": { + "endpoint": "http://localhost:6724", + "region_id": "cn-shanghai", + "access_key_id": "fake-key-id", + "access_key_secret": "fake-key-secret", + "risk_level_bar": "high", + "check_request": true, + "request_check_mode": "all", + "request_check_roles": ["user", "tool"] + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 68: all mode - harmful tool result in an earlier turn is detected +--- request +POST /chat-tool-all +{ "messages": [ { "role": "user", "content": "hello" }, { "role": "assistant", "tool_calls": [{"id": "c1", "type": "function", "function": {"name": "f"}}] }, { "role": "tool", "tool_call_id": "c1", "content": "kill" }, { "role": "assistant", "content": "ok" }, { "role": "user", "content": "bye" } ] } +--- more_headers +X-AI-Fixture: aliyun/chat-with-harmful.json +--- error_code: 200 +--- response_body_like eval +qr/cannot write unethical/ + + + +=== TEST 69: invalid request_check_roles value is rejected +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code = 
t('/apisix/admin/routes/4', + ngx.HTTP_PUT, + [[{ + "uri": "/chat-bad-role", + "plugins": { + "ai-aliyun-content-moderation": { + "endpoint": "http://localhost:6724", + "region_id": "cn-shanghai", + "access_key_id": "fake-key-id", + "access_key_secret": "fake-key-secret", + "request_check_roles": ["assistant"] + } + } + }]] + ) + ngx.say(code >= 300 and "rejected" or "accepted") + } + } +--- response_body +rejected + + + +=== TEST 70: empty request_check_roles is rejected (minItems) +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code = t('/apisix/admin/routes/4', + ngx.HTTP_PUT, + [[{ + "uri": "/chat-empty-role", + "plugins": { + "ai-aliyun-content-moderation": { + "endpoint": "http://localhost:6724", + "region_id": "cn-shanghai", + "access_key_id": "fake-key-id", + "access_key_secret": "fake-key-secret", + "request_check_roles": [] + } + } + }]] + ) + ngx.say(code >= 300 and "rejected" or "accepted") + } + } +--- response_body +rejected + + + +=== TEST 71: openai-responses extract_turn_content / extract_system_content +--- config + location /t { + content_by_lua_block { + local proto = require("apisix.plugins.ai-protocols.openai-responses") + -- bare-string input is user content + local b1 = { input = "hello user" } + ngx.say("str_user:", table.concat(proto.extract_turn_content(b1, "last", {user=true}), ",")) + -- array input: user item, a non-turn item (boundary), then a tool output + local b2 = { instructions = "sys instr", input = { + { role = "user", content = "u-old" }, + { type = "reasoning", content = "r" }, + { type = "function_call_output", output = "tool-out kill" }, + }} + ngx.say("last_ut:", table.concat(proto.extract_turn_content(b2, "last", {user=true, tool=true}), ",")) + ngx.say("last_user_only:", table.concat(proto.extract_turn_content(b2, "last", {user=true}), ",")) + ngx.say("sys:", table.concat(proto.extract_system_content(b2), ",")) + -- function_call_output.output as an array of parts + 
local b3 = { input = { + { type = "function_call_output", output = { { text = "arr-out" } } }, + }} + ngx.say("arr_out:", table.concat(proto.extract_turn_content(b3, "all", {tool=true}), ",")) + } + } +--- response_body +str_user:hello user +last_ut:tool-out kill +last_user_only: +sys:sys instr +arr_out:arr-out + + + +=== TEST 72: anthropic / bedrock extract_system_content (string and block-array) +--- config + location /t { + content_by_lua_block { + local anth = require("apisix.plugins.ai-protocols.anthropic-messages") + ngx.say("anth_str:", table.concat(anth.extract_system_content({system = "anth sys"}), ",")) + ngx.say("anth_blk:", table.concat(anth.extract_system_content( + {system = { {type="text", text="b1"}, {type="text", text="b2"} }}), ",")) + local bed = require("apisix.plugins.ai-protocols.bedrock-converse") + ngx.say("bed_blk:", table.concat(bed.extract_system_content( + {system = { {text="bs1"}, {text="bs2"} }}), ",")) + } + } +--- response_body +anth_str:anth sys +anth_blk:b1,b2 +bed_blk:bs1,bs2