Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 70 additions & 13 deletions apisix/plugins/ai-aliyun-content-moderation.lua
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ local ngx_ok = ngx.OK
local os = os
local pairs = pairs
local ipairs = ipairs
local next = next
local table = table
local string = string
local type = type
Expand Down Expand Up @@ -66,8 +67,24 @@ local schema = {
enum = {"last", "all"},
default = "last",
description = [[
which user messages to moderate: last (only the latest consecutive user
message block) | all (every user message). Both ignore non-user roles.
which user/tool messages to moderate: last (only the latest consecutive
block of selected-role messages) | all (every selected-role message).
Does not apply to the system role, which is always checked.
]]
},
request_check_roles = {
type = "array",
items = {type = "string", enum = {"user", "tool", "system"}},
minItems = 1,
uniqueItems = true,
default = {"user"},
description = [[
which message roles to moderate on the request side. user/tool follow
request_check_mode; system is checked on every request because it can
be poisoned by malicious ToolCall arguments. Note: tool-result
moderation applies to OpenAI-compatible formats where the tool output
is a distinct "tool" role/item; for Anthropic/Bedrock (tool results
are nested blocks inside user messages) tool content is not extracted.
]]
},
request_check_service = {type = "string", minLength = 1, default = "llm_query_moderation"},
Expand Down Expand Up @@ -390,23 +407,63 @@ function _M.access(conf, ctx)
return
end

-- Request moderation targets user input only (request_check_mode: "last" =
-- latest user turn, "all" = every user message). Protocols that can't surface
-- user-role content have nothing to moderate, so the request passes through.
local contents = proto.extract_user_content
and proto.extract_user_content(request_tab, conf.request_check_mode)
-- Pick the Content-Type header for a deny response: "text/event-stream" when
-- the request type recorded in ctx.var is "ai_stream", JSON otherwise.
local function set_deny_content_type()
    local content_type = "application/json"
    if ctx.var.request_type == "ai_stream" then
        content_type = "text/event-stream"
    end
    core.response.set_header("Content-Type", content_type)
end

local roles = {}
for _, r in ipairs(conf.request_check_roles) do
roles[r] = true
end
local turn_roles = {}
if roles.user then turn_roles.user = true end
if roles.tool then turn_roles.tool = true end

-- A configured role whose extractor this protocol doesn't implement would
-- otherwise pass unmoderated. Route that through fail_mode instead of
-- silently skipping the configured moderation.
if (roles.system and not proto.extract_system_content)
or (next(turn_roles) and not proto.extract_turn_content) then
local handled, code, body = binding.on_unsupported(
conf.fail_mode, _M.name, ctx,
"protocol cannot extract configured request_check_roles",
500, "protocol " .. (ctx.ai_client_protocol or "unknown")
.. " cannot moderate the configured request_check_roles")
if handled then
return code, body
end
return
end

-- The system prompt is checked on every request (not subject to
-- request_check_mode) because it can be poisoned by malicious ToolCall
-- arguments. All system messages are moderated; deduping unchanged system
-- content via a cache is deferred to a later iteration.
if roles.system then
local system_text = table.concat(proto.extract_system_content(request_tab), " ")
local code, message = request_content_moderation(ctx, conf, system_text)
release_cm_httpc(ctx, conf)
if code then
set_deny_content_type()
return code, message
end
end

-- user/tool turn moderation follows request_check_mode ("last" = latest turn,
-- "all" = every selected-role message).
local contents = next(turn_roles)
and proto.extract_turn_content(request_tab, conf.request_check_mode, turn_roles)
or {}
local content_to_check = table.concat(contents, " ")

local code, message = request_content_moderation(ctx, conf, content_to_check)
release_cm_httpc(ctx, conf)
if code then
local stream = ctx.var.request_type == "ai_stream"
if stream then
core.response.set_header("Content-Type", "text/event-stream")
else
core.response.set_header("Content-Type", "application/json")
end
set_deny_content_type()
return code, message
end
end
Expand Down
39 changes: 32 additions & 7 deletions apisix/plugins/ai-protocols/anthropic-messages.lua
Original file line number Diff line number Diff line change
Expand Up @@ -244,11 +244,18 @@ function _M.extract_request_content(body)
end


-- Extract text from user-role messages for request moderation.
-- mode "last" (default): only the last consecutive block of user messages (the
-- latest user turn); mode "all": every user message. Non-user roles are ignored
-- (the Anthropic system prompt lives in body.system, not in messages).
function _M.extract_user_content(body, mode)
-- Tell whether `message` is a table whose role is selected in `roles`.
-- `roles` is a set keyed by role name (e.g. {user = true, tool = true}).
-- Returns false for a non-table message or a missing role; otherwise returns
-- the raw roles[role] lookup (nil when the role is not selected).
local function is_turn_role(message, roles)
    if type(message) ~= "table" then
        return false
    end
    local role = message.role
    if role == nil then
        return false
    end
    return roles[role]
end


-- Extract text from turn-role messages (user/tool) for request moderation.
-- `roles` is a set such as {user = true, tool = true} selecting which roles to
-- collect. mode "last" (default): only the last consecutive block of messages
-- whose role is in `roles` (the latest turn); mode "all": every such message.
-- The Anthropic system prompt lives in body.system and is handled separately by
-- extract_system_content.
function _M.extract_turn_content(body, mode, roles)
local contents = {}
if type(body.messages) ~= "table" then
return contents
Expand All @@ -258,7 +265,7 @@ function _M.extract_user_content(body, mode)
if mode ~= "all" then
start_idx = nil
for i = #messages, 1, -1 do
if type(messages[i]) == "table" and messages[i].role == "user" then
if is_turn_role(messages[i], roles) then
start_idx = i
else
break
Expand All @@ -269,14 +276,32 @@ function _M.extract_user_content(body, mode)
end
end
for i = start_idx, #messages do
if type(messages[i]) == "table" and messages[i].role == "user" then
if is_turn_role(messages[i], roles) then
append_message_text(contents, messages[i])
end
end
return contents
end


-- Collect system prompt text for request moderation. The prompt is read from
-- body.system, which may be a plain string or an array of blocks; for the
-- array form only blocks of type "text" carrying a string `text` are kept.
-- Returns an array of strings (empty when there is no usable system prompt).
function _M.extract_system_content(body)
    local texts = {}
    local system = body.system
    local kind = type(system)

    if kind == "string" then
        core.table.insert(texts, system)
        return texts
    end
    if kind ~= "table" then
        return texts
    end

    for _, block in ipairs(system) do
        if type(block) == "table" and block.type == "text" then
            local text = block.text
            if type(text) == "string" then
                core.table.insert(texts, text)
            end
        end
    end
    return texts
end


--- Get messages in canonical {role, content} format.
-- Anthropic content blocks are flattened to plain text.
function _M.get_messages(body)
Expand Down
37 changes: 29 additions & 8 deletions apisix/plugins/ai-protocols/bedrock-converse.lua
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,17 @@ function _M.extract_request_content(body)
end


-- Extract text from user-role messages for request moderation (mode "last" =
-- latest user turn, "all" = every user message). The `system` blocks and
-- non-user messages are ignored.
function _M.extract_user_content(body, mode)
-- Tell whether `message` is a table whose role is selected in `roles`.
-- `roles` is a set keyed by role name (e.g. {user = true, tool = true}).
-- Returns false for a non-table message or a missing role; otherwise returns
-- the raw roles[role] lookup (nil when the role is not selected).
local function is_turn_role(message, roles)
    if type(message) ~= "table" then
        return false
    end
    local role = message.role
    if role == nil then
        return false
    end
    return roles[role]
end


-- Extract text from turn-role messages (user/tool) for request moderation.
-- `roles` is a set such as {user = true, tool = true} selecting which roles to
-- collect. mode "last" = the latest consecutive block of selected-role messages,
-- "all" = every such message. Bedrock `system` blocks are handled separately by
-- extract_system_content.
function _M.extract_turn_content(body, mode, roles)
local contents = {}
if type(body.messages) ~= "table" then
return contents
Expand All @@ -209,7 +216,7 @@ function _M.extract_user_content(body, mode)
if mode ~= "all" then
start_idx = nil
for i = #messages, 1, -1 do
if type(messages[i]) == "table" and messages[i].role == "user" then
if is_turn_role(messages[i], roles) then
start_idx = i
else
break
Expand All @@ -220,9 +227,23 @@ function _M.extract_user_content(body, mode)
end
end
for i = start_idx, #messages do
local message = messages[i]
if type(message) == "table" and message.role == "user" then
append_block_texts(contents, message.content)
if is_turn_role(messages[i], roles) then
append_block_texts(contents, messages[i].content)
end
end
return contents
end


-- Extract system-role text for request moderation. Bedrock carries the system
-- prompt in body.system (an array of text blocks), not in messages.
function _M.extract_system_content(body)
local contents = {}
if type(body.system) == "table" then
for _, block in ipairs(body.system) do
if type(block) == "table" and type(block.text) == "string" then
core.table.insert(contents, block.text)
end
end
end
return contents
Expand Down
38 changes: 31 additions & 7 deletions apisix/plugins/ai-protocols/openai-chat.lua
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,19 @@ function _M.extract_request_content(body)
end


-- Extract text from user-role messages for request moderation.
-- mode "last" (default): only the last consecutive block of user messages (the
-- latest user turn); mode "all": every user message. Non-user roles are ignored
-- because the query moderation service is meant for user input.
function _M.extract_user_content(body, mode)
-- Tell whether `message` is a table whose role is selected in `roles`.
-- `roles` is a set keyed by role name (e.g. {user = true, tool = true}).
-- Returns false for a non-table message or a missing role; otherwise returns
-- the raw roles[role] lookup (nil when the role is not selected).
local function is_turn_role(message, roles)
    if type(message) ~= "table" then
        return false
    end
    local role = message.role
    if role == nil then
        return false
    end
    return roles[role]
end


-- Extract text from turn-role messages (user/tool) for request moderation.
-- `roles` is a set such as {user = true, tool = true} selecting which roles to
-- collect. mode "last" (default): only the last consecutive block of messages
-- whose role is in `roles` -- the latest turn, i.e. a fresh user message or the
-- tool results appended in the current agent round, so history is not re-checked.
-- mode "all": every such message. The system role is handled separately by
-- extract_system_content because it is not subject to the last-turn rule.
function _M.extract_turn_content(body, mode, roles)
local contents = {}
if type(body.messages) ~= "table" then
return contents
Expand All @@ -262,7 +270,7 @@ function _M.extract_user_content(body, mode)
if mode ~= "all" then
start_idx = nil
for i = #messages, 1, -1 do
if type(messages[i]) == "table" and messages[i].role == "user" then
if is_turn_role(messages[i], roles) then
start_idx = i
else
break
Expand All @@ -273,14 +281,30 @@ function _M.extract_user_content(body, mode)
end
end
for i = start_idx, #messages do
if type(messages[i]) == "table" and messages[i].role == "user" then
if is_turn_role(messages[i], roles) then
append_message_text(contents, messages[i])
end
end
return contents
end


-- Collect the text of every system-role message for request moderation.
-- All entries of body.messages are scanned, so no last-turn filtering is
-- applied here. Returns an array of strings, empty when body.messages is not
-- a table or holds no system messages.
function _M.extract_system_content(body)
    local system_texts = {}
    local messages = body.messages
    if type(messages) ~= "table" then
        return system_texts
    end

    for _, msg in ipairs(messages) do
        if type(msg) == "table" and msg.role == "system" then
            append_message_text(system_texts, msg)
        end
    end
    return system_texts
end


--- Get messages in canonical {role, content} format.
-- OpenAI Chat content may be a plain string or an array of typed parts
-- (e.g. {type = "text", text = "..."}); the text parts are flattened so
Expand Down
15 changes: 12 additions & 3 deletions apisix/plugins/ai-protocols/openai-embeddings.lua
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,22 @@ function _M.extract_request_content(body)
end


-- Embeddings has no message roles; the `input` text is the user content. The
-- mode argument does not apply (no conversation turns).
function _M.extract_user_content(body, _)
-- Turn-content extractor for embeddings requests. The second (mode) argument
-- is ignored. When no `roles` set is supplied, or the set selects `user`, the
-- result of extract_request_content(body) is returned; when a `roles` set is
-- supplied without `user`, an empty table is returned instead.
function _M.extract_turn_content(body, _, roles)
    if not roles or roles.user then
        return _M.extract_request_content(body)
    end
    return {}
end


-- Embeddings requests carry no system prompt, so there is never any system
-- text to moderate: always returns a fresh empty table.
function _M.extract_system_content(_)
    local no_system_text = {}
    return no_system_text
end


function _M.get_messages(body)
local messages = {}
if body and body.input then
Expand Down
Loading
Loading