diff --git a/CHANGELOG.md b/CHANGELOG.md index e4172316f..56508b4a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Unreleased ### Added +- Moderation: persist structured moderation snapshots (static scan + VT/LLM merged verdict, reason codes, and evidence) on skills and versions (#333) (thanks @ArthurzKV). - Moderation: add comment reporting with per-user active report caps, unique reporter/target enforcement, and auto-hide on the 4th unique report. - Moderation: add AI-driven comment scam backfill (`commentModeration:*`) with persisted verdict/confidence/explainer metadata and strict auto-ban for `certain_scam` + `high` confidence. - Admin: add manual unban for banned users (clears `deletedAt` + `banReason`, audit log entry). Revoked API tokens stay revoked. diff --git a/convex/_generated/api.d.ts b/convex/_generated/api.d.ts index 46335a1ed..02d32f27a 100644 --- a/convex/_generated/api.d.ts +++ b/convex/_generated/api.d.ts @@ -56,6 +56,8 @@ import type * as lib_httpHeaders from "../lib/httpHeaders.js"; import type * as lib_httpRateLimit from "../lib/httpRateLimit.js"; import type * as lib_leaderboards from "../lib/leaderboards.js"; import type * as lib_moderation from "../lib/moderation.js"; +import type * as lib_moderationEngine from "../lib/moderationEngine.js"; +import type * as lib_moderationReasonCodes from "../lib/moderationReasonCodes.js"; import type * as lib_openaiResponse from "../lib/openaiResponse.js"; import type * as lib_public from "../lib/public.js"; import type * as lib_reporting from "../lib/reporting.js"; @@ -152,6 +154,8 @@ declare const fullApi: ApiFromModules<{ "lib/httpRateLimit": typeof lib_httpRateLimit; "lib/leaderboards": typeof lib_leaderboards; "lib/moderation": typeof lib_moderation; + "lib/moderationEngine": typeof lib_moderationEngine; + "lib/moderationReasonCodes": typeof lib_moderationReasonCodes; "lib/openaiResponse": typeof lib_openaiResponse; "lib/public": typeof lib_public; "lib/reporting": typeof lib_reporting; diff --git a/convex/httpApi.handlers.test.ts b/convex/httpApi.handlers.test.ts index b603f1706..5f67d847a 100644 --- a/convex/httpApi.handlers.test.ts +++ b/convex/httpApi.handlers.test.ts @@ -343,6 +343,7 @@ describe('httpApi handlers', () => { displayName: 'Cool Skill', version: '1.2.3', changelog: 'c', + acceptLicenseTerms: true, files: [{ path: 'SKILL.md', size: 1, storageId: 'id', sha256: 'a' }], }), }) @@ -365,6 +366,7 @@ describe('httpApi handlers', () => { displayName: 'Cool Skill', version: '1.2.3', changelog: 'c', + acceptLicenseTerms: true, files: [{ path: 'SKILL.md', size: 1, storageId: 'id', sha256: 'a' }], }), }) diff --git a/convex/lib/moderationEngine.test.ts b/convex/lib/moderationEngine.test.ts new file mode 100644 index 000000000..d3a6cbe25 --- /dev/null +++ b/convex/lib/moderationEngine.test.ts @@ -0,0 +1,73 @@ +import { describe, expect, it } from 'vitest' +import { buildModerationSnapshot, runStaticModerationScan } from './moderationEngine' + +describe('moderationEngine', () => { + it('does not flag benign token/password docs text alone', () => { + const result = runStaticModerationScan({ + slug: 'demo', + displayName: 'Demo', + summary: 'A normal integration skill', + frontmatter: {}, + metadata: {}, + files: [{ path: 'SKILL.md', size: 64 }], + fileContents: [ + { + path: 'SKILL.md', + content: + 'This skill requires API token and password from the official provider settings.', + }, + ], + }) + + expect(result.reasonCodes).toEqual([]) + expect(result.status).toBe('clean') + }) + + it('flags dynamic eval 
usage as suspicious', () => {
+    const result = runStaticModerationScan({
+      slug: 'demo',
+      displayName: 'Demo',
+      summary: 'A normal integration skill',
+      frontmatter: {},
+      metadata: {},
+      files: [{ path: 'index.ts', size: 64 }],
+      fileContents: [{ path: 'index.ts', content: 'const value = eval(code)' }],
+    })
+
+    expect(result.reasonCodes).toContain('suspicious.dynamic_code_execution')
+    expect(result.status).toBe('suspicious')
+  })
+
+  it('upgrades merged verdict to malicious when VT is malicious', () => {
+    const snapshot = buildModerationSnapshot({
+      staticScan: {
+        status: 'suspicious',
+        reasonCodes: ['suspicious.dynamic_code_execution'],
+        findings: [],
+        summary: '',
+        engineVersion: 'v2.0.0',
+        checkedAt: Date.now(),
+      },
+      vtStatus: 'malicious',
+    })
+
+    expect(snapshot.verdict).toBe('malicious')
+    expect(snapshot.reasonCodes).toContain('malicious.vt_malicious')
+  })
+
+  it('rebuilds snapshots from current signals instead of retaining stale scanner codes', () => {
+    const snapshot = buildModerationSnapshot({
+      staticScan: {
+        status: 'clean',
+        reasonCodes: [],
+        findings: [],
+        summary: '',
+        engineVersion: 'v2.0.0',
+        checkedAt: Date.now(),
+      },
+    })
+
+    expect(snapshot.verdict).toBe('clean')
+    expect(snapshot.reasonCodes).toEqual([])
+  })
+})
diff --git a/convex/lib/moderationEngine.ts b/convex/lib/moderationEngine.ts
new file mode 100644
index 000000000..9c2624bd4
--- /dev/null
+++ b/convex/lib/moderationEngine.ts
@@ -0,0 +1,362 @@
+import type { Doc, Id } from '../_generated/dataModel'
+import {
+  legacyFlagsFromVerdict,
+  MODERATION_ENGINE_VERSION,
+  normalizeReasonCodes,
+  type ModerationFinding,
+  REASON_CODES,
+  summarizeReasonCodes,
+  type ModerationVerdict,
+  verdictFromCodes,
+} from './moderationReasonCodes'
+
+type TextFile = { path: string; content: string }
+
+export type StaticScanInput = {
+  slug: string
+  displayName: string
+  summary?: string
+  frontmatter: Record<string, unknown>
+  metadata?: unknown
+  files: Array<{ path: string; size: number }>
+  fileContents: TextFile[]
+}
+
+export type StaticScanResult = {
+  status: ModerationVerdict
+  reasonCodes: string[]
+  findings: ModerationFinding[]
+  summary: string
+  engineVersion: string
+  checkedAt: number
+}
+
+export type ModerationSnapshot = {
+  verdict: ModerationVerdict
+  reasonCodes: string[]
+  evidence: ModerationFinding[]
+  summary: string
+  engineVersion: string
+  evaluatedAt: number
+  sourceVersionId?: Id<'skillVersions'>
+  legacyFlags?: string[]
+}
+
+const MANIFEST_EXTENSION = /\.(json|yaml|yml|toml)$/i
+const MARKDOWN_EXTENSION = /\.(md|markdown|mdx)$/i
+const CODE_EXTENSION = /\.(js|ts|mjs|cjs|mts|cts|jsx|tsx|py|sh|bash|zsh|rb|go)$/i
+const STANDARD_PORTS = new Set([80, 443, 8080, 8443, 3000])
+
+function truncateEvidence(evidence: string, maxLen = 160) {
+  if (evidence.length <= maxLen) return evidence
+  return `${evidence.slice(0, maxLen)}...`
+}
+
+function addFinding(
+  findings: ModerationFinding[],
+  finding: Omit<ModerationFinding, 'evidence'> & { evidence: string },
+) {
+  findings.push({ ...finding, evidence: truncateEvidence(finding.evidence.trim()) })
+}
+
+function findFirstLine(content: string, pattern: RegExp) {
+  const lines = content.split('\n')
+  for (let i = 0; i < lines.length; i += 1) {
+    if (pattern.test(lines[i])) {
+      return { line: i + 1, text: lines[i] }
+    }
+  }
+  return { line: 1, text: lines[0] ?? '' }
+}
+
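+// The scanners below each inspect one class of file and append
+// ModerationFinding entries. scanCodeFile mostly pairs signals so a lone API
+// call is not flagged by itself (child_process + exec-style call, file read +
+// network send, process.env + network send); single-signal checks cover
+// eval/new Function, crypto-mining strings, WebSockets on non-standard ports,
+// and hex/base64 obfuscation blobs. scanMarkdownFile flags prompt-injection
+// phrasing; scanManifestFile flags shortener or raw-IP install sources.
+// Severity is informational: the verdict derives from reason codes alone.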
+function scanCodeFile(path: string, content: string, findings: ModerationFinding[]) {
+  if (!CODE_EXTENSION.test(path)) return
+
+  const hasChildProcess = /child_process/.test(content)
+  const execPattern = /\b(exec|execSync|spawn|spawnSync|execFile|execFileSync)\s*\(/
+  if (hasChildProcess && execPattern.test(content)) {
+    const match = findFirstLine(content, execPattern)
+    addFinding(findings, {
+      code: REASON_CODES.DANGEROUS_EXEC,
+      severity: 'critical',
+      file: path,
+      line: match.line,
+      message: 'Shell command execution detected (child_process).',
+      evidence: match.text,
+    })
+  }
+
+  if (/\beval\s*\(|new\s+Function\s*\(/.test(content)) {
+    const match = findFirstLine(content, /\beval\s*\(|new\s+Function\s*\(/)
+    addFinding(findings, {
+      code: REASON_CODES.DYNAMIC_CODE,
+      severity: 'critical',
+      file: path,
+      line: match.line,
+      message: 'Dynamic code execution detected.',
+      evidence: match.text,
+    })
+  }
+
+  if (/stratum\+tcp|stratum\+ssl|coinhive|cryptonight|xmrig/i.test(content)) {
+    const match = findFirstLine(content, /stratum\+tcp|stratum\+ssl|coinhive|cryptonight|xmrig/i)
+    addFinding(findings, {
+      code: REASON_CODES.CRYPTO_MINING,
+      severity: 'critical',
+      file: path,
+      line: match.line,
+      message: 'Possible crypto mining behavior detected.',
+      evidence: match.text,
+    })
+  }
+
+  const wsMatch = content.match(/new\s+WebSocket\s*\(\s*["']wss?:\/\/[^"']*:(\d+)/)
+  if (wsMatch) {
+    const port = Number.parseInt(wsMatch[1] ?? '', 10)
+    if (Number.isFinite(port) && !STANDARD_PORTS.has(port)) {
+      const match = findFirstLine(content, /new\s+WebSocket\s*\(/)
+      addFinding(findings, {
+        code: REASON_CODES.SUSPICIOUS_NETWORK,
+        severity: 'warn',
+        file: path,
+        line: match.line,
+        message: 'WebSocket connection to non-standard port detected.',
+        evidence: match.text,
+      })
+    }
+  }
+
+  const hasFileRead = /readFileSync|readFile/.test(content)
+  const hasNetworkSend = /\bfetch\b|http\.request|\baxios\b/.test(content)
+  if (hasFileRead && hasNetworkSend) {
+    const match = findFirstLine(content, /readFileSync|readFile/)
+    addFinding(findings, {
+      code: REASON_CODES.EXFILTRATION,
+      severity: 'warn',
+      file: path,
+      line: match.line,
+      message: 'File read combined with network send (possible exfiltration).',
+      evidence: match.text,
+    })
+  }
+
+  const hasProcessEnv = /process\.env/.test(content)
+  if (hasProcessEnv && hasNetworkSend) {
+    const match = findFirstLine(content, /process\.env/)
+    addFinding(findings, {
+      code: REASON_CODES.CREDENTIAL_HARVEST,
+      severity: 'critical',
+      file: path,
+      line: match.line,
+      message: 'Environment variable access combined with network send.',
+      evidence: match.text,
+    })
+  }
+
+  if (
+    /(\\x[0-9a-fA-F]{2}){6,}/.test(content) ||
+    /(?:atob|Buffer\.from)\s*\(\s*["'][A-Za-z0-9+/=]{200,}["']/.test(content)
+  ) {
+    const match = findFirstLine(content, /(\\x[0-9a-fA-F]{2}){6,}|(?:atob|Buffer\.from)\s*\(/)
+    addFinding(findings, {
+      code: REASON_CODES.OBFUSCATED_CODE,
+      severity: 'warn',
+      file: path,
+      line: match.line,
+      message: 'Potential obfuscated payload detected.',
+      evidence: match.text,
+    })
+  }
+}
+
+function scanMarkdownFile(path: string, content: string, findings: ModerationFinding[]) {
+  if (!MARKDOWN_EXTENSION.test(path)) return
+
+  if (
+    /ignore\s+(all\s+)?previous\s+instructions/i.test(content) ||
+    /system\s*prompt\s*[:=]/i.test(content) ||
+    /you\s+are\s+now\s+(a|an)\b/i.test(content)
+  ) {
+    const match = findFirstLine(
+      content,
+      /ignore\s+(all\s+)?previous\s+instructions|system\s*prompt\s*[:=]|you\s+are\s+now\s+(a|an)\b/i,
+    )
+    
addFinding(findings, { + code: REASON_CODES.INJECTION_INSTRUCTIONS, + severity: 'warn', + file: path, + line: match.line, + message: 'Prompt-injection style instruction pattern detected.', + evidence: match.text, + }) + } +} + +function scanManifestFile(path: string, content: string, findings: ModerationFinding[]) { + if (!MANIFEST_EXTENSION.test(path)) return + + if ( + /https?:\/\/(bit\.ly|tinyurl\.com|t\.co|goo\.gl|is\.gd)\//i.test(content) || + /https?:\/\/\d{1,3}(?:\.\d{1,3}){3}/i.test(content) + ) { + const match = findFirstLine( + content, + /https?:\/\/(bit\.ly|tinyurl\.com|t\.co|goo\.gl|is\.gd)\/|https?:\/\/\d{1,3}(?:\.\d{1,3}){3}/i, + ) + addFinding(findings, { + code: REASON_CODES.SUSPICIOUS_INSTALL_SOURCE, + severity: 'warn', + file: path, + line: match.line, + message: 'Install source points to URL shortener or raw IP.', + evidence: match.text, + }) + } +} + +function dedupeEvidence(evidence: ModerationFinding[]) { + const seen = new Set() + const out: ModerationFinding[] = [] + for (const item of evidence) { + const key = `${item.code}:${item.file}:${item.line}:${item.message}` + if (seen.has(key)) continue + seen.add(key) + out.push(item) + } + return out.slice(0, 40) +} + +function addScannerStatusReason(reasonCodes: string[], scanner: 'vt' | 'llm', status?: string) { + const normalized = status?.trim().toLowerCase() + if (normalized === 'malicious') { + reasonCodes.push(`malicious.${scanner}_malicious`) + } else if (normalized === 'suspicious') { + reasonCodes.push(`suspicious.${scanner}_suspicious`) + } +} + +export function runStaticModerationScan(input: StaticScanInput): StaticScanResult { + const findings: ModerationFinding[] = [] + const files = [...input.fileContents].sort((a, b) => a.path.localeCompare(b.path)) + + for (const file of files) { + scanCodeFile(file.path, file.content, findings) + scanMarkdownFile(file.path, file.content, findings) + scanManifestFile(file.path, file.content, findings) + } + + const installJson = JSON.stringify(input.metadata ?? {}) + if (/https?:\/\/(bit\.ly|tinyurl\.com|t\.co|goo\.gl|is\.gd)\//i.test(installJson)) { + addFinding(findings, { + code: REASON_CODES.SUSPICIOUS_INSTALL_SOURCE, + severity: 'warn', + file: 'metadata', + line: 1, + message: 'Install metadata references shortener URL.', + evidence: installJson, + }) + } + + const alwaysValue = input.frontmatter.always + if (alwaysValue === true || alwaysValue === 'true') { + addFinding(findings, { + code: REASON_CODES.MANIFEST_PRIVILEGED_ALWAYS, + severity: 'warn', + file: 'SKILL.md', + line: 1, + message: 'Skill is configured with always=true (persistent invocation).', + evidence: 'always: true', + }) + } + + const identityText = `${input.slug}\n${input.displayName}\n${input.summary ?? 
''}` + if (/keepcold131\/ClawdAuthenticatorTool|ClawdAuthenticatorTool/i.test(identityText)) { + addFinding(findings, { + code: REASON_CODES.KNOWN_BLOCKED_SIGNATURE, + severity: 'critical', + file: 'metadata', + line: 1, + message: 'Matched a known blocked malware signature.', + evidence: identityText, + }) + } + + findings.sort((a, b) => + `${a.code}:${a.file}:${a.line}:${a.message}`.localeCompare( + `${b.code}:${b.file}:${b.line}:${b.message}`, + ), + ) + + const reasonCodes = normalizeReasonCodes(findings.map((finding) => finding.code)) + const status = verdictFromCodes(reasonCodes) + return { + status, + reasonCodes, + findings, + summary: summarizeReasonCodes(reasonCodes), + engineVersion: MODERATION_ENGINE_VERSION, + checkedAt: Date.now(), + } +} + +export function buildModerationSnapshot(params: { + staticScan?: StaticScanResult + vtStatus?: string + llmStatus?: string + sourceVersionId?: Id<'skillVersions'> +}): ModerationSnapshot { + const reasonCodes = [...(params.staticScan?.reasonCodes ?? [])] + const evidence = [...(params.staticScan?.findings ?? [])] + + addScannerStatusReason(reasonCodes, 'vt', params.vtStatus) + addScannerStatusReason(reasonCodes, 'llm', params.llmStatus) + + const normalizedCodes = normalizeReasonCodes(reasonCodes) + const verdict = verdictFromCodes(normalizedCodes) + return { + verdict, + reasonCodes: normalizedCodes, + evidence: dedupeEvidence(evidence), + summary: summarizeReasonCodes(normalizedCodes), + engineVersion: MODERATION_ENGINE_VERSION, + evaluatedAt: Date.now(), + sourceVersionId: params.sourceVersionId, + legacyFlags: legacyFlagsFromVerdict(verdict), + } +} + +export function resolveSkillVerdict( + skill: Pick< + Doc<'skills'>, + 'moderationVerdict' | 'moderationFlags' | 'moderationReason' | 'moderationReasonCodes' + >, +): ModerationVerdict { + if (skill.moderationVerdict) return skill.moderationVerdict + if (skill.moderationFlags?.includes('blocked.malware')) return 'malicious' + if (skill.moderationFlags?.includes('flagged.suspicious')) return 'suspicious' + if ( + skill.moderationReason?.startsWith('scanner.') && + skill.moderationReason.endsWith('.malicious') + ) { + return 'malicious' + } + if ( + skill.moderationReason?.startsWith('scanner.') && + skill.moderationReason.endsWith('.suspicious') + ) { + return 'suspicious' + } + if ((skill.moderationReasonCodes ?? []).some((code) => code.startsWith('malicious.'))) { + return 'malicious' + } + if ((skill.moderationReasonCodes ?? 
[]).length > 0) return 'suspicious'
+  return 'clean'
+}
diff --git a/convex/lib/moderationReasonCodes.ts b/convex/lib/moderationReasonCodes.ts
new file mode 100644
index 000000000..346864ffa
--- /dev/null
+++ b/convex/lib/moderationReasonCodes.ts
@@ -0,0 +1,60 @@
+export type ModerationVerdict = 'clean' | 'suspicious' | 'malicious'
+
+export type ModerationFindingSeverity = 'info' | 'warn' | 'critical'
+
+export type ModerationFinding = {
+  code: string
+  severity: ModerationFindingSeverity
+  file: string
+  line: number
+  message: string
+  evidence: string
+}
+
+export const MODERATION_ENGINE_VERSION = 'v2.0.0'
+
+export const REASON_CODES = {
+  DANGEROUS_EXEC: 'suspicious.dangerous_exec',
+  DYNAMIC_CODE: 'suspicious.dynamic_code_execution',
+  CREDENTIAL_HARVEST: 'malicious.env_harvesting',
+  EXFILTRATION: 'suspicious.potential_exfiltration',
+  OBFUSCATED_CODE: 'suspicious.obfuscated_code',
+  SUSPICIOUS_NETWORK: 'suspicious.nonstandard_network',
+  CRYPTO_MINING: 'malicious.crypto_mining',
+  INJECTION_INSTRUCTIONS: 'suspicious.prompt_injection_instructions',
+  SUSPICIOUS_INSTALL_SOURCE: 'suspicious.install_untrusted_source',
+  MANIFEST_PRIVILEGED_ALWAYS: 'suspicious.privileged_always',
+  KNOWN_BLOCKED_SIGNATURE: 'malicious.known_blocked_signature',
+} as const
+
+const MALICIOUS_CODES = new Set<string>([
+  REASON_CODES.CREDENTIAL_HARVEST,
+  REASON_CODES.CRYPTO_MINING,
+  REASON_CODES.KNOWN_BLOCKED_SIGNATURE,
+])
+
+export function normalizeReasonCodes(codes: string[]) {
+  return Array.from(new Set(codes.filter(Boolean))).sort((a, b) => a.localeCompare(b))
+}
+
+export function summarizeReasonCodes(codes: string[]) {
+  if (codes.length === 0) return 'No suspicious patterns detected.'
+  const top = codes.slice(0, 3).join(', ')
+  const extra = codes.length > 3 ? ` (+${codes.length - 3} more)` : ''
+  return `Detected: ${top}${extra}`
+}
+
+export function verdictFromCodes(codes: string[]): ModerationVerdict {
+  const normalized = normalizeReasonCodes(codes)
+  if (normalized.some((code) => MALICIOUS_CODES.has(code) || code.startsWith('malicious.'))) {
+    return 'malicious'
+  }
+  if (normalized.length > 0) return 'suspicious'
+  return 'clean'
+}
+
+export function legacyFlagsFromVerdict(verdict: ModerationVerdict) {
+  if (verdict === 'malicious') return ['blocked.malware']
+  if (verdict === 'suspicious') return ['flagged.suspicious']
+  return undefined
+}
diff --git a/convex/lib/skillPublish.ts b/convex/lib/skillPublish.ts
index f47f369c2..aa9743b39 100644
--- a/convex/lib/skillPublish.ts
+++ b/convex/lib/skillPublish.ts
@@ -7,6 +7,7 @@ import { getSkillBadgeMap, isSkillHighlighted } from './badges'
 import { generateChangelogForPublish } from './changelog'
 import { generateEmbedding } from './embeddings'
 import { requireGitHubAccountAge } from './githubAccount'
+import { runStaticModerationScan } from './moderationEngine'
 import type { PublicUser } from './public'
 import {
   computeQualitySignals,
@@ -206,15 +207,30 @@ export async function publishVersionForUser(
 
   const metadata = mergeSourceIntoMetadata(frontmatterMetadata, args.source, qualityAssessment)
 
-  const otherFiles = [] as Array<{ path: string; content: string }>
+  const fileContents: Array<{ path: string; content: string }> = [
+    { path: readmeFile.path, content: readmeText },
+  ]
   for (const file of publishFiles) {
-    if (!file.path || file.path.toLowerCase().endsWith('.md')) continue
+    if (!file.path || file.storageId === readmeFile.storageId) continue
     if (!isTextFile(file.path, file.contentType ?? 
undefined)) continue const content = await fetchText(ctx, file.storageId) - otherFiles.push({ path: file.path, content }) - if (otherFiles.length >= MAX_FILES_FOR_EMBEDDING) break + fileContents.push({ path: file.path, content }) } + const otherFiles = fileContents + .filter((file) => !file.path.toLowerCase().endsWith('.md')) + .slice(0, MAX_FILES_FOR_EMBEDDING) + + const staticScan = runStaticModerationScan({ + slug, + displayName, + summary, + frontmatter, + metadata, + files: publishFiles.map((file) => ({ path: file.path, size: file.size })), + fileContents, + }) + const embeddingText = buildEmbeddingText({ frontmatter, readme: readmeText, @@ -272,6 +288,7 @@ export async function publishVersionForUser( license: PLATFORM_SKILL_LICENSE, }, summary, + staticScan, embedding, qualityAssessment: qualityAssessment ? { diff --git a/convex/schema.ts b/convex/schema.ts index d9d669ae8..b4c8b98b8 100644 --- a/convex/schema.ts +++ b/convex/schema.ts @@ -92,6 +92,26 @@ const skills = defineTable({ ), moderationNotes: v.optional(v.string()), moderationReason: v.optional(v.string()), + moderationVerdict: v.optional( + v.union(v.literal('clean'), v.literal('suspicious'), v.literal('malicious')), + ), + moderationReasonCodes: v.optional(v.array(v.string())), + moderationEvidence: v.optional( + v.array( + v.object({ + code: v.string(), + severity: v.union(v.literal('info'), v.literal('warn'), v.literal('critical')), + file: v.string(), + line: v.number(), + message: v.string(), + evidence: v.string(), + }), + ), + ), + moderationSummary: v.optional(v.string()), + moderationEngineVersion: v.optional(v.string()), + moderationEvaluatedAt: v.optional(v.number()), + moderationSourceVersionId: v.optional(v.id('skillVersions')), quality: v.optional( v.object({ score: v.number(), @@ -254,6 +274,25 @@ const skillVersions = defineTable({ checkedAt: v.number(), }), ), + staticScan: v.optional( + v.object({ + status: v.union(v.literal('clean'), v.literal('suspicious'), v.literal('malicious')), + reasonCodes: v.array(v.string()), + findings: v.array( + v.object({ + code: v.string(), + severity: v.union(v.literal('info'), v.literal('warn'), v.literal('critical')), + file: v.string(), + line: v.number(), + message: v.string(), + evidence: v.string(), + }), + ), + summary: v.string(), + engineVersion: v.string(), + checkedAt: v.number(), + }), + ), }) .index('by_skill', ['skillId']) .index('by_skill_version', ['skillId', 'version']) diff --git a/convex/skills.ts b/convex/skills.ts index 61dd87ca4..9b9fb50da 100644 --- a/convex/skills.ts +++ b/convex/skills.ts @@ -31,7 +31,13 @@ import { readGlobalPublicSkillsCount, } from './lib/globalStats' import { buildTrendingLeaderboard } from './lib/leaderboards' +import { buildModerationSnapshot } from './lib/moderationEngine' import { deriveModerationFlags } from './lib/moderation' +import { + legacyFlagsFromVerdict, + summarizeReasonCodes, + verdictFromCodes, +} from './lib/moderationReasonCodes' import { toPublicSkill, toPublicUser } from './lib/public' import { AUTO_HIDE_REPORT_THRESHOLD, @@ -78,6 +84,57 @@ const RATE_LIMIT_DAY_MS = 24 * RATE_LIMIT_HOUR_MS const SLUG_RESERVATION_DAYS = 90 const SLUG_RESERVATION_MS = SLUG_RESERVATION_DAYS * RATE_LIMIT_DAY_MS const LOW_TRUST_ACCOUNT_AGE_MS = 30 * RATE_LIMIT_DAY_MS + +function buildStructuredModerationPatch(params: { + staticScan?: Doc<'skillVersions'>['staticScan'] + vtStatus?: string + llmStatus?: string + sourceVersionId?: Id<'skillVersions'> +}): Pick< + Doc<'skills'>, + | 'moderationVerdict' + | 'moderationReasonCodes' + | 
'moderationEvidence'
+  | 'moderationSummary'
+  | 'moderationEngineVersion'
+  | 'moderationEvaluatedAt'
+  | 'moderationSourceVersionId'
+> {
+  const snapshot = buildModerationSnapshot({
+    staticScan: params.staticScan,
+    vtStatus: params.vtStatus,
+    llmStatus: params.llmStatus,
+    sourceVersionId: params.sourceVersionId,
+  })
+
+  return {
+    moderationVerdict: snapshot.verdict,
+    moderationReasonCodes: snapshot.reasonCodes.length ? snapshot.reasonCodes : undefined,
+    moderationEvidence: snapshot.evidence.length ? snapshot.evidence : undefined,
+    moderationSummary: snapshot.summary,
+    moderationEngineVersion: snapshot.engineVersion,
+    moderationEvaluatedAt: snapshot.evaluatedAt,
+    moderationSourceVersionId: params.sourceVersionId,
+  }
+}
+
+async function patchStructuredModerationFromVersion(
+  ctx: MutationCtx,
+  skill: Doc<'skills'>,
+  version: Pick<Doc<'skillVersions'>, '_id' | 'staticScan' | 'vtAnalysis' | 'llmAnalysis'>,
+) {
+  const patch = buildStructuredModerationPatch({
+    staticScan: version.staticScan,
+    vtStatus: version.vtAnalysis?.status,
+    llmStatus: version.llmAnalysis?.status,
+    sourceVersionId: version._id,
+  })
+
+  await ctx.db.patch(skill._id, {
+    ...patch,
+    updatedAt: Date.now(),
+  })
+}
 const TRUSTED_PUBLISHER_SKILL_THRESHOLD = 10
 const LOW_TRUST_BURST_THRESHOLD_PER_HOUR = 8
 const OWNER_ACTIVITY_SCAN_LIMIT = 500
@@ -807,6 +864,11 @@ export const getBySlug = query({
           isSuspicious,
           isHiddenByMod,
           isRemoved,
+          verdict: skill.moderationVerdict,
+          reasonCodes: skill.moderationReasonCodes,
+          summary: skill.moderationSummary,
+          engineVersion: skill.moderationEngineVersion,
+          updatedAt: skill.moderationEvaluatedAt,
           reason: isOwner ? skill.moderationReason : undefined,
         }
       : null
@@ -2821,7 +2883,12 @@ export const updateVersionLlmAnalysisInternal = internalMutation({
   handler: async (ctx, args) => {
     const version = await ctx.db.get(args.versionId)
     if (!version) return
+    const nextVersion = { ...version, llmAnalysis: args.llmAnalysis }
     await ctx.db.patch(args.versionId, { llmAnalysis: args.llmAnalysis })
+
+    const skill = await ctx.db.get(version.skillId)
+    if (!skill || skill.latestVersionId !== version._id) return
+    await patchStructuredModerationFromVersion(ctx, skill, nextVersion)
   },
 })
 
@@ -2889,14 +2956,34 @@ export const approveSkillByHashInternal = internalMutation({
       ? (skill.moderationNotes ??
         'Quality gate quarantine is still active. Manual moderation review required.')
       : undefined
+    const scanner = args.scanner.trim().toLowerCase()
+    const snapshot = buildModerationSnapshot({
+      staticScan: version.staticScan,
+      vtStatus: scanner === 'vt' ? args.status : version.vtAnalysis?.status,
+      llmStatus: scanner === 'llm' ? args.status : version.llmAnalysis?.status,
+      sourceVersionId: version._id,
+    })
+    const nextReasonCodes =
+      bypassSuspicious && !isMalicious
+        ? snapshot.reasonCodes.filter((code) => !code.startsWith('suspicious.'))
+        : snapshot.reasonCodes
+    const nextVerdict = verdictFromCodes(nextReasonCodes)
+    const nextLegacyFlags = legacyFlagsFromVerdict(nextVerdict)
     const patch: Partial<Doc<'skills'>> = {
       moderationStatus: nextModerationStatus,
       moderationReason: nextModerationReason,
-      moderationFlags: newFlags,
+      moderationFlags: newFlags ?? nextLegacyFlags,
+      moderationVerdict: nextVerdict,
+      moderationReasonCodes: nextReasonCodes.length ? nextReasonCodes : undefined,
+      moderationEvidence: snapshot.evidence.length ? 
snapshot.evidence : undefined, + moderationSummary: summarizeReasonCodes(nextReasonCodes), + moderationEngineVersion: snapshot.engineVersion, + moderationEvaluatedAt: snapshot.evaluatedAt, + moderationSourceVersionId: version._id, moderationNotes: nextModerationNotes, isSuspicious: computeIsSuspicious({ - moderationFlags: newFlags, + moderationFlags: (newFlags ?? nextLegacyFlags) as string[] | undefined, moderationReason: nextModerationReason, }), hiddenAt: nextModerationStatus === 'hidden' ? now : undefined, @@ -2960,8 +3047,26 @@ export const escalateByVtInternal = internalMutation({ } const nextModerationFlags = newFlags.length ? newFlags : undefined + const snapshot = buildModerationSnapshot({ + staticScan: version.staticScan, + vtStatus: args.status, + llmStatus: version.llmAnalysis?.status, + sourceVersionId: version._id, + }) + const nextReasonCodes = + bypassSuspicious && !isMalicious + ? snapshot.reasonCodes.filter((code) => !code.startsWith('suspicious.')) + : snapshot.reasonCodes + const nextVerdict = verdictFromCodes(nextReasonCodes) const patch: Partial> = { moderationFlags: nextModerationFlags, + moderationVerdict: nextVerdict, + moderationReasonCodes: nextReasonCodes.length ? nextReasonCodes : undefined, + moderationEvidence: snapshot.evidence.length ? snapshot.evidence : undefined, + moderationSummary: summarizeReasonCodes(nextReasonCodes), + moderationEngineVersion: snapshot.engineVersion, + moderationEvaluatedAt: snapshot.evaluatedAt, + moderationSourceVersionId: version._id, updatedAt: Date.now(), } if (bypassSuspicious) { @@ -3788,6 +3893,23 @@ export const insertVersion = internalMutation({ }), }), ), + staticScan: v.object({ + status: v.union(v.literal('clean'), v.literal('suspicious'), v.literal('malicious')), + reasonCodes: v.array(v.string()), + findings: v.array( + v.object({ + code: v.string(), + severity: v.union(v.literal('info'), v.literal('warn'), v.literal('critical')), + file: v.string(), + line: v.number(), + message: v.string(), + evidence: v.string(), + }), + ), + summary: v.string(), + engineVersion: v.string(), + checkedAt: v.number(), + }), embedding: v.array(v.number()), }, handler: async (ctx, args) => { @@ -3845,6 +3967,7 @@ export const insertVersion = internalMutation({ const moderationNotes = isQualityQuarantine ? `Auto-quarantined by quality gate (score=${qualityAssessment.score}, tier=${qualityAssessment.trustTier}, similar=${qualityAssessment.similarRecentCount}).` : undefined + const staticSnapshot = buildModerationSnapshot({ staticScan: args.staticScan }) const qualityRecord = qualityAssessment ? { @@ -3907,12 +4030,14 @@ export const insertVersion = internalMutation({ const summary = args.summary ?? getFrontmatterValue(args.parsed.frontmatter, 'description') const summaryValue = summary ?? undefined - const moderationFlags = deriveModerationFlags({ + const derivedFlags = deriveModerationFlags({ skill: { slug: args.slug, displayName: args.displayName, summary: summaryValue }, parsed: args.parsed, files: args.files, }) - const newSkillFlags = moderationFlags.length ? moderationFlags : undefined + const newSkillFlags = Array.from( + new Set([...(derivedFlags ?? []), ...(staticSnapshot.legacyFlags ?? 
[])]),
+    )
     const skillId = await ctx.db.insert('skills', {
       slug: args.slug,
       displayName: args.displayName,
@@ -3932,10 +4057,19 @@ export const insertVersion = internalMutation({
       moderationStatus: initialModerationStatus,
       moderationReason,
       moderationNotes,
+      moderationVerdict: staticSnapshot.verdict,
+      moderationReasonCodes: staticSnapshot.reasonCodes.length
+        ? staticSnapshot.reasonCodes
+        : undefined,
+      moderationEvidence: staticSnapshot.evidence.length ? staticSnapshot.evidence : undefined,
+      moderationSummary: staticSnapshot.summary,
+      moderationEngineVersion: staticSnapshot.engineVersion,
+      moderationEvaluatedAt: staticSnapshot.evaluatedAt,
+      moderationSourceVersionId: undefined,
       quality: qualityRecord,
-      moderationFlags: newSkillFlags,
+      moderationFlags: newSkillFlags.length ? newSkillFlags : undefined,
       isSuspicious: computeIsSuspicious({
-        moderationFlags: newSkillFlags,
+        moderationFlags: newSkillFlags.length ? newSkillFlags : undefined,
         moderationReason: moderationReason,
       }),
       reportCount: 0,
@@ -3979,6 +4113,7 @@ export const insertVersion = internalMutation({
      changelogSource: args.changelogSource,
      files: args.files,
      parsed: args.parsed,
+      staticScan: args.staticScan,
      createdBy: userId,
      createdAt: now,
      softDeletedAt: undefined,
@@ -3994,13 +4129,18 @@ export const insertVersion = internalMutation({
     const nextSummary =
       args.summary ?? getFrontmatterValue(args.parsed.frontmatter, 'description') ?? skill.summary
-    const moderationFlags = deriveModerationFlags({
+    const derivedFlags = deriveModerationFlags({
       skill: { slug: skill.slug, displayName: args.displayName, summary: nextSummary ?? undefined },
       parsed: args.parsed,
       files: args.files,
     })
-
-    const nextFlags = moderationFlags.length ? moderationFlags : undefined
+    const moderationSnapshot = buildModerationSnapshot({
+      staticScan: args.staticScan,
+      sourceVersionId: versionId,
+    })
+    const nextFlags = Array.from(
+      new Set([...(derivedFlags ?? []), ...(moderationSnapshot.legacyFlags ?? [])]),
+    )
     const patch: Partial<Doc<'skills'>> = {
       displayName: args.displayName,
       summary: nextSummary ?? undefined,
@@ -4018,10 +4158,21 @@ export const insertVersion = internalMutation({
       moderationStatus: initialModerationStatus,
       moderationReason,
       moderationNotes,
+      moderationVerdict: moderationSnapshot.verdict,
+      moderationReasonCodes: moderationSnapshot.reasonCodes.length
+        ? moderationSnapshot.reasonCodes
+        : undefined,
+      moderationEvidence: moderationSnapshot.evidence.length
+        ? moderationSnapshot.evidence
+        : undefined,
+      moderationSummary: moderationSnapshot.summary,
+      moderationEngineVersion: moderationSnapshot.engineVersion,
+      moderationEvaluatedAt: moderationSnapshot.evaluatedAt,
+      moderationSourceVersionId: versionId,
       quality: qualityRecord ?? skill.quality,
-      moderationFlags: nextFlags,
+      moderationFlags: nextFlags.length ? nextFlags : undefined,
       isSuspicious: computeIsSuspicious({
-        moderationFlags: nextFlags,
+        moderationFlags: nextFlags.length ? nextFlags : undefined,
         moderationReason: moderationReason,
       }),
       updatedAt: now,
diff --git a/docs/security.md b/docs/security.md
index 15d625f63..967d301d0 100644
--- a/docs/security.md
+++ b/docs/security.md
@@ -39,6 +39,30 @@ read_when:
 
 - Skills directory supports an optional "Hide suspicious" filter to exclude active-but-flagged (`flagged.suspicious`) entries from browse/search results.
+
+## Skill moderation pipeline
+
+- Publishing a skill now persists a deterministic static scan result on the new version.
+- Skill moderation state stores a structured snapshot:
+  - `moderationVerdict`: `clean | suspicious | malicious`
+  - `moderationReasonCodes[]`: canonical, machine-readable reason codes
+  - `moderationEvidence[]`: capped file/line evidence for static findings
+  - `moderationSummary`, plus the engine version, evaluation timestamp, and source version ID
+- The snapshot is rebuilt from current signals on every update rather than appended to, so stale scanner codes never linger (see the sketch below).
+- Legacy moderation flags are kept in sync with the snapshot, so existing public-visibility and suspicious-skill filtering keep working.
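+
+A minimal sketch of the merged-verdict mapping (it mirrors `verdictFromCodes`
+in `convex/lib/moderationReasonCodes.ts`; the codes are real reason codes, the
+combinations are illustrative):
+
+```ts
+verdictFromCodes([]) // 'clean'
+verdictFromCodes(['suspicious.dynamic_code_execution']) // 'suspicious'
+verdictFromCodes([
+  'suspicious.dynamic_code_execution',
+  'malicious.vt_malicious', // any `malicious.*` code forces 'malicious'
+]) // 'malicious'
+```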
+
 ## AI comment scam backfill
 
 - Moderators/admins can run a comment backfill scanner to classify scam comments with OpenAI.