diff --git a/CHANGELOG.md b/CHANGELOG.md index c82b78d75..7b05f2a7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ All notable changes to OpenContracts will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] - 2025-12-28 + +### Added + +#### MCP (Model Context Protocol) Interface Proposal (Issue #387) +- **Comprehensive MCP interface design** (`docs/mcp/mcp_interface_proposal.md`): Read-only access to public OpenContracts resources for AI assistants +- **4 resource types**: corpus, document, annotation, thread - with hierarchical URI patterns +- **7 tools for discovery and retrieval**: `list_public_corpuses`, `list_documents`, `get_document_text`, `list_annotations`, `search_corpus`, `list_threads`, `get_thread_messages` +- **Anonymous user permission model**: Operates as AnonymousUser with automatic filtering to `is_public=True` resources +- **Synchronous Django ORM implementation**: Uses `sync_to_async` wrapper pattern for MCP server integration +- **Performance optimizations**: Uses existing `AnnotationQueryOptimizer`, `prefetch_related` for threaded messages, and proper pagination +- **Robust URI parsing**: Regex-based URI parsing with slug validation to prevent injection attacks +- **Helper function implementations**: Complete `format_*` functions for corpus, document, annotation, thread, and message formatting ## [Unreleased] - 2025-12-27 ### Added diff --git a/config/asgi.py b/config/asgi.py index 1185e54ea..557cbc18c 100644 --- a/config/asgi.py +++ b/config/asgi.py @@ -41,6 +41,7 @@ from config.websocket.consumers.unified_agent_conversation import ( # noqa: E402 UnifiedAgentConsumer, ) +from opencontractserver.mcp.server import mcp_asgi_app # noqa: E402 logger = logging.getLogger(__name__) @@ -52,6 +53,28 @@ # This application object is used by any ASGI server configured to use this 
file. django_application = get_asgi_application() + +def create_http_router(django_app, mcp_app): + """ + Create an HTTP router that dispatches to MCP or Django based on path. + + Routes /mcp and /mcp/* to the MCP ASGI app, everything else to Django. + The MCP server uses Streamable HTTP transport in stateless mode. + """ + + async def router(scope, receive, send): + path = scope.get("path", "") + # Match /mcp exactly or /mcp/* paths + if path == "/mcp" or path.startswith("/mcp/"): + await mcp_app(scope, receive, send) + else: + await django_app(scope, receive, send) + + return router + + +http_application = create_http_router(django_application, mcp_asgi_app) + document_query_pattern = re_path( r"ws/document/(?P[-a-zA-Z0-9_=]+)/query/(?:corpus/(?P[-a-zA-Z0-9_=]+)/)?$", DocumentQueryConsumer.as_asgi(), @@ -119,7 +142,7 @@ # 4. URL routing application = ProtocolTypeRouter( { - "http": django_application, + "http": http_application, # Routes /mcp/* to MCP, rest to Django "websocket": websocket_auth_middleware(URLRouter(websocket_urlpatterns)), } ) diff --git a/config/settings/base.py b/config/settings/base.py index 0a407a1b8..73f95fbcd 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -720,7 +720,7 @@ "http://127.0.0.1:5173", ] -DEFAULT_IMAGE = """""" # noqa +DEFAULT_IMAGE = """""" # noqa # Model paths DOCLING_MODELS_PATH = env.str("DOCLING_MODELS_PATH", default="/models/docling") @@ -983,3 +983,16 @@ ) POSTHOG_HOST = env.str("POSTHOG_HOST", default="https://us.i.posthog.com") MODE = "LOCAL" + +# MCP Server Configuration +# ------------------------------------------------------------------------------ +# See docs/mcp/mcp_interface_proposal.md for details +MCP_SERVER = { + "enabled": env.bool("MCP_SERVER_ENABLED", default=False), + "max_results_per_page": env.int("MCP_MAX_RESULTS_PER_PAGE", default=100), + "rate_limit": { + "requests": env.int("MCP_RATE_LIMIT_REQUESTS", default=100), + "window": env.int("MCP_RATE_LIMIT_WINDOW", default=60), + }, + 
"cache_ttl": env.int("MCP_CACHE_TTL", default=300), +} diff --git a/docs/mcp/README.md b/docs/mcp/README.md new file mode 100644 index 000000000..6f45adda2 --- /dev/null +++ b/docs/mcp/README.md @@ -0,0 +1,174 @@ +# OpenContracts MCP Server + +## TL;DR + +OpenContracts exposes a read-only [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) server for AI assistants to access **public** corpuses, documents, annotations, and discussion threads. + +- **Endpoint**: `POST /mcp/` (Streamable HTTP, stateless) +- **Scope**: Public resources only (anonymous user visibility) +- **Auth**: None required (public data only) + +### Claude Desktop Quick Start + +Add to `~/.config/Claude/claude_desktop_config.json`: + +```json +{ + "mcpServers": { + "opencontracts": { + "command": "npx", + "args": [ + "mcp-remote", + "https://your-instance.com/mcp/" + ] + } + } +} +``` + +--- + +## Available Tools + +| Tool | Description | +|------|-------------| +| `list_public_corpuses` | List all public corpuses (paginated, searchable) | +| `list_documents` | List documents in a corpus | +| `get_document_text` | Get full extracted text from a document | +| `list_annotations` | List annotations on a document (filter by page/label) | +| `search_corpus` | Semantic vector search within a corpus | +| `list_threads` | List discussion threads in a corpus | +| `get_thread_messages` | Get messages in a thread (flat or hierarchical) | + +## Available Resources + +Resources use URI patterns for direct access: + +| URI Pattern | Description | +|-------------|-------------| +| `corpus://{corpus_slug}` | Corpus metadata and document list | +| `document://{corpus_slug}/{document_slug}` | Document with extracted text | +| `annotation://{corpus_slug}/{document_slug}/{annotation_id}` | Specific annotation | +| `thread://{corpus_slug}/threads/{thread_id}` | Thread with messages | + +--- + +## Transport Options + +### HTTP (Streamable HTTP) + +The primary transport. 
Stateless mode - each request is independent. + +```bash +# Test with curl (Streamable HTTP clients must send the Accept header shown) +curl -X POST https://your-instance.com/mcp/ \ + -H "Content-Type: application/json" \ + -H "Accept: application/json, text/event-stream" \ + -d '{"jsonrpc": "2.0", "method": "tools/list", "id": 1}' +``` + +### stdio (CLI) + +For local development or direct integration: + +```bash +cd /path/to/OpenContracts +python -m opencontractserver.mcp.server +``` + +--- + +## Example Usage + +### List Public Corpuses + +```json +{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "list_public_corpuses", + "arguments": {"limit": 10} + }, + "id": 1 +} +``` + +### Semantic Search + +```json +{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "search_corpus", + "arguments": { + "corpus_slug": "my-corpus", + "query": "indemnification clause", + "limit": 5 + } + }, + "id": 2 +} +``` + +### Read Resource + +```json +{ + "jsonrpc": "2.0", + "method": "resources/read", + "params": { + "uri": "document://my-corpus/contract-2024" + }, + "id": 3 +} +``` + +--- + +## Architecture + +``` +┌─────────────────┐ POST /mcp/ ┌──────────────────────┐ +│ MCP Client │ ◄────────────────► │ StreamableHTTP │ +│ (Claude, etc) │ JSON-RPC 2.0 │ Session Manager │ +└─────────────────┘ │ (stateless mode) │ + └──────────┬───────────┘ + │ + ┌──────────▼───────────┐ + │ MCP Server │ + │ - Tools (7) │ + │ - Resources (4) │ + └──────────┬───────────┘ + │ + ┌──────────▼───────────┐ + │ Django ORM │ + │ visible_to_user() │ + │ (AnonymousUser) │ + └──────────────────────┘ +``` + +**Key files**: +- `opencontractserver/mcp/server.py` - Server setup, ASGI app, URI parsing +- `opencontractserver/mcp/tools.py` - Tool implementations +- `opencontractserver/mcp/resources.py` - Resource handlers +- `opencontractserver/mcp/formatters.py` - Response formatters +- `config/asgi.py` - HTTP routing (`/mcp/*` → MCP app) + +--- + +## Security Model + +- **Read-only**: No mutations, no writes +- **Public only**: Uses `AnonymousUser` for all permission checks +- 
**Slug-based**: Corpuses and documents are identified by URL-safe slugs; annotations and threads are addressed by their IDs (`{annotation_id}`, `{thread_id}`), as shown in the resource URI patterns +- **No auth required**: Only public resources are accessible + +--- + +## Limitations + +- No authentication (future: JWT/API key support for private resources) +- No write operations (by design) +- No streaming of large documents (text returned in full) +- Semantic search requires corpus to have embeddings configured diff --git a/frontend/src/components/corpuses/CorpusSettings.tsx b/frontend/src/components/corpuses/CorpusSettings.tsx index 605ee05c2..15d28c5a7 100644 --- a/frontend/src/components/corpuses/CorpusSettings.tsx +++ b/frontend/src/components/corpuses/CorpusSettings.tsx @@ -12,7 +12,7 @@ import { useQuery, useReactiveVar, useMutation } from "@apollo/client"; import { toast } from "react-toastify"; import { useNavigate } from "react-router-dom"; import styled from "styled-components"; -import { editingCorpus } from "../../graphql/cache"; +import { editingCorpus, backendUserObj } from "../../graphql/cache"; import { GET_CORPUS_ACTIONS, GetCorpusActionsInput, @@ -52,6 +52,7 @@ interface CorpusSettingsProps { preferredEmbedder?: string | null; slug?: string | null; creator?: { + id?: string; email: string; username?: string; slug?: string; @@ -627,6 +628,7 @@ const SettingsContainer = styled.div` export const CorpusSettings: React.FC = ({ corpus }) => { const navigate = useNavigate(); + const currentUser = useReactiveVar(backendUserObj); // Check if myPermissions is already processed (array of PermissionTypes) or raw const permissions = @@ -639,6 +641,31 @@ export const CorpusSettings: React.FC = ({ corpus }) => { const canUpdate = permissions.includes(PermissionTypes.CAN_UPDATE); const canPermission = permissions.includes(PermissionTypes.CAN_PERMISSION); + + // Owner can always change visibility (matches backend SetCorpusVisibility permission check) + // Compare by ID first, fallback to email comparison for reliability + const isOwnerByIdentity = Boolean( + currentUser && + 
corpus.creator && + ((currentUser.id && + corpus.creator.id && + currentUser.id === corpus.creator.id) || + (currentUser.email && + corpus.creator.email && + currentUser.email === corpus.creator.email)) + ); + + // Fallback: If user has all core owner permissions, they're effectively the owner + // This handles cases where currentUser isn't loaded yet but permissions are + const hasFullOwnerPermissions = + permissions.includes(PermissionTypes.CAN_CREATE) && + permissions.includes(PermissionTypes.CAN_UPDATE) && + permissions.includes(PermissionTypes.CAN_READ) && + permissions.includes(PermissionTypes.CAN_PUBLISH) && + permissions.includes(PermissionTypes.CAN_REMOVE); + + const isOwner = isOwnerByIdentity || hasFullOwnerPermissions; + const canChangeVisibility = isOwner || canPermission; const [slugDraft, setSlugDraft] = useState(""); const [publicDraft, setPublicDraft] = useState( Boolean(corpus.isPublic) @@ -893,7 +920,7 @@ export const CorpusSettings: React.FC = ({ corpus }) => { Visibility & Slug - {!canUpdate && !canPermission && ( + {!canUpdate && !canChangeVisibility && (
= ({ corpus }) => { fontSize: "0.875rem", textTransform: "uppercase", letterSpacing: "0.08em", - color: !canPermission ? "#cbd5e1" : "#64748b", + color: !canChangeVisibility ? "#cbd5e1" : "#64748b", marginBottom: "0.75rem", fontWeight: 600, display: "flex", @@ -940,7 +967,7 @@ export const CorpusSettings: React.FC = ({ corpus }) => { }} > Public visibility - {!canPermission && ( + {!canChangeVisibility && ( = ({ corpus }) => { alignItems: "center", gap: "0.875rem", padding: "0.875rem 1rem", - background: !canPermission + background: !canChangeVisibility ? "linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%)" : "linear-gradient(135deg, #ffffff 0%, #fafbfc 100%)", border: "2px solid", - borderColor: !canPermission ? "#e2e8f0" : "#cbd5e1", + borderColor: !canChangeVisibility ? "#e2e8f0" : "#cbd5e1", borderRadius: "10px", transition: "all 0.3s ease", }} @@ -976,7 +1003,7 @@ export const CorpusSettings: React.FC = ({ corpus }) => { display: "flex", alignItems: "center", gap: "0.75rem", - cursor: !canPermission ? "not-allowed" : "pointer", + cursor: !canChangeVisibility ? "not-allowed" : "pointer", width: "100%", }} > @@ -984,13 +1011,15 @@ export const CorpusSettings: React.FC = ({ corpus }) => { id="corpus-is-public-checkbox" type="checkbox" checked={publicDraft} - disabled={!canPermission} + disabled={!canChangeVisibility} onChange={(e) => setPublicDraft(e.target.checked)} style={{ width: "20px", height: "20px", - cursor: !canPermission ? "not-allowed" : "pointer", - opacity: !canPermission ? 0.5 : 1, + cursor: !canChangeVisibility + ? "not-allowed" + : "pointer", + opacity: !canChangeVisibility ? 0.5 : 1, accentColor: "#6366f1", }} /> @@ -998,10 +1027,10 @@ export const CorpusSettings: React.FC = ({ corpus }) => { style={{ fontSize: "0.9375rem", fontWeight: 600, - color: !canPermission ? "#94a3b8" : "#1e293b", + color: !canChangeVisibility ? "#94a3b8" : "#1e293b", }} > - {publicDraft ? "Public" : "Private"} + Make corpus publicly accessible
@@ -1072,7 +1101,7 @@ export const CorpusSettings: React.FC = ({ corpus }) => {