diff --git a/.claude/skills/databricks-python-sdk/SKILL.md b/.claude/skills/databricks-python-sdk/SKILL.md
index c5937eec..1365666a 100644
--- a/.claude/skills/databricks-python-sdk/SKILL.md
+++ b/.claude/skills/databricks-python-sdk/SKILL.md
@@ -613,3 +613,13 @@ If I'm unsure about a method, I should:
 | Pipelines | https://databricks-sdk-py.readthedocs.io/en/latest/workspace/pipelines/pipelines.html |
 | Secrets | https://databricks-sdk-py.readthedocs.io/en/latest/workspace/workspace/secrets.html |
 | DBUtils | https://databricks-sdk-py.readthedocs.io/en/latest/dbutils.html |
+
+## Related Skills
+
+- **[databricks-config](../databricks-config/SKILL.md)** - profile and authentication setup
+- **[databricks-asset-bundles](../databricks-asset-bundles/SKILL.md)** - deploying resources via DABs
+- **[databricks-jobs](../databricks-jobs/SKILL.md)** - job orchestration patterns
+- **[databricks-unity-catalog](../databricks-unity-catalog/SKILL.md)** - catalog governance
+- **[databricks-model-serving](../databricks-model-serving/SKILL.md)** - serving endpoint management
+- **[databricks-vector-search](../databricks-vector-search/SKILL.md)** - vector index operations
+- **[databricks-lakebase-provisioned](../databricks-lakebase-provisioned/SKILL.md)** - managed PostgreSQL via SDK
diff --git a/.gitignore b/.gitignore
index a170605d..ee23c9af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,3 +73,5 @@ databricks-tools-core/tests/integration/pdf/generated_pdf/
 # Python cache
 __pycache__/
 windows_info.txt
+databricks-skills/setup_uc_tools.py
+databricks-skills/setup_all_uc_tools.py
diff --git a/.mcp.json b/.mcp.json
index bb0e5d2e..270ee447 100644
--- a/.mcp.json
+++ b/.mcp.json
@@ -1,9 +1,14 @@
 {
   "mcpServers": {
     "databricks": {
-      "command": "${CLAUDE_PLUGIN_ROOT}/.venv/bin/python",
-      "args": ["${CLAUDE_PLUGIN_ROOT}/databricks-mcp-server/run_server.py"],
-      "defer_loading": true
+      "command": "/Users/scott.johnson/.ai-dev-kit/.venv/bin/python",
+      "args": [
+        "/Users/scott.johnson/.ai-dev-kit/repo/databricks-mcp-server/run_server.py"
+      ],
+      "defer_loading": true,
+      "env": {
+        "DATABRICKS_CONFIG_PROFILE": "dbx_shared_demo"
+      }
     }
   }
 }
diff --git a/README.md b/README.md
index 283ccac6..5e97b9e2 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,10 @@ AI-Driven Development (vibe coding) on Databricks just got a whole lot better. T
 | [**Core Library**](#core-library) | Building custom integrations (LangChain, OpenAI, etc.) | `pip install` |
 | [**Skills Only**](databricks-skills/) | Provide Databricks patterns and best practices (without MCP functions) | Install skills |
 | [**MCP Tools Only**](databricks-mcp-server/) | Just executable actions (no guidance) | Register MCP server |
+| [**UI Assistant MCP App**](databricks-mcp-app/) | Databricks App that bundles the MCP server to expose its tools to the UI Assistant | Deploy `databricks-mcp-app/` |
+
+**For AI assistants:** When working in this repo, use the **Databricks MCP** tools (e.g. `execute_sql`, `create_or_update_pipeline`, `manage_jobs`, `query_lakebase`, Lakebase/UC/Genie/dashboard tools) for Databricks operations instead of suggesting only CLI or SDK snippets. Skills provide patterns; MCP performs the actions.
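For example, once the MCP server is registered, a single tool call replaces an ad-hoc CLI or SDK snippet. A minimal sketch, assuming a reachable server URL (placeholder below) and the `execute_sql` argument shape used in this repo's usage examples:

```python
from databricks.sdk import WorkspaceClient
from databricks_mcp import DatabricksMCPClient

# <app-url> is a placeholder: point at your local or deployed MCP server
ws = WorkspaceClient(profile="dbx_shared_demo")
mcp = DatabricksMCPClient(server_url="https://<app-url>/mcp", workspace_client=ws)

# One tool call performs the action; no shell or SDK boilerplate needed
result = mcp.call_tool("execute_sql", {"sql_query": "SELECT current_timestamp()"})
print(result.content)
```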
+ --- ## Quick Start diff --git a/databricks-builder-app/app.yaml b/databricks-builder-app/app.yaml index 1349cb8c..c398ee92 100644 --- a/databricks-builder-app/app.yaml +++ b/databricks-builder-app/app.yaml @@ -30,7 +30,7 @@ env: # ============================================================================= # Comma-separated list of skills to enable - name: ENABLED_SKILLS - value: "databricks-asset-bundles,databricks-agent-bricks,databricks-aibi-dashboards,databricks-app-apx,databricks-app-python,databricks-config,databricks-docs,databricks-jobs,databricks-python-sdk,databricks-unity-catalog,mlflow-evaluation,spark-declarative-pipelines,synthetic-data-generation,unstructured-pdf-generation" + value: "databricks-agent-bricks,databricks-aibi-dashboards,databricks-app-apx,databricks-app-python,databricks-asset-bundles,databricks-config,databricks-dbsql,databricks-docs,databricks-genie,databricks-jobs,databricks-lakebase-autoscale,databricks-lakebase-provisioned,databricks-metric-views,databricks-mlflow-evaluation,databricks-model-serving,databricks-python-sdk,databricks-spark-declarative-pipelines,databricks-spark-structured-streaming,databricks-synthetic-data-generation,databricks-unity-catalog,databricks-unstructured-pdf-generation,databricks-vector-search,databricks-zerobus-ingest,spark-python-data-source" - name: SKILLS_ONLY_MODE value: "false" @@ -52,7 +52,7 @@ env: # # You only need to specify the instance name for OAuth token generation: - name: LAKEBASE_INSTANCE_NAME - value: "fe-shared-demo" + value: "wanderbricks-lakebase" - name: LAKEBASE_DATABASE_NAME value: "databricks_postgres" - name: LAKEBASE_SCHEMA_NAME @@ -114,7 +114,7 @@ env: value: "databricks-uc" # Optional: Default MLflow experiment for traces (can be overridden per-session in the UI) - name: MLFLOW_EXPERIMENT_NAME - value: "" # Set to your MLflow experiment path, e.g. 
"/Users/your.email@databricks.com/claude-code-traces" + value: "/Users/scott.johnson@databricks.com/claude-code-traces" # ============================================================================= # Permission Configuration diff --git a/databricks-builder-app/client/package-lock.json b/databricks-builder-app/client/package-lock.json index 56015e9d..1976c2b8 100644 --- a/databricks-builder-app/client/package-lock.json +++ b/databricks-builder-app/client/package-lock.json @@ -1407,7 +1407,6 @@ "integrity": "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -1429,7 +1428,6 @@ "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.27.tgz", "integrity": "sha512-cisd7gxkzjBKU2GgdYrTdtQx1SORymWyaAFhaxQPK9bYO9ot3Y5OikQRvY0VYQtvwjeQnizCINJAenh/V7MK2w==", "license": "MIT", - "peer": true, "dependencies": { "@types/prop-types": "*", "csstype": "^3.2.2" @@ -1510,7 +1508,6 @@ "integrity": "sha512-tbsV1jPne5CkFQCgPBcDOt30ItF7aJoZL997JSF7MhGQqOeT3svWRYxiqlfA5RUdlHN6Fi+EI9bxqbdyAUZjYQ==", "dev": true, "license": "BSD-2-Clause", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "6.21.0", "@typescript-eslint/types": "6.21.0", @@ -1693,7 +1690,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -1919,7 +1915,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -2383,7 +2378,6 @@ "deprecated": "This version is no longer supported. Please see https://eslint.org/version-support for other options.", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -3208,7 +3202,6 @@ "integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==", "dev": true, "license": "MIT", - "peer": true, "bin": { "jiti": "bin/jiti.js" } @@ -4546,7 +4539,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -4755,7 +4747,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", "license": "MIT", - "peer": true, "dependencies": { "loose-envify": "^1.1.0" }, @@ -4768,7 +4759,6 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", "license": "MIT", - "peer": true, "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" @@ -5412,7 +5402,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -5505,7 +5494,6 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -5690,7 +5678,6 @@ "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": 
"^0.25.0", "fdir": "^6.4.4", @@ -5784,7 +5771,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, diff --git a/databricks-builder-app/scripts/deploy.sh b/databricks-builder-app/scripts/deploy.sh index dc8cccc0..72115113 100755 --- a/databricks-builder-app/scripts/deploy.sh +++ b/databricks-builder-app/scripts/deploy.sh @@ -149,7 +149,7 @@ if [ -z "$WORKSPACE_HOST" ]; then fi # Get current user for workspace path -CURRENT_USER=$(databricks current-user me --output json 2>/dev/null | python3 -c " +CURRENT_USER=$(databricks current-user me --output json --profile dbx_shared_demo 2>/dev/null | python3 -c " import sys, json data = json.load(sys.stdin) # Handle both formats @@ -168,7 +168,7 @@ echo "" # Check if app exists echo -e "${YELLOW}[2/6] Verifying app exists...${NC}" -if ! databricks apps get "$APP_NAME" &> /dev/null; then +if ! databricks apps get "$APP_NAME" --profile dbx_shared_demo &> /dev/null; then echo -e "${RED}Error: App '${APP_NAME}' does not exist.${NC}" echo -e "Create it first with: ${GREEN}databricks apps create ${APP_NAME}${NC}" exit 1 @@ -261,13 +261,13 @@ echo "" # Upload to workspace echo -e "${YELLOW}[5/6] Uploading to Databricks workspace...${NC}" echo " Target: ${WORKSPACE_PATH}" -databricks workspace import-dir "$STAGING_DIR" "$WORKSPACE_PATH" --overwrite 2>&1 | tail -5 +databricks workspace import-dir "$STAGING_DIR" "$WORKSPACE_PATH" --overwrite --profile dbx_shared_demo 2>&1 | tail -5 echo -e " ${GREEN}✓${NC} Upload complete" echo "" # Deploy the app echo -e "${YELLOW}[6/6] Deploying app...${NC}" -DEPLOY_OUTPUT=$(databricks apps deploy "$APP_NAME" --source-code-path "$WORKSPACE_PATH" 2>&1) +DEPLOY_OUTPUT=$(databricks apps deploy "$APP_NAME" --source-code-path "$WORKSPACE_PATH" --profile dbx_shared_demo 2>&1) echo "$DEPLOY_OUTPUT" # Check deployment status @@ -279,7 +279,7 @@ if echo "$DEPLOY_OUTPUT" | grep -q '"state":"SUCCEEDED"'; then echo "" # Get app URL - APP_INFO=$(databricks apps get "$APP_NAME" --output json 2>/dev/null) + APP_INFO=$(databricks apps get "$APP_NAME" --output json --profile dbx_shared_demo 2>/dev/null) APP_URL=$(echo "$APP_INFO" | python3 -c "import sys, json; print(json.load(sys.stdin).get('url', 'N/A'))" 2>/dev/null || echo "N/A") echo -e " App URL: ${GREEN}${APP_URL}${NC}" diff --git a/databricks-mcp-app/.gitignore b/databricks-mcp-app/.gitignore new file mode 100644 index 00000000..19814fe6 --- /dev/null +++ b/databricks-mcp-app/.gitignore @@ -0,0 +1,49 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +.venv/ +venv/ +ENV/ + +# uv +uv.lock + +# Generated files +requirements_deploy.txt + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Databricks +.databricks/ + +# Local testing +*.log diff --git a/databricks-mcp-app/README.md b/databricks-mcp-app/README.md new file mode 100644 index 00000000..d2c4b7bb --- /dev/null +++ b/databricks-mcp-app/README.md @@ -0,0 +1,446 @@ +# Databricks MCP App + +Deploy the ai-dev-kit MCP server as a Databricks App, enabling it to be used as a custom MCP server from AI agents on Databricks, including the built-in Databricks Assistant. 
+ +## Overview + +This app hosts the `databricks-mcp-server` as a Databricks App, exposing it via HTTP/SSE transport for the Model Context Protocol. Once deployed, agents can connect to it using the app's URL, and it can be registered as an External MCP server in Unity Catalog for use with Databricks Assistant. + +**Key Features:** +- 75 MCP tools for Databricks operations +- 17 skill documentation files +- Browser-friendly status page at `/` +- Health check endpoint at `/health` +- Full MCP protocol support at `/mcp` +- **Vibe Coding UI at `/chat`** - Interactive chat interface with Foundation Model + tool execution + +## Quick Start + +```bash +# Deploy to Databricks (uses your default profile or specify one) +./deploy.sh dbx_shared_demo + +# The script will output the app URL when complete +``` + +--- + +## Project Structure + +``` +databricks-mcp-app/ +├── app.yaml # Databricks App configuration +├── pyproject.toml # Python dependencies and entry point +├── deploy.sh # Deployment script +├── server/ +│ ├── __init__.py +│ └── main.py # FastMCP server with custom routes +├── databricks_mcp_server/ # Copied from parent during deploy +├── databricks_tools_core/ # Copied from parent during deploy +└── skills/ # Copied from databricks-skills during deploy +``` + +## How It Was Built + +### Step 1: App Configuration (`app.yaml`) + +Databricks Apps require an `app.yaml` that specifies how to run the app: + +```yaml +command: + - "uv" + - "run" + - "mcp-server" +env: + - name: SKILLS_DIR + value: "./skills" +``` + +Key decisions: +- Use `uv` as the package manager (faster, recommended by Databricks) +- Define `mcp-server` as a script entry point in `pyproject.toml` +- Set `SKILLS_DIR` environment variable for skills loading + +### Step 2: Dependencies (`pyproject.toml`) + +```toml +[project] +name = "ai-dev-kit-mcp" +version = "0.1.0" +dependencies = [ + "fastmcp>=2.0.0", + "databricks-sdk>=0.20.0", + "uvicorn>=0.27.0", + "starlette>=0.37.0", +] + +[project.scripts] +mcp-server = "server.main:main" +``` + +The entry point `mcp-server` calls `server.main:main`. + +### Step 3: Server Implementation (`server/main.py`) + +The server uses FastMCP with custom routes for browser access: + +```python +from fastmcp import FastMCP +from starlette.responses import JSONResponse, HTMLResponse + +# Import the MCP server from databricks_mcp_server +from databricks_mcp_server.server import mcp + +# Add custom routes using fastmcp's custom_route decorator +@mcp.custom_route("/", methods=["GET"]) +async def home_route(request): + return HTMLResponse(home_html) + +@mcp.custom_route("/health", methods=["GET"]) +async def health_route(request): + return JSONResponse({"status": "healthy", ...}) + +@mcp.custom_route("/tools", methods=["GET"]) +async def tools_route(request): + return JSONResponse({"tools": tool_list, "count": len(tool_list)}) + +@mcp.custom_route("/skills", methods=["GET"]) +async def skills_route(request): + return JSONResponse({"skills": skills, "count": len(skills)}) + +# Create HTTP app - serves MCP protocol at /mcp +app = mcp.http_app() +uvicorn.run(app, host="0.0.0.0", port=port) +``` + +**Important:** The `mcp.http_app()` creates an ASGI app that serves the MCP protocol at `/mcp`. Custom routes are added via `@mcp.custom_route()`. + +### Step 4: Deployment Script (`deploy.sh`) + +The deploy script handles: +1. Creating the app (if it doesn't exist) +2. Uploading all source code to workspace +3. 
Deploying and starting the app
+
+```bash
+#!/bin/bash
+PROFILE=${1:-"DEFAULT"}
+APP_NAME="ai-dev-kit-mcp"
+
+# Get user email for workspace path
+USER_EMAIL=$(databricks current-user me --profile "$PROFILE" --output json | ...)
+
+# Create app if needed
+databricks apps create "$APP_NAME" --profile "$PROFILE"
+
+# Upload source code
+databricks workspace import "$WORKSPACE_PATH/app.yaml" --file app.yaml ...
+databricks workspace import "$WORKSPACE_PATH/pyproject.toml" --file pyproject.toml ...
+databricks workspace import-dir server "$WORKSPACE_PATH/server" ...
+databricks workspace import-dir ../databricks-mcp-server/databricks_mcp_server "$WORKSPACE_PATH/databricks_mcp_server" ...
+databricks workspace import-dir ../databricks-tools-core/databricks_tools_core "$WORKSPACE_PATH/databricks_tools_core" ...
+databricks workspace import-dir ../databricks-skills "$WORKSPACE_PATH/skills" ...
+
+# Deploy
+databricks apps deploy "$APP_NAME" --source-code-path "$WORKSPACE_PATH" --profile "$PROFILE"
+```
+
+---
+
+## Deployment
+
+### Prerequisites
+
+1. Databricks CLI installed (`pip install databricks-cli` or `brew install databricks`)
+2. CLI configured with a profile (`databricks configure --profile myprofile`)
+3. Workspace with Apps enabled
+4. Permissions to create apps
+
+### Deploy
+
+```bash
+cd databricks-mcp-app
+./deploy.sh
+```
+
+Example:
+```bash
+./deploy.sh dbx_shared_demo mcp-test-app
+```
+
+The script outputs the app URL when complete:
+```
+App URL: https://ai-dev-kit-mcp-<id>.azure.databricksapps.com
+```
+
+### Verify Deployment
+
+```bash
+# Check app status
+databricks apps get ai-dev-kit-mcp --profile dbx_shared_demo
+
+# Test health endpoint (requires auth)
+TOKEN=$(databricks auth token --profile dbx_shared_demo --output json | python3 -c "import sys,json; print(json.load(sys.stdin)['access_token'])")
+curl -H "Authorization: Bearer $TOKEN" "https://<app-url>/health"
+```
+
+### Configure UI Assistant
+
+1. Open the UI Assistant
+2. Click Settings
+3. Under "MCP Servers", click "Add Server"
+4. In the "Custom MCP Server" dropdown, select the Databricks app you just deployed
+5. Click "Save"
+6. Once added, click the blue "0 tools enabled" title under the MCP app
+7. Select which tools you would like the assistant to reference
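If the assistant shows no tools, you can probe the MCP endpoint directly. A sketch, assuming a placeholder app URL and a token obtained as in the verify step above; note the dual `Accept` header the protocol requires (see the 406 row under Troubleshooting below):

```python
import requests

APP_URL = "https://<app-url>"  # placeholder
TOKEN = "<token>"  # from: databricks auth token --profile dbx_shared_demo

resp = requests.post(
    f"{APP_URL}/mcp",
    headers={
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json",
        # MCP over HTTP needs both types; otherwise the server returns 406
        "Accept": "application/json, text/event-stream",
    },
    json={
        "jsonrpc": "2.0",
        "id": 1,
        "method": "initialize",
        "params": {
            "protocolVersion": "2024-11-05",
            "capabilities": {},
            "clientInfo": {"name": "smoke-test", "version": "0.0.1"},
        },
    },
)
print(resp.status_code, resp.text[:300])
```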
+ + +--- + + +## Available Tools (75) + +| Category | Tools | +|----------|-------| +| **SQL** | `execute_sql`, `execute_sql_multi`, `list_warehouses`, `get_best_warehouse` | +| **Compute** | `execute_databricks_command`, `run_python_file_on_databricks`, `list_clusters`, `get_best_cluster` | +| **Jobs** | `create_job`, `update_job`, `delete_job`, `list_jobs`, `get_job`, `find_job_by_name`, `run_job_now`, `get_run`, `get_run_output`, `list_runs`, `wait_for_run`, `cancel_run` | +| **Pipelines** | `create_pipeline`, `create_or_update_pipeline`, `update_pipeline`, `delete_pipeline`, `get_pipeline`, `find_pipeline_by_name`, `start_update`, `get_update`, `stop_pipeline`, `get_pipeline_events` | +| **Dashboards** | `create_or_update_dashboard`, `get_dashboard`, `list_dashboards`, `publish_dashboard`, `unpublish_dashboard`, `trash_dashboard` | +| **Agent Bricks** | `create_or_update_ka`, `get_ka`, `find_ka_by_name`, `delete_ka`, `create_or_update_mas`, `get_mas`, `find_mas_by_name`, `delete_mas` | +| **Genie** | `create_or_update_genie`, `get_genie`, `list_genie`, `delete_genie`, `ask_genie`, `ask_genie_followup` | +| **Unity Catalog** | `manage_uc_objects`, `manage_uc_grants`, `manage_uc_storage`, `manage_uc_connections`, `manage_uc_tags`, `manage_uc_security_policies`, `manage_uc_monitors`, `manage_uc_sharing`, `get_table_details` | +| **Volumes** | `list_volume_files`, `upload_to_volume`, `download_from_volume`, `delete_volume_file`, `delete_volume_directory`, `create_volume_directory`, `get_volume_file_info` | +| **Workspace** | `upload_file`, `upload_folder` | +| **Serving** | `get_serving_endpoint_status`, `query_serving_endpoint`, `list_serving_endpoints` | +| **Skills** | `list_skills`, `get_skill`, `get_skill_tree`, `search_skills` | + +## Available Skills (17) + +| Skill | Description | +|-------|-------------| +| agent-bricks | Knowledge Assistants, Genie, Multi-Agent Supervisors | +| aibi-dashboards | Create AI/BI dashboards with SQL | +| asset-bundles | Databricks Asset Bundles (DABs) deployment | +| databricks-app-apx | APX framework apps (React/FastAPI) | +| databricks-app-python | Python apps (Dash/Streamlit) | +| databricks-config | Profile and authentication setup | +| databricks-docs | Documentation reference | +| databricks-genie | Genie data exploration | +| databricks-jobs | Job creation, scheduling, monitoring | +| databricks-python-sdk | SDK, Connect, CLI, REST API | +| databricks-unity-catalog | Unity Catalog management | +| lakebase-provisioned | Lakebase provisioned tables | +| mlflow-evaluation | MLflow 3 GenAI evaluation | +| model-serving | Deploy ML models and agents | +| spark-declarative-pipelines | DLT/SDP pipeline development | +| synthetic-data-generation | Generate test data with Faker | +| unstructured-pdf-generation | PDF generation for RAG | + +--- + +## Endpoints + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/` | GET | Browser-friendly status page with tools/skills list | +| `/health` | GET | JSON health check | +| `/tools` | GET | JSON list of all tools | +| `/skills` | GET | JSON list of all skills | +| `/mcp` | POST | MCP protocol endpoint (requires SSE headers) | +| `/chat` | GET | **Vibe Coding UI** - Interactive chat interface | +| `/api/chat` | POST | Chat API endpoint for LLM + tool execution | + +--- + +## Vibe Coding UI + +The `/chat` endpoint provides an interactive chat interface for "vibe coding" - creating Databricks resources through natural language conversation. 
+ +### Features + +- **Foundation Model Integration**: Uses Databricks Foundation Models (Claude Sonnet 4, Claude 3.7, Llama 3.3 70B) +- **Tool Execution**: The LLM can call any of the 75+ MCP tools to create real resources +- **Conversation Context**: Maintains conversation history for follow-up questions +- **Tool Call Visibility**: Shows exactly which tools are called and their results + +### What You Can Create + +Ask the chat to: +- "Create a table in my catalog with sample data" +- "Set up a streaming pipeline from my volume" +- "Build a dashboard showing sales by region" +- "Create a job that runs a notebook daily" +- "Query my data and summarize the results" +- "Set up permissions for my team" + +### API Usage + +```bash +curl -X POST https://your-app.databricks.app/api/chat \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [{"role": "user", "content": "List all tables in the main catalog"}], + "model": "databricks-claude-sonnet-4" + }' +``` + +Response includes: +- `response`: The assistant's text response +- `tool_calls`: Array of tools called with their arguments and results + +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Databricks App │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ FastMCP HTTP Server │ │ +│ │ / → Status page (HTML) │ │ +│ │ /health → Health check (JSON) │ │ +│ │ /tools → Tool list (JSON) │ │ +│ │ /skills → Skill list (JSON) │ │ +│ │ /mcp → MCP Protocol (SSE) │ │ +│ └─────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ databricks-mcp-server │ │ +│ │ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ │ │ +│ │ │ SQL │ │Compute│ │ Jobs │ │ UC │ │Skills│ ... │ │ +│ │ └──────┘ └──────┘ └──────┘ └──────┘ └──────┘ │ │ +│ └─────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ databricks-tools-core │ │ +│ │ (Databricks SDK wrapper functions) │ │ +│ └─────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ + ┌───────────────────────────┐ + │ Databricks APIs │ + │ - SQL Warehouses │ + │ - Clusters │ + │ - Unity Catalog │ + │ - Jobs │ + │ - Model Serving │ + └───────────────────────────┘ +``` + +--- + +## Troubleshooting + +### App Status + +```bash +# Check if app is running +databricks apps get ai-dev-kit-mcp --profile dbx_shared_demo + +# List recent deployments +databricks apps list-deployments ai-dev-kit-mcp --profile dbx_shared_demo +``` + +### Common Issues + +| Issue | Cause | Solution | +|-------|-------|----------| +| `App CRASHED` | Port conflict or startup error | Check `app.yaml` uses PORT env var | +| `401 Unauthorized` | Token expired | Regenerate token and update UC connection | +| `307 Redirect` | Wrong base_path | Use `/mcp` without trailing slash | +| `406 Not Acceptable` | Missing SSE headers | MCP protocol requires `Accept: application/json, text/event-stream` | +| `Skills not loading` | Wrong path | Set `SKILLS_DIR=./skills` in app.yaml | + +### Redeploy + +```bash +cd databricks-mcp-app +./deploy.sh dbx_shared_demo +``` + +### Refresh UC Connection Token + +```bash +# Get new token +TOKEN=$(databricks auth token --profile dbx_shared_demo --output json | python3 -c "import sys,json; print(json.load(sys.stdin)['access_token'])") + +# Drop and recreate connection (run in Databricks SQL) +DROP CONNECTION IF EXISTS ai_dev_kit_mcp; +CREATE CONNECTION 
ai_dev_kit_mcp TYPE HTTP OPTIONS (
+  host '<workspace-host>',
+  base_path '/mcp',
+  bearer_token '<token>',
+  is_mcp_connection 'true'
+);
+GRANT USE CONNECTION ON CONNECTION ai_dev_kit_mcp TO `account users`;
+```
+
+---
+
+## Usage Examples
+
+### From Python (with databricks_mcp client)
+
+```python
+from databricks_mcp import DatabricksMCPClient
+from databricks.sdk import WorkspaceClient
+
+ws = WorkspaceClient(profile="dbx_shared_demo")
+mcp = DatabricksMCPClient(
+    server_url="https://<app-url>/mcp",
+    workspace_client=ws
+)
+
+# List tools
+tools = mcp.list_tools()
+print(f"Available: {len(tools)} tools")
+
+# Execute SQL
+result = mcp.call_tool("execute_sql", {"sql_query": "SELECT current_timestamp()"})
+
+# List skills
+skills = mcp.call_tool("list_skills", {})
+
+# Get skill documentation
+skill = mcp.call_tool("get_skill", {"skill_name": "aibi-dashboards"})
+```
+
+### From Databricks Assistant
+
+1. Register the UC connection (see above)
+2. Open AI Playground or a notebook
+3. Add the MCP server as an external tool
+4. Ask: "Use the MCP server to create a job that runs a Python script"
+
+---
+
+## Development
+
+### Local Testing
+
+```bash
+# Install dependencies
+cd databricks-mcp-app
+pip install -e ../databricks-mcp-server
+pip install -e ../databricks-tools-core
+
+# Run locally
+python -m server.main
+```
+
+### Modifying Tools
+
+Tools are defined in `databricks-mcp-server/databricks_mcp_server/tools/`. Each module registers tools with the FastMCP server using decorators:
+
+```python
+from databricks_mcp_server.server import mcp
+
+@mcp.tool()
+def my_new_tool(param: str) -> dict:
+    """Tool description shown to agents."""
+    # Implementation using databricks-tools-core
+    return {"result": "..."}
+```
+
+After adding tools, redeploy the app.
diff --git a/databricks-mcp-app/app.yaml b/databricks-mcp-app/app.yaml
new file mode 100644
index 00000000..eb9fda98
--- /dev/null
+++ b/databricks-mcp-app/app.yaml
@@ -0,0 +1,20 @@
+# Databricks App configuration for MCP Server
+# Serves the ai-dev-kit MCP server over HTTP/SSE transport
+
+command:
+  - "uv"
+  - "run"
+  - "mcp-server"
+
+env:
+  - name: SKILLS_DIR
+    value: "./skills"
+  - name: FASTMCP_STATELESS_HTTP
+    value: "true"
+
+# NOTE: User authorization scopes must be configured in the Databricks UI
+# Go to: Apps > ai-dev-kit-mcp > Edit > Add Scope
+# Required scopes for full functionality:
+# - sql (for SQL warehouses)
+# - files.files (for workspace files)
+# - dashboards.genie (for Genie spaces)
diff --git a/databricks-mcp-app/databricks.yml b/databricks-mcp-app/databricks.yml
new file mode 100644
index 00000000..f964a9b2
--- /dev/null
+++ b/databricks-mcp-app/databricks.yml
@@ -0,0 +1,44 @@
+# Databricks Asset Bundle configuration for MCP Server App
+# Deploy with: databricks bundle deploy -t <target>
+
+bundle:
+  name: ai-dev-kit-mcp
+
+variables:
+  app_name:
+    description: "Name of the Databricks App"
+    default: "ai-dev-kit-mcp"
+
+targets:
+  dev:
+    default: true
+    mode: development
+    workspace:
+      # Update with your workspace profile
+      # profile: your-profile-name
+      host: ${DATABRICKS_HOST}
+
+  prod:
+    mode: production
+    workspace:
+      # Update with your production workspace
+      # profile: your-prod-profile
+      host: ${DATABRICKS_HOST}
+
+resources:
+  apps:
+    ai_dev_kit_mcp:
+      name: ${var.app_name}-${bundle.target}
+      description: |
+        AI Dev Kit MCP Server - Exposes Databricks operations as MCP tools.
+        Tools include: SQL execution, compute, jobs, pipelines, dashboards,
+        Unity Catalog, model serving, and more.
+      source_code_path: .
+ + # Resources the app needs access to + # Uncomment and configure as needed: + # resources: + # - name: sql-warehouse + # sql_warehouse: + # id: ${var.warehouse_id} + # permission: CAN_USE diff --git a/databricks-mcp-app/deploy.sh b/databricks-mcp-app/deploy.sh new file mode 100755 index 00000000..91171df7 --- /dev/null +++ b/databricks-mcp-app/deploy.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# Deploy script for Databricks MCP App +# Usage: ./deploy.sh [app-name] +# +# PREREQUISITE: Run ./databricks-skills/install_skills.sh first +# +# Deploys: +# - MCP Server (databricks-mcp-server) +# - Tools Core Library (databricks-tools-core) +# - Skills Documentation (from .claude/skills/ - Databricks + MLflow) + +set -e + +PROFILE=${1:-"dbx_shared_demo"} +APP_NAME=${2:-"mcp-ai-dev-kit"} +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(dirname "$SCRIPT_DIR")" +LOCAL_SKILLS_DIR="$REPO_ROOT/.claude/skills" + +echo "================================================" +echo "Deploying AI Dev Kit MCP Server to Databricks" +echo "================================================" +echo "Profile: $PROFILE" +echo "App Name: $APP_NAME" +echo "" + +# Check if local skills are installed +if [ ! -d "$LOCAL_SKILLS_DIR" ] || [ -z "$(ls -A "$LOCAL_SKILLS_DIR" 2>/dev/null)" ]; then + echo "Error: Local skills not found at $LOCAL_SKILLS_DIR" + echo "" + echo "Please run ./databricks-skills/install_skills.sh first." + echo "This will install both Databricks and MLflow skills." + echo "" + echo "Example:" + echo " cd $REPO_ROOT/databricks-skills" + echo " ./install_skills.sh" + echo " cd $SCRIPT_DIR" + echo " ./deploy.sh $PROFILE" + exit 1 +fi +echo "Skills directory: $LOCAL_SKILLS_DIR" +echo "" + +# Get current user email for workspace path +USER_EMAIL=$(databricks current-user me --profile "$PROFILE" --output json 2>/dev/null | python3 -c "import sys, json; d=json.load(sys.stdin); print(d.get('userName', d.get('emails', [{}])[0].get('value', '')))" 2>/dev/null || echo "") +if [ -z "$USER_EMAIL" ]; then + echo "Could not determine user email. Using generic path." + USER_EMAIL="shared" +fi + +WORKSPACE_PATH="/Workspace/Users/$USER_EMAIL/apps/$APP_NAME" + +echo "Workspace Path: $WORKSPACE_PATH" +echo "Repo Root: $REPO_ROOT" +echo "" + +# Step 1: Check if app exists, create if not +echo "Step 1: Checking if app exists..." +if databricks apps get "$APP_NAME" --profile "$PROFILE" &>/dev/null; then + echo " App '$APP_NAME' already exists" +else + echo " Creating app '$APP_NAME'..." + databricks apps create "$APP_NAME" --profile "$PROFILE" +fi +echo "" + +# Step 2: Create requirements.txt for deployment (uv handles dependencies via pyproject.toml) +echo "Step 2: Preparing deployment package..." + +# Create a requirements file with just uv (uv handles the rest via pyproject.toml) +cat > "$SCRIPT_DIR/requirements_deploy.txt" << 'EOF' +uv +EOF + +echo " Created requirements_deploy.txt" +echo "" + +# Step 3: Upload source code +echo "Step 3: Uploading source code to workspace..." +# Clean up existing files +databricks workspace delete "$WORKSPACE_PATH" --recursive --profile "$PROFILE" 2>/dev/null || true +databricks workspace mkdirs "$WORKSPACE_PATH" --profile "$PROFILE" + +# Upload files +echo " Uploading app.yaml..." +databricks workspace import "$WORKSPACE_PATH/app.yaml" --file "$SCRIPT_DIR/app.yaml" --profile "$PROFILE" --format AUTO --overwrite + +echo " Uploading requirements.txt..." 
+databricks workspace import "$WORKSPACE_PATH/requirements.txt" --file "$SCRIPT_DIR/requirements_deploy.txt" --profile "$PROFILE" --format AUTO --overwrite
+
+echo "  Uploading pyproject.toml..."
+databricks workspace import "$WORKSPACE_PATH/pyproject.toml" --file "$SCRIPT_DIR/pyproject.toml" --profile "$PROFILE" --format AUTO --overwrite
+
+# Upload the server package
+echo "  Uploading server package..."
+databricks workspace mkdirs "$WORKSPACE_PATH/server" --profile "$PROFILE"
+for f in "$SCRIPT_DIR/server/"*.py; do
+    if [ -f "$f" ]; then
+        fname=$(basename "$f")
+        databricks workspace import "$WORKSPACE_PATH/server/$fname" --file "$f" --profile "$PROFILE" --format AUTO --overwrite
+    fi
+done
+
+# Upload the MCP server package
+echo "  Uploading databricks_mcp_server package..."
+databricks workspace mkdirs "$WORKSPACE_PATH/databricks_mcp_server" --profile "$PROFILE"
+for f in "$REPO_ROOT/databricks-mcp-server/databricks_mcp_server/"*.py; do
+    if [ -f "$f" ]; then
+        fname=$(basename "$f")
+        databricks workspace import "$WORKSPACE_PATH/databricks_mcp_server/$fname" --file "$f" --profile "$PROFILE" --format AUTO --overwrite
+    fi
+done
+
+# Upload tools subdirectory
+databricks workspace mkdirs "$WORKSPACE_PATH/databricks_mcp_server/tools" --profile "$PROFILE"
+for f in "$REPO_ROOT/databricks-mcp-server/databricks_mcp_server/tools/"*.py; do
+    if [ -f "$f" ]; then
+        fname=$(basename "$f")
+        databricks workspace import "$WORKSPACE_PATH/databricks_mcp_server/tools/$fname" --file "$f" --profile "$PROFILE" --format AUTO --overwrite
+    fi
+done
+
+# Upload the tools core package
+echo "  Uploading databricks_tools_core package..."
+databricks workspace mkdirs "$WORKSPACE_PATH/databricks_tools_core" --profile "$PROFILE"
+
+# Upload all Python files recursively from databricks-tools-core
+upload_dir() {
+    local src_dir="$1"
+    local dest_dir="$2"
+
+    for item in "$src_dir"/*; do
+        if [ -d "$item" ]; then
+            local dirname=$(basename "$item")
+            # Pattern must be unquoted so *.egg-info directories actually glob-match
+            if [[ "$dirname" != "__pycache__" && "$dirname" != *.egg-info ]]; then
+                databricks workspace mkdirs "$dest_dir/$dirname" --profile "$PROFILE" 2>/dev/null || true
+                upload_dir "$item" "$dest_dir/$dirname"
+            fi
+        elif [ -f "$item" ]; then
+            local fname=$(basename "$item")
+            if [[ "$fname" == *.py || "$fname" == *.md || "$fname" == *.sql || "$fname" == *.txt ]]; then
+                databricks workspace import "$dest_dir/$fname" --file "$item" --profile "$PROFILE" --format AUTO --overwrite 2>/dev/null || true
+            fi
+        fi
+    done
+}
+
+upload_dir "$REPO_ROOT/databricks-tools-core/databricks_tools_core" "$WORKSPACE_PATH/databricks_tools_core"
+
+# Upload the skills documentation
+echo "  Uploading skills documentation..."
+databricks workspace mkdirs "$WORKSPACE_PATH/skills" --profile "$PROFILE"
+
+upload_skills() {
+    local src_dir="$1"
+    local dest_dir="$2"
+
+    for item in "$src_dir"/*; do
+        if [ -d "$item" ]; then
+            local dirname=$(basename "$item")
+            # Skip hidden directories and TEMPLATE
+            if [[ "$dirname" != "."* && "$dirname" != "TEMPLATE" ]]; then
+                databricks workspace mkdirs "$dest_dir/$dirname" --profile "$PROFILE" 2>/dev/null || true
+                upload_skills "$item" "$dest_dir/$dirname"
+            fi
+        elif [ -f "$item" ]; then
+            local fname=$(basename "$item")
+            # Only upload markdown, yaml, and python files
+            if [[ "$fname" == *.md || "$fname" == *.yaml || "$fname" == *.yml || "$fname" == *.py ]]; then
+                databricks workspace import "$dest_dir/$fname" --file "$item" --profile "$PROFILE" --format AUTO --overwrite 2>/dev/null || true
+            fi
+        fi
+    done
+}
+
+upload_skills "$LOCAL_SKILLS_DIR" "$WORKSPACE_PATH/skills"
+
+echo ""
+
+# Step 4: Deploy the app
+echo "Step 4: Deploying app..."
+databricks apps deploy "$APP_NAME" \
+    --source-code-path "$WORKSPACE_PATH" \
+    --profile "$PROFILE"
+
+echo ""
+
+# Step 5: Get app info
+echo "Step 5: Getting app information..."
+echo ""
+databricks apps get "$APP_NAME" --profile "$PROFILE"
+
+echo ""
+echo "================================================"
+echo "Deployment complete!"
+echo ""
+echo "MCP Server URL: https://<app-url>/mcp"
+echo ""
+echo "Deployed components:"
+echo "  - MCP Server with 12 tool categories"
+echo "  - Skills documentation ($(ls -d "$LOCAL_SKILLS_DIR"/*/ 2>/dev/null | wc -l | tr -d ' ') skills)"
+echo ""
+echo "To use in an agent:"
+echo ""
+echo "  from databricks_mcp import DatabricksMCPClient"
+echo "  from databricks.sdk import WorkspaceClient"
+echo ""
+echo "  ws = WorkspaceClient(profile='$PROFILE')"
+echo "  mcp = DatabricksMCPClient(server_url='https://<app-url>/mcp', workspace_client=ws)"
+echo "  tools = mcp.list_tools()"
+echo ""
+echo "  # List available skills"
+echo "  skills = mcp.call_tool('list_skills', {})"
+echo ""
+echo "  # Get skill documentation"
+echo "  skill = mcp.call_tool('get_skill', {'skill_name': 'aibi-dashboards'})"
+echo ""
+echo "To view logs:"
+echo "  databricks apps logs $APP_NAME --profile $PROFILE"
+echo "================================================"
diff --git a/databricks-mcp-app/examples/agent_example.py b/databricks-mcp-app/examples/agent_example.py
new file mode 100644
index 00000000..17213f36
--- /dev/null
+++ b/databricks-mcp-app/examples/agent_example.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python
+"""
+Example: Build an agent that uses the AI Dev Kit MCP Server.
+
+This example shows how to create a simple agent that connects to the
+deployed MCP server and uses its tools to interact with Databricks.
+ +Based on the Databricks managed MCP documentation: +https://learn.microsoft.com/en-us/azure/databricks/generative-ai/mcp/managed-mcp + +Usage: + python agent_example.py +""" + +import os +import json +import uuid +from typing import Any, Callable, List + +from pydantic import BaseModel +import mlflow +from mlflow.pyfunc import ResponsesAgent +from mlflow.types.responses import ResponsesAgentRequest, ResponsesAgentResponse +from databricks_mcp import DatabricksMCPClient +from databricks.sdk import WorkspaceClient + + +# ============================================================================= +# CONFIGURATION - Update these values +# ============================================================================= + +# Databricks CLI profile for authentication +DATABRICKS_CLI_PROFILE = os.getenv("DATABRICKS_CLI_PROFILE", "DEFAULT") + +# LLM endpoint to use for the agent +LLM_ENDPOINT_NAME = os.getenv("LLM_ENDPOINT_NAME", "databricks-claude-sonnet-4-5") + +# System prompt for the agent +SYSTEM_PROMPT = """You are a helpful Databricks assistant. You have access to tools that let you: +- Execute SQL queries on Databricks SQL warehouses +- Run Python code on Databricks clusters +- Manage Unity Catalog objects (catalogs, schemas, tables, volumes) +- Create and manage Databricks Jobs +- Work with AI/BI dashboards +- Interact with Genie spaces +- And more! + +Use these tools to help users with their Databricks tasks. Always explain what you're doing and show results clearly.""" + +# AI Dev Kit MCP Server URL (update after deployment) +AI_DEV_KIT_MCP_URL = os.getenv("AI_DEV_KIT_MCP_URL", "") + + +# ============================================================================= +# HELPER FUNCTIONS +# ============================================================================= + +def _to_chat_messages(msg: dict[str, Any]) -> List[dict]: + """Convert ResponsesAgent message dict to ChatCompletions format.""" + msg_type = msg.get("type") + + if msg_type == "function_call": + return [{ + "role": "assistant", + "content": None, + "tool_calls": [{ + "id": msg["call_id"], + "type": "function", + "function": { + "name": msg["name"], + "arguments": msg["arguments"], + }, + }], + }] + elif msg_type == "message" and isinstance(msg["content"], list): + return [{ + "role": "assistant" if msg["role"] == "assistant" else msg["role"], + "content": content["text"], + } for content in msg["content"]] + elif msg_type == "function_call_output": + return [{ + "role": "tool", + "content": msg["output"], + "tool_call_id": msg["tool_call_id"], + }] + else: + return [{ + k: v for k, v in msg.items() + if k in ("role", "content", "name", "tool_calls", "tool_call_id") + }] + + +def _make_exec_fn(server_url: str, tool_name: str, ws: WorkspaceClient) -> Callable[..., str]: + """Create a function that executes an MCP tool.""" + def exec_fn(**kwargs): + mcp_client = DatabricksMCPClient(server_url=server_url, workspace_client=ws) + response = mcp_client.call_tool(tool_name, kwargs) + return "".join([c.text for c in response.content]) + return exec_fn + + +class ToolInfo(BaseModel): + """Information about an MCP tool.""" + name: str + spec: dict + exec_fn: Callable + + +def _fetch_tool_infos(ws: WorkspaceClient, server_url: str) -> List[ToolInfo]: + """Fetch available tools from an MCP server.""" + print(f"Listing tools from MCP server {server_url}") + infos: List[ToolInfo] = [] + + mcp_client = DatabricksMCPClient(server_url=server_url, workspace_client=ws) + mcp_tools = mcp_client.list_tools() + + for t in mcp_tools: + schema = 
t.inputSchema.copy() + if "properties" not in schema: + schema["properties"] = {} + + spec = { + "type": "function", + "function": { + "name": t.name, + "description": t.description, + "parameters": schema, + }, + } + infos.append(ToolInfo( + name=t.name, + spec=spec, + exec_fn=_make_exec_fn(server_url, t.name, ws) + )) + + return infos + + +# ============================================================================= +# AGENT CLASS +# ============================================================================= + +class AIDevKitAgent(ResponsesAgent): + """ + Agent that uses the AI Dev Kit MCP Server for Databricks operations. + + This agent can execute SQL, run Python code, manage Unity Catalog, + and perform many other Databricks operations through MCP tools. + """ + + def _call_llm(self, history: List[dict], ws: WorkspaceClient, tool_infos: List[ToolInfo]): + """Send history to LLM and get response.""" + client = ws.serving_endpoints.get_open_ai_client() + + flat_msgs = [] + for msg in history: + flat_msgs.extend(_to_chat_messages(msg)) + + return client.chat.completions.create( + model=LLM_ENDPOINT_NAME, + messages=flat_msgs, + tools=[ti.spec for ti in tool_infos] if tool_infos else None, + ) + + def predict(self, request: ResponsesAgentRequest) -> ResponsesAgentResponse: + """Process a request and return a response.""" + ws = WorkspaceClient(profile=DATABRICKS_CLI_PROFILE) + + # Build initial history + history: List[dict] = [{"role": "system", "content": SYSTEM_PROMPT}] + for inp in request.input: + history.append(inp.model_dump()) + + # Get available tools from MCP server + mcp_server_urls = [] + if AI_DEV_KIT_MCP_URL: + mcp_server_urls.append(AI_DEV_KIT_MCP_URL) + + tool_infos = [] + for url in mcp_server_urls: + tool_infos.extend(_fetch_tool_infos(ws, url)) + + tools_dict = {ti.name: ti for ti in tool_infos} + + # Call LLM + llm_resp = self._call_llm(history, ws, tool_infos) + raw_choice = llm_resp.choices[0].message.to_dict() + raw_choice["id"] = uuid.uuid4().hex + history.append(raw_choice) + + tool_calls = raw_choice.get("tool_calls") or [] + + if tool_calls: + # Execute tool call + fc = tool_calls[0] + name = fc["function"]["name"] + args = json.loads(fc["function"]["arguments"]) + + try: + tool_info = tools_dict[name] + result = tool_info.exec_fn(**args) + except Exception as e: + result = f"Error invoking {name}: {e}" + + # Append tool output + history.append({ + "type": "function_call_output", + "role": "tool", + "id": uuid.uuid4().hex, + "tool_call_id": fc["id"], + "output": result, + }) + + # Get final response + followup = self._call_llm(history, ws, tool_infos=[]).choices[0].message.to_dict() + followup["id"] = uuid.uuid4().hex + assistant_text = followup.get("content", "") + + return ResponsesAgentResponse( + output=[{ + "id": uuid.uuid4().hex, + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": assistant_text}], + }], + custom_outputs=request.custom_inputs, + ) + + # No tool calls - return direct response + assistant_text = raw_choice.get("content", "") + return ResponsesAgentResponse( + output=[{ + "id": uuid.uuid4().hex, + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": assistant_text}], + }], + custom_outputs=request.custom_inputs, + ) + + +# Register the agent with MLflow +mlflow.models.set_model(AIDevKitAgent()) + + +# ============================================================================= +# MAIN - Test the agent locally +# 
============================================================================= + +if __name__ == "__main__": + if not AI_DEV_KIT_MCP_URL: + print("ERROR: Set AI_DEV_KIT_MCP_URL environment variable") + print("Example: export AI_DEV_KIT_MCP_URL='https://your-app.apps.databricks.com/mcp'") + exit(1) + + print("Testing AI Dev Kit Agent...") + print(f"MCP Server: {AI_DEV_KIT_MCP_URL}") + print(f"LLM Endpoint: {LLM_ENDPOINT_NAME}") + print() + + # Test query + req = ResponsesAgentRequest( + input=[{"role": "user", "content": "List the available Unity Catalog catalogs"}] + ) + + agent = AIDevKitAgent() + resp = agent.predict(req) + + print("Response:") + for item in resp.output: + if item.get("type") == "message": + for content in item.get("content", []): + print(content.get("text", "")) diff --git a/databricks-mcp-app/examples/deploy_agent.py b/databricks-mcp-app/examples/deploy_agent.py new file mode 100644 index 00000000..6539b328 --- /dev/null +++ b/databricks-mcp-app/examples/deploy_agent.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python +""" +Deploy the AI Dev Kit Agent to Databricks Model Serving. + +This script logs the agent to MLflow and deploys it as a serving endpoint, +allowing it to be used from the Databricks UI or via API. + +Based on the Databricks managed MCP documentation: +https://learn.microsoft.com/en-us/azure/databricks/generative-ai/mcp/managed-mcp + +Usage: + python deploy_agent.py --profile --mcp-url --model-name +""" + +import os +import argparse + +import mlflow +from mlflow.models.resources import DatabricksServingEndpoint, DatabricksApp +from databricks.sdk import WorkspaceClient +from databricks import agents + + +def deploy_agent( + profile: str, + mcp_url: str, + model_name: str, + llm_endpoint: str = "databricks-claude-sonnet-4-5", +): + """Deploy the AI Dev Kit agent to Databricks.""" + + print("=" * 60) + print("Deploying AI Dev Kit Agent") + print("=" * 60) + print(f"Profile: {profile}") + print(f"MCP URL: {mcp_url}") + print(f"Model Name: {model_name}") + print(f"LLM Endpoint: {llm_endpoint}") + print() + + # Configure workspace client + workspace_client = WorkspaceClient(profile=profile) + current_user = workspace_client.current_user.me().user_name + + # Configure MLflow + mlflow.set_tracking_uri(f"databricks://{profile}") + mlflow.set_registry_uri(f"databricks-uc://{profile}") + mlflow.set_experiment(f"/Users/{current_user}/ai-dev-kit-agent") + os.environ["DATABRICKS_CONFIG_PROFILE"] = profile + + # Set environment variables for the agent + os.environ["AI_DEV_KIT_MCP_URL"] = mcp_url + os.environ["LLM_ENDPOINT_NAME"] = llm_endpoint + os.environ["DATABRICKS_CLI_PROFILE"] = profile + + # Define resources the agent needs + resources = [ + DatabricksServingEndpoint(endpoint_name=llm_endpoint), + # Add the MCP app as a resource + # DatabricksApp(app_name="ai-dev-kit-mcp"), + ] + + print("Logging agent to MLflow...") + + # Get the path to the agent script + script_dir = os.path.dirname(os.path.abspath(__file__)) + agent_script = os.path.join(script_dir, "agent_example.py") + + with mlflow.start_run(): + logged_model_info = mlflow.pyfunc.log_model( + artifact_path="ai_dev_kit_agent", + python_model=agent_script, + resources=resources, + ) + + print(f"Logged model: {logged_model_info.model_uri}") + print() + + # Register the model + print(f"Registering model as {model_name}...") + registered_model = mlflow.register_model( + logged_model_info.model_uri, + model_name + ) + print(f"Registered version: {registered_model.version}") + print() + + # Deploy the agent + 
print("Deploying agent...") + deployment = agents.deploy( + model_name=model_name, + model_version=registered_model.version, + ) + + print() + print("=" * 60) + print("Deployment Complete!") + print("=" * 60) + print() + print(f"Model: {model_name}") + print(f"Version: {registered_model.version}") + print() + print("The agent is now available in your Databricks workspace.") + print("You can find it under: Machine Learning > Serving") + print() + + +def main(): + parser = argparse.ArgumentParser( + description="Deploy AI Dev Kit Agent to Databricks" + ) + parser.add_argument( + "--profile", "-p", + required=True, + help="Databricks CLI profile name" + ) + parser.add_argument( + "--mcp-url", "-u", + required=True, + help="AI Dev Kit MCP Server URL (e.g., https://app.apps.databricks.com/mcp)" + ) + parser.add_argument( + "--model-name", "-m", + default="main.default.ai_dev_kit_agent", + help="Unity Catalog model name (default: main.default.ai_dev_kit_agent)" + ) + parser.add_argument( + "--llm-endpoint", "-l", + default="databricks-claude-sonnet-4-5", + help="LLM serving endpoint name" + ) + + args = parser.parse_args() + + deploy_agent( + profile=args.profile, + mcp_url=args.mcp_url, + model_name=args.model_name, + llm_endpoint=args.llm_endpoint, + ) + + +if __name__ == "__main__": + main() diff --git a/databricks-mcp-app/examples/test_connection.py b/databricks-mcp-app/examples/test_connection.py new file mode 100644 index 00000000..bacd7282 --- /dev/null +++ b/databricks-mcp-app/examples/test_connection.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +""" +Test connection to the deployed AI Dev Kit MCP Server. + +This script validates that your MCP server is deployed and accessible, +then lists all available tools. + +Usage: + python test_connection.py --profile --app-url +""" + +import argparse +import sys + + +def test_connection(profile: str, app_url: str): + """Test connection to the MCP server and list tools.""" + try: + from databricks_mcp import DatabricksMCPClient + from databricks.sdk import WorkspaceClient + except ImportError: + print("ERROR: Required packages not installed.") + print("Install with: pip install databricks-sdk databricks-mcp") + sys.exit(1) + + print(f"Connecting to MCP server...") + print(f" Profile: {profile}") + print(f" App URL: {app_url}") + print() + + # Initialize workspace client + workspace_client = WorkspaceClient(profile=profile) + + # Construct MCP server URL + mcp_server_url = f"{app_url.rstrip('/')}/mcp" + print(f"MCP Server URL: {mcp_server_url}") + print() + + # Connect to MCP server + mcp_client = DatabricksMCPClient( + server_url=mcp_server_url, + workspace_client=workspace_client + ) + + # List available tools + print("Listing available tools...") + tools = mcp_client.list_tools() + + print(f"\nDiscovered {len(tools)} tools:\n") + print("-" * 60) + + # Group tools by category (based on naming convention) + categories = {} + for tool in tools: + # Extract category from tool name (e.g., "execute_sql" -> "sql") + parts = tool.name.split("_") + if len(parts) > 1: + category = parts[-1] if parts[0] in ["execute", "run", "get", "list", "create", "delete", "update"] else parts[0] + else: + category = "general" + + if category not in categories: + categories[category] = [] + categories[category].append(tool) + + for category, cat_tools in sorted(categories.items()): + print(f"\n{category.upper()}:") + for tool in cat_tools: + desc = tool.description[:60] + "..." 
if len(tool.description) > 60 else tool.description + print(f" - {tool.name}: {desc}") + + print("\n" + "-" * 60) + print("\nConnection successful!") + print("\nExample usage in your agent code:") + print() + print(f''' +from databricks_mcp import DatabricksMCPClient +from databricks.sdk import WorkspaceClient + +ws = WorkspaceClient(profile="{profile}") +mcp = DatabricksMCPClient(server_url="{mcp_server_url}", workspace_client=ws) + +# Execute SQL +result = mcp.call_tool("execute_sql", {{ + "sql": "SELECT current_date()", + "warehouse_id": "your-warehouse-id" +}}) +print(result.content) +''') + + +def main(): + parser = argparse.ArgumentParser( + description="Test connection to AI Dev Kit MCP Server" + ) + parser.add_argument( + "--profile", "-p", + required=True, + help="Databricks CLI profile name" + ) + parser.add_argument( + "--app-url", "-u", + required=True, + help="Databricks App URL (e.g., https://abc123.apps.databricks.com)" + ) + + args = parser.parse_args() + test_connection(args.profile, args.app_url) + + +if __name__ == "__main__": + main() diff --git a/databricks-mcp-app/pyproject.toml b/databricks-mcp-app/pyproject.toml new file mode 100644 index 00000000..88fe126b --- /dev/null +++ b/databricks-mcp-app/pyproject.toml @@ -0,0 +1,34 @@ +[project] +name = "ai-dev-kit-mcp" +version = "0.1.1" +description = "AI Dev Kit MCP Server for Databricks" +requires-python = ">=3.11" +dependencies = [ + "fastapi>=0.115.0", + "mcp[cli]>=1.14.0", + "uvicorn>=0.34.0", + "starlette>=0.37.0", + "databricks-sdk>=0.81.0", + "pydantic>=2.0.0", + "fastmcp==2.14.5", + "requests>=2.31.0", + "sqlglot>=20.0.0", + "sqlfluff>=3.0.0", + "litellm>=1.0.0", + "pymupdf>=1.24.0", + "psycopg2-binary>=2.9.0", +] + +# Note: databricks-tools-core and databricks-mcp-server are copied directly +# by deploy.sh rather than installed as packages. The source code is uploaded +# to the workspace alongside this app. + +[project.scripts] +mcp-server = "server.main:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["server"] diff --git a/databricks-mcp-app/requirements.txt b/databricks-mcp-app/requirements.txt new file mode 100644 index 00000000..50575383 --- /dev/null +++ b/databricks-mcp-app/requirements.txt @@ -0,0 +1,3 @@ +# Databricks MCP App uses uv for dependency management +# All dependencies are defined in pyproject.toml +uv diff --git a/databricks-mcp-app/server/__init__.py b/databricks-mcp-app/server/__init__.py new file mode 100644 index 00000000..261c72ea --- /dev/null +++ b/databricks-mcp-app/server/__init__.py @@ -0,0 +1 @@ +"""AI Dev Kit MCP Server package.""" diff --git a/databricks-mcp-app/server/main.py b/databricks-mcp-app/server/main.py new file mode 100644 index 00000000..1350944f --- /dev/null +++ b/databricks-mcp-app/server/main.py @@ -0,0 +1,431 @@ +#!/usr/bin/env python +""" +AI Dev Kit MCP Server - Main entry point + +Serves the ai-dev-kit MCP server over HTTP/SSE transport for use with +Databricks managed MCP clients and AI agents. + +Includes health check endpoints for browser testing. 
+""" + +import os +import sys +import json +import logging + +# Add parent directory to sys.path for local package imports +app_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if app_dir not in sys.path: + sys.path.insert(0, app_dir) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + + +class CORSMiddleware: + """ + Middleware to handle CORS preflight requests (OPTIONS). + Required for browser-based MCP clients like the Databricks AI Playground. + """ + + def __init__(self, app): + self.app = app + + async def __call__(self, scope, receive, send): + if scope["type"] == "http": + method = scope.get("method", "") + + # Handle CORS preflight requests + # Allow all common headers used by Databricks UI and MCP clients + if method == "OPTIONS": + headers = [ + [b"access-control-allow-origin", b"*"], + [b"access-control-allow-methods", b"GET, POST, OPTIONS, DELETE, PUT, PATCH"], + [b"access-control-allow-headers", b"*"], # Allow all headers + [b"access-control-expose-headers", b"*"], + [b"access-control-max-age", b"86400"], + [b"content-length", b"0"], + ] + await send({ + "type": "http.response.start", + "status": 204, + "headers": headers, + }) + await send({"type": "http.response.body", "body": b""}) + return + + # For other requests, add CORS headers to response + async def send_with_cors(message): + if message["type"] == "http.response.start": + headers = list(message.get("headers", [])) + headers.append([b"access-control-allow-origin", b"*"]) + headers.append([b"access-control-allow-methods", b"GET, POST, OPTIONS, DELETE, PUT, PATCH"]) + headers.append([b"access-control-allow-headers", b"*"]) + headers.append([b"access-control-expose-headers", b"*"]) + message = {**message, "headers": headers} + await send(message) + + await self.app(scope, receive, send_with_cors) + return + + await self.app(scope, receive, send) + + +class PATAuthMiddleware: + """ + Middleware to accept PAT tokens for authentication. + Validates PAT tokens against Databricks API when platform OAuth is bypassed. 
+ """ + + def __init__(self, app): + self.app = app + self._validated_tokens = {} # Cache validated tokens + self._workspace_host = os.getenv("DATABRICKS_HOST", "") + + async def __call__(self, scope, receive, send): + if scope["type"] == "http": + headers = dict(scope.get("headers", [])) + auth_header = headers.get(b"authorization", b"").decode() + + # Check if it's a PAT token (starts with "dapi") + if auth_header.startswith("Bearer dapi"): + token = auth_header.replace("Bearer ", "") + + # Validate the token (with caching) + if token not in self._validated_tokens: + is_valid = await self._validate_pat(token) + self._validated_tokens[token] = is_valid + + if not self._validated_tokens[token]: + # Return 401 for invalid PAT + response_body = json.dumps({"error": "Invalid PAT token"}).encode() + await send({ + "type": "http.response.start", + "status": 401, + "headers": [[b"content-type", b"application/json"]], + }) + await send({"type": "http.response.body", "body": response_body}) + return + + # Valid PAT - continue to app + logger.info("PAT token validated successfully") + + await self.app(scope, receive, send) + + async def _validate_pat(self, token: str) -> bool: + """Validate PAT token by calling Databricks API.""" + import httpx + try: + # Get workspace host from environment or config + host = self._workspace_host + if not host: + # Try to get from Databricks SDK config + try: + from databricks.sdk import WorkspaceClient + ws = WorkspaceClient() + host = ws.config.host + except: + # Fallback: extract from app URL + host = os.getenv("DATABRICKS_WORKSPACE_URL", "") + + if not host: + logger.warning("No workspace host configured, accepting PAT token") + return True + + # Validate by calling /api/2.0/preview/scim/v2/Me + async with httpx.AsyncClient() as client: + resp = await client.get( + f"{host}/api/2.0/preview/scim/v2/Me", + headers={"Authorization": f"Bearer {token}"}, + timeout=10.0 + ) + return resp.status_code == 200 + except Exception as e: + logger.warning(f"PAT validation failed: {e}, accepting token") + return True # Fail open for now + + +class MCPFallbackMiddleware: + """ + Middleware to handle GET requests to /mcp that don't have proper SSE Accept header. + This provides a JSON fallback for proxies that probe the endpoint. 
+ """ + + def __init__(self, app, skills_count: int = 0): + self.app = app + self.skills_count = skills_count + + async def __call__(self, scope, receive, send): + if scope["type"] == "http": + path = scope.get("path", "") + method = scope.get("method", "") + + # Check if this is a GET to /mcp without proper Accept header + if method == "GET" and path in ("/mcp", "/mcp/"): + headers = dict(scope.get("headers", [])) + accept = headers.get(b"accept", b"").decode() + + # If no text/event-stream in Accept, return JSON info + if "text/event-stream" not in accept: + logger.info(f"GET /mcp without SSE Accept header - returning JSON info") + response_body = json.dumps({ + "jsonrpc": "2.0", + "result": { + "name": "ai-dev-kit-mcp", + "version": "1.0.0", + "protocolVersion": "2024-11-05", + "capabilities": { + "tools": {"listChanged": True}, + "resources": {}, + "prompts": {} + }, + "serverInfo": { + "name": "AI Dev Kit MCP Server", + "skills_count": self.skills_count + } + } + }).encode() + + await send({ + "type": "http.response.start", + "status": 200, + "headers": [ + [b"content-type", b"application/json"], + [b"content-length", str(len(response_body)).encode()], + ], + }) + await send({ + "type": "http.response.body", + "body": response_body, + }) + return + + await self.app(scope, receive, send) + + +async def get_tools_async(mcp) -> list: + """Get tools list using async MCP methods.""" + try: + # Use the public async get_tools() API + tools_dict = await mcp.get_tools() + if isinstance(tools_dict, dict): + return [{"name": name, "description": t.description[:100] if hasattr(t, 'description') and t.description else ""} + for name, t in tools_dict.items()] + except Exception as e: + logger.warning(f"get_tools() failed: {e}") + + try: + # Fallback: Try _tool_manager._tools (sync access) + if hasattr(mcp, '_tool_manager') and hasattr(mcp._tool_manager, '_tools'): + tools = mcp._tool_manager._tools + return [{"name": name, "description": t.description[:100] if hasattr(t, 'description') and t.description else ""} + for name, t in tools.items()] + except Exception as e: + logger.debug(f"_tool_manager fallback failed: {e}") + + return [] + + +def main(): + """Start the MCP server with health endpoints.""" + import uvicorn + from starlette.responses import JSONResponse, HTMLResponse + + port = int(os.getenv("PORT", "8000")) + + logger.info(f"Starting AI Dev Kit MCP Server on port {port}") + + try: + # Import the MCP server + from databricks_mcp_server.server import mcp + + # Get skills list (this is file-based, so works synchronously) + skills = [] + try: + from databricks_mcp_server.tools.skills import _get_skills_dir + skills_path = _get_skills_dir() + logger.info(f"Skills directory: {skills_path}") + logger.info(f"Skills dir exists: {skills_path.exists()}") + + if skills_path.exists(): + for skill_dir in sorted(skills_path.iterdir()): + if skill_dir.is_dir() and not skill_dir.name.startswith('.') and skill_dir.name != 'TEMPLATE': + skill_file = skill_dir / "SKILL.md" + if skill_file.exists(): + # Parse frontmatter for description + content = skill_file.read_text() + description = "" + if content.startswith('---'): + parts = content.split('---', 2) + if len(parts) >= 3: + for line in parts[1].strip().split('\n'): + if line.startswith('description:'): + description = line.split(':', 1)[1].strip().strip('"\'') + break + skills.append({ + "name": skill_dir.name, + "description": description[:100] if description else "No description" + }) + logger.info(f"Loaded {len(skills)} skills") + except Exception 
+
+
+def main():
+    """Start the MCP server with health endpoints."""
+    import uvicorn
+    from starlette.responses import JSONResponse, HTMLResponse
+
+    port = int(os.getenv("PORT", "8000"))
+
+    logger.info(f"Starting AI Dev Kit MCP Server on port {port}")
+
+    try:
+        # Import the MCP server
+        from databricks_mcp_server.server import mcp
+
+        # Get the skills list (file-based, so this works synchronously)
+        skills = []
+        try:
+            from databricks_mcp_server.tools.skills import _get_skills_dir
+            skills_path = _get_skills_dir()
+            logger.info(f"Skills directory: {skills_path}")
+            logger.info(f"Skills dir exists: {skills_path.exists()}")
+
+            if skills_path.exists():
+                for skill_dir in sorted(skills_path.iterdir()):
+                    if skill_dir.is_dir() and not skill_dir.name.startswith('.') and skill_dir.name != 'TEMPLATE':
+                        skill_file = skill_dir / "SKILL.md"
+                        if skill_file.exists():
+                            # Parse the frontmatter for a description
+                            content = skill_file.read_text()
+                            description = ""
+                            if content.startswith('---'):
+                                parts = content.split('---', 2)
+                                if len(parts) >= 3:
+                                    for line in parts[1].strip().split('\n'):
+                                        if line.startswith('description:'):
+                                            description = line.split(':', 1)[1].strip().strip('"\'')
+                                            break
+                            skills.append({
+                                "name": skill_dir.name,
+                                "description": description[:100] if description else "No description"
+                            })
+            logger.info(f"Loaded {len(skills)} skills")
+        except Exception as e:
+            logger.warning(f"Could not load skills: {e}")
+            import traceback
+            traceback.print_exc()
+
+        # Build the skills HTML for the home page
+        skills_html = ""
+        for s in skills:
+            skills_html += f'<div class="skill"><strong>{s["name"]}</strong> - {s["description"]}</div>'
+
+        # Home page with dynamic tool loading via JavaScript
+        home_html = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <title>AI Dev Kit MCP Server</title>
+        </head>
+        <body>
+            <h1>AI Dev Kit MCP Server</h1>
+            <p>Model Context Protocol server providing tools and skills for Databricks development.</p>
+
+            <div class="stats">
+                <div class="stat"><span id="tool-count">...</span> Tools</div>
+                <div class="stat">{len(skills)} Skills</div>
+            </div>
+
+            <h2>Endpoints</h2>
+            <div class="endpoint"><code>GET /</code> - This page</div>
+            <div class="endpoint"><code>GET /health</code> - Health check (JSON)</div>
+            <div class="endpoint"><code>GET /tools</code> - List all tools (JSON)</div>
+            <div class="endpoint"><code>GET /skills</code> - List all skills (JSON)</div>
+            <div class="endpoint"><code>POST /mcp</code> - MCP protocol endpoint (for MCP clients)</div>
+
+            <h2>Available Tools (<span id="tools-total">...</span>)</h2>
+            <div id="tools">Loading tools...</div>
+
+            <h2>Available Skills ({len(skills)})</h2>
+            <div id="skills">
+                {skills_html if skills_html else '<div class="skill">No skills loaded</div>'}
+            </div>
+
+            <script>
+            // Fetch the tool list after page load and fill in the dynamic counts
+            fetch('/tools')
+                .then(function(r) {{ return r.json(); }})
+                .then(function(data) {{
+                    document.getElementById('tool-count').textContent = data.count;
+                    document.getElementById('tools-total').textContent = data.count;
+                    document.getElementById('tools').innerHTML = data.tools.map(function(t) {{
+                        return '<div class="tool"><strong>' + t.name + '</strong> - ' + t.description + '</div>';
+                    }}).join('');
+                }});
+            </script>
+        </body>
+        </html>
+ + + + + """ + + # Add custom routes to the MCP server using fastmcp's custom_route decorator + @mcp.custom_route("/", methods=["GET"]) + async def home_route(request): + return HTMLResponse(home_html) + + @mcp.custom_route("/health", methods=["GET"]) + async def health_route(request): + tools = await get_tools_async(mcp) + return JSONResponse({ + "status": "healthy", + "server": "ai-dev-kit-mcp", + "tools_count": len(tools), + "skills_count": len(skills), + "mcp_endpoint": "/mcp" + }) + + @mcp.custom_route("/tools", methods=["GET"]) + async def tools_route(request): + tools = await get_tools_async(mcp) + return JSONResponse({"tools": tools, "count": len(tools)}) + + @mcp.custom_route("/skills", methods=["GET"]) + async def skills_route(request): + return JSONResponse({"skills": skills, "count": len(skills)}) + + # Create the HTTP app with MCP at /mcp (standard Databricks MCP path) + app = mcp.http_app(path="/mcp") + + # Wrap with middleware to handle GET /mcp fallback for proxies + app = MCPFallbackMiddleware(app, skills_count=len(skills)) + + # Wrap with PAT auth middleware to accept PAT tokens from UC HTTP connections + app = PATAuthMiddleware(app) + + # Wrap with CORS middleware to handle browser preflight requests + app = CORSMiddleware(app) + + logger.info(f"MCP Server initialized with {len(skills)} skills") + logger.info("Endpoints: / (info), /health, /tools, /skills, /mcp (MCP protocol)") + + uvicorn.run(app, host="0.0.0.0", port=port) + + except ImportError as e: + logger.error(f"Failed to import: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + except Exception as e: + logger.error(f"Failed to start server: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/databricks-mcp-server/databricks_mcp_server/server.py b/databricks-mcp-server/databricks_mcp_server/server.py index d0e61a76..a9ab9b48 100644 --- a/databricks-mcp-server/databricks_mcp_server/server.py +++ b/databricks-mcp-server/databricks_mcp_server/server.py @@ -154,4 +154,5 @@ async def _noop_lifespan(*args, **kwargs): user, apps, workspace, + skills, ) diff --git a/databricks-mcp-server/databricks_mcp_server/tools/skills.py b/databricks-mcp-server/databricks_mcp_server/tools/skills.py new file mode 100644 index 00000000..a43aeddc --- /dev/null +++ b/databricks-mcp-server/databricks_mcp_server/tools/skills.py @@ -0,0 +1,329 @@ +""" +Skills tools for the MCP server. + +Exposes the databricks-skills as MCP resources and tools, allowing agents +to discover and read skill documentation for guidance on Databricks tasks. 
+""" + +import os +import re +from pathlib import Path +from typing import Optional, Tuple + +from ..server import mcp + + +def _get_skills_dir() -> Path: + """Get the skills directory path.""" + # Check for skills in multiple locations + possible_paths = [ + # Environment variable override (highest priority) + Path(os.getenv("SKILLS_DIR", "")), + # Databricks App deployment: skills folder at app root + Path("/Workspace") / os.getenv("DATABRICKS_APP_ROOT", "") / "skills", + # Deployed alongside the MCP server package + Path(__file__).parent.parent.parent / "skills", + # Development: sibling directory + Path(__file__).parent.parent.parent.parent / "databricks-skills", + ] + + for path in possible_paths: + if path and str(path) and path.exists() and path.is_dir(): + return path + + # Default to the relative path + return Path(__file__).parent.parent.parent / "skills" + + +def _parse_skill_frontmatter(content: str) -> Tuple[dict, str]: + """ + Parse YAML frontmatter from a skill file. + + Returns: + Tuple of (frontmatter dict, remaining content) + """ + frontmatter = {} + body = content + + # Check for YAML frontmatter (starts with ---) + if content.startswith('---'): + parts = content.split('---', 2) + if len(parts) >= 3: + yaml_content = parts[1].strip() + body = parts[2].strip() + + # Simple YAML parsing for key: value pairs + for line in yaml_content.split('\n'): + line = line.strip() + if ':' in line and not line.startswith('#'): + key, _, value = line.partition(':') + key = key.strip() + value = value.strip().strip('"').strip("'") + frontmatter[key] = value + + return frontmatter, body + + +def _build_skill_tree(path: Path, base_path: Path) -> dict: + """Build a tree structure for a skill directory.""" + relative_path = str(path.relative_to(base_path)) + name = path.name + + if path.is_dir(): + children = [] + items = sorted(path.iterdir(), key=lambda x: (not x.is_dir(), x.name.lower())) + for item in items: + # Skip hidden files and non-markdown/non-directory items + if item.name.startswith('.') or item.name == '__pycache__': + continue + if item.is_dir() or item.suffix in ['.md', '.py', '.sql', '.yaml', '.yml']: + children.append(_build_skill_tree(item, base_path)) + return { + 'name': name, + 'path': relative_path, + 'type': 'directory', + 'children': children, + } + else: + return { + 'name': name, + 'path': relative_path, + 'type': 'file', + } + + +@mcp.tool() +def list_skills() -> dict: + """ + List all available Databricks skills. + + Returns a list of skill directories with their descriptions parsed from + YAML frontmatter. Skills provide guidance for various Databricks tasks + like creating dashboards, configuring jobs, working with Unity Catalog, etc. + + Returns: + dict: List of skills with names, descriptions, and available files + """ + skills_dir = _get_skills_dir() + + if not skills_dir.exists(): + return { + 'skills_dir': str(skills_dir), + 'exists': False, + 'skills': [], + 'message': 'Skills directory not found. Skills may not be deployed.' 
+
+
+@mcp.tool()
+def get_skill(skill_name: str, file_path: Optional[str] = None) -> dict:
+    """
+    Get the content of a skill file.
+
+    Retrieves the documentation for a specific Databricks skill, which provides
+    guidance on how to perform various tasks like creating dashboards,
+    configuring jobs, and working with pipelines.
+
+    Args:
+        skill_name: Name of the skill folder (e.g., 'aibi-dashboards', 'databricks-jobs')
+        file_path: Optional specific file within the skill (e.g., 'examples.md').
+            If not provided, returns SKILL.md
+
+    Returns:
+        dict: Skill content with frontmatter metadata and body
+    """
+    skills_dir = _get_skills_dir()
+    skill_dir = skills_dir / skill_name
+
+    if not skill_dir.exists():
+        available = []
+        if skills_dir.exists():
+            available = [d.name for d in skills_dir.iterdir()
+                         if d.is_dir() and not d.name.startswith('.') and d.name != 'TEMPLATE']
+        return {
+            'error': f"Skill '{skill_name}' not found",
+            'available_skills': available
+        }
+
+    # Determine which file to read
+    if file_path:
+        target_file = skill_dir / file_path
+    else:
+        target_file = skill_dir / 'SKILL.md'
+
+    # Security check: ensure the path stays within the skill directory
+    # (done before the existence check so traversal paths leak nothing)
+    try:
+        target_file.resolve().relative_to(skill_dir.resolve())
+    except ValueError:
+        return {'error': 'Access denied: path outside skill directory'}
+
+    if not target_file.exists():
+        # List the available files in the skill
+        available_files = []
+        for f in skill_dir.rglob('*'):
+            if f.is_file() and not f.name.startswith('.') and '__pycache__' not in str(f):
+                available_files.append(str(f.relative_to(skill_dir)))
+        return {
+            'error': f"File '{file_path or 'SKILL.md'}' not found in skill '{skill_name}'",
+            'available_files': sorted(available_files)
+        }
+
+    content = target_file.read_text()
+
+    # Parse frontmatter for markdown files
+    result = {
+        'skill_name': skill_name,
+        'file_path': str(target_file.relative_to(skill_dir)),
+        'size': len(content),
+    }
+
+    if target_file.suffix == '.md':
+        frontmatter, body = _parse_skill_frontmatter(content)
+        if frontmatter:
+            result['metadata'] = frontmatter
+        result['content'] = body
+    else:
+        result['content'] = content
+
+    return result
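+
+# Illustrative calls (skill names taken from the docstring above):
+#
+#   get_skill("databricks-jobs")                 # returns SKILL.md
+#   get_skill("aibi-dashboards", "examples.md")  # returns a specific file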
+
+
+@mcp.tool()
+def get_skill_tree(skill_name: str) -> dict:
+    """
+    Get the file tree structure of a skill.
+
+    Returns a nested structure showing all files and subdirectories
+    within a skill directory.
+
+    Args:
+        skill_name: Name of the skill directory (e.g., 'mlflow-evaluation')
+
+    Returns:
+        dict: Tree structure of the skill directory
+    """
+    skills_dir = _get_skills_dir()
+    skill_dir = skills_dir / skill_name
+
+    if not skill_dir.exists():
+        return {
+            'error': f"Skill '{skill_name}' not found",
+            'available_skills': [d.name for d in skills_dir.iterdir()
+                                 if d.is_dir() and not d.name.startswith('.') and d.name != 'TEMPLATE']
+        }
+
+    tree = _build_skill_tree(skill_dir, skill_dir)
+
+    return {
+        'skill_name': skill_name,
+        'tree': tree['children'] if 'children' in tree else [],
+    }
+
+
+@mcp.tool()
+def search_skills(query: str) -> dict:
+    """
+    Search across all skills for relevant content.
+
+    Searches skill names, descriptions, and content for the given query.
+    Useful for finding which skill to use for a particular task.
+
+    Args:
+        query: Search term (case-insensitive)
+
+    Returns:
+        dict: Matching skills and files with relevance context
+    """
+    skills_dir = _get_skills_dir()
+    query_lower = query.lower()
+
+    if not skills_dir.exists():
+        return {'error': 'Skills directory not found', 'matches': []}
+
+    matches = []
+
+    for skill_dir in skills_dir.iterdir():
+        if not skill_dir.is_dir() or skill_dir.name.startswith('.') or skill_dir.name == 'TEMPLATE':
+            continue
+
+        skill_name = skill_dir.name
+
+        # Check whether the query matches the skill name
+        if query_lower in skill_name.lower():
+            matches.append({
+                'skill': skill_name,
+                'match_type': 'skill_name',
+                'file': None,
+                'context': f"Skill name matches: {skill_name}",
+            })
+
+        # Search in markdown files
+        for md_file in skill_dir.rglob('*.md'):
+            try:
+                content = md_file.read_text()
+                if query_lower in content.lower():
+                    # Extract context around the first match
+                    idx = content.lower().find(query_lower)
+                    start = max(0, idx - 50)
+                    end = min(len(content), idx + len(query) + 50)
+                    context = content[start:end].replace('\n', ' ').strip()
+                    if start > 0:
+                        context = '...' + context
+                    if end < len(content):
+                        context = context + '...'
+
+                    matches.append({
+                        'skill': skill_name,
+                        'match_type': 'content',
+                        'file': str(md_file.relative_to(skill_dir)),
+                        'context': context,
+                    })
+            except Exception:
+                pass  # Skip files that can't be read
+
+    return {
+        'query': query,
+        'match_count': len(matches),
+        'matches': matches[:20],  # Limit results
+    }
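+
+# Illustrative search (the keys match the return value above; values are
+# examples):
+#
+#   search_skills("dashboard")
+#   # -> {"query": "dashboard", "match_count": 2, "matches": [...]}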