diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json index 511bd40..bc54d7c 100644 --- a/.cursor-plugin/plugin.json +++ b/.cursor-plugin/plugin.json @@ -1,10 +1,11 @@ { "name": "pinecone", - "description": "Pinecone vector database integration. Streamline your Pinecone development with powerful tools for managing vector indexes, querying data, and rapid prototyping. Use slash commands like /quickstart to generate AGENTS.md files and initialize Python projects and /query to quickly explore indexes. Access the Pinecone MCP server for creating, describing, upserting and querying indexes with Cursor. Perfect for developers building semantic search, RAG applications, recommendation systems, and other vector-based applications with Pinecone.", + "description": "Pinecone vector database integration. Streamline your Pinecone development with powerful tools for managing vector indexes, querying data, and rapid prototyping. Use slash commands like /quickstart to learn how to build with Pinecone and /query to quickly explore indexes. Access the Pinecone MCP server for creating, describing, upserting and querying indexes with Cursor. 
Perfect for developers building semantic search, RAG applications, recommendation systems, and other vector-based applications with Pinecone.", "version": "1.0.0", "author": { "name": "Pinecone" }, + "logo": "assets/logo.svg", "keywords": [ "pinecone", "semantic search", diff --git a/.github/workflows/contextualize-skills.yml b/.github/workflows/contextualize-skills.yml index 4c08337..268727e 100644 --- a/.github/workflows/contextualize-skills.yml +++ b/.github/workflows/contextualize-skills.yml @@ -2,7 +2,7 @@ name: Contextualize Incoming Skills on: pull_request: - types: [opened, synchronize] + types: [opened] jobs: contextualize: diff --git a/.mcp.json b/.mcp.json deleted file mode 100644 index 034191e..0000000 --- a/.mcp.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "mcpServers": { - "pinecone": { - "command": "npx", - "args": [ - "-y", - "@pinecone-database/mcp" - ], - "env": { - "PINECONE_API_KEY": "${PINECONE_API_KEY}" - } - } - } -} diff --git a/README.md b/README.md index e212713..e08baa4 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,84 @@ # Pinecone Cursor Plugin -Official Pinecone plugin for Cursor. Provides skills, rules, and a Pinecone MCP server integration for building with Pinecone. +Official [Pinecone](https://www.pinecone.io) plugin for [Cursor](https://cursor.com). Build semantic search, RAG, recommendation systems, and other vector-based applications with Pinecone — directly from your editor. -## Skills +## What's included -| Skill | Description | +### Skills + +Skills are specialized agent capabilities invoked automatically by Cursor Agent or manually via `/skill-name` in chat. 
+ +| Skill | What it does | |-------|-------------| -| `quickstart` | Onboarding — create an index, upload data, run your first search | -| `query` | Natural language search across Pinecone indexes via MCP | -| `cli` | Pinecone CLI (`pc`) for index and vector management | -| `assistant` | Pinecone Assistants for document Q&A with citations | -| `mcp` | Reference docs for all MCP server tools and parameters | -| `docs` | Organized links to official Pinecone documentation | -| `help` | Overview of all skills and getting-started guidance | +| `/quickstart` | Step-by-step onboarding — create an index, upload data, and run your first search. Choose between a **Database** path (vector search) or **Assistant** path (document Q&A). | +| `/query` | Search integrated indexes using natural language text via the Pinecone MCP server. | +| `/cli` | Use the Pinecone CLI (`pc`) for terminal-based index and vector management. | +| `/assistant` | Create, manage, and chat with Pinecone Assistants for document Q&A with citations. Includes scripts for uploading files, syncing changes, and retrieving context. | +| `/mcp` | Reference documentation for all Pinecone MCP server tools and their parameters. | +| `/docs` | Curated links to official Pinecone documentation, organized by topic. | +| `/help` | Overview of all available skills and what you need to get started. | + +### MCP Server + +The plugin bundles the [Pinecone MCP server](https://github.com/pinecone-io/pinecone-mcp) (`@pinecone-database/mcp`), giving Cursor Agent direct access to your Pinecone resources: -## MCP Server +- Create, describe, and delete indexes +- Upsert and query vectors +- Search Pinecone documentation +- Manage index configurations -The plugin bundles the [Pinecone MCP server](https://github.com/pinecone-io/pinecone-mcp) (`@pinecone-database/mcp`). Requires a `PINECONE_API_KEY` environment variable. 
+### Bundled Scripts + +Several skills include Python scripts (run via [`uv`](https://docs.astral.sh/uv/)) for operations beyond what MCP provides: + +| Script | Skill | Purpose | +|--------|-------|---------| +| `upsert.py` | quickstart | Seed an index with sample data | +| `quickstart_complete.py` | quickstart | Standalone end-to-end quickstart | +| `create.py` | assistant | Create a new Pinecone Assistant | +| `upload.py` | assistant | Upload files to an assistant | +| `chat.py` | assistant | Chat with an assistant | +| `context.py` | assistant | Retrieve context snippets from an assistant | +| `list.py` | assistant | List all assistants in your account | +| `sync.py` | assistant | Sync local files to an assistant | ## Prerequisites -- [Pinecone account](https://app.pinecone.io) (free) -- Pinecone API key -- Node.js v18+ (for the MCP server) +- **Pinecone account** — free at [app.pinecone.io](https://app.pinecone.io/?sessionType=signup) +- **API key** — create one in the Pinecone console, then set it: + ```bash + export PINECONE_API_KEY="your-key" + ``` +- **Node.js v18+** — required for the MCP server (`npx`) + +### Optional + +| Tool | What it enables | Install | +|------|----------------|---------| +| [Pinecone CLI](https://docs.pinecone.io/guides/operations/pinecone-cli) (`pc`) | Terminal-based index management, batch operations | `brew tap pinecone-io/tap && brew install pinecone-io/tap/pinecone` | +| [uv](https://docs.astral.sh/uv/) | Run the bundled Python scripts | [Install guide](https://docs.astral.sh/uv/getting-started/installation/) | + +## Getting started + +1. Install the plugin from the [Cursor Marketplace](https://cursor.com/marketplace) +2. Set your `PINECONE_API_KEY` environment variable +3. Open Cursor Agent chat and type `/quickstart` to get started +4. 
Verify the MCP server is connected: Cursor Settings > Features > Model Context Protocol + +## Verifying the installation + +| Component | Where to check | +|-----------|---------------| +| Skills | Cursor Settings > Rules — listed under "Agent Decides" | +| MCP Server | Cursor Settings > Features > Model Context Protocol | +| Commands | Type `/` in Agent chat and search | + +## Links + +- [Pinecone Documentation](https://docs.pinecone.io) +- [Pinecone MCP Server](https://github.com/pinecone-io/pinecone-mcp) +- [Pinecone Discord](https://discord.gg/pinecone) + +## License + +[Apache-2.0](LICENSE) diff --git a/assets/logo.svg b/assets/logo.svg new file mode 100644 index 0000000..c734279 --- /dev/null +++ b/assets/logo.svg @@ -0,0 +1,3 @@ + + + diff --git a/scripts/validate-plugin.mjs b/scripts/validate-plugin.mjs index 7fe7a3a..c5cedc4 100644 --- a/scripts/validate-plugin.mjs +++ b/scripts/validate-plugin.mjs @@ -185,15 +185,17 @@ async function main() { } } - // 7. Check MCP config - const mcpPath = path.join(repoRoot, ".mcp.json"); - if (await pathExists(mcpPath)) { - const mcp = await readJsonFile(mcpPath, "MCP config"); + // 7. Check MCP config (mcp.json or .mcp.json) + const mcpPath = path.join(repoRoot, "mcp.json"); + const mcpPathDot = path.join(repoRoot, ".mcp.json"); + const mcpFile = (await pathExists(mcpPath)) ? mcpPath : (await pathExists(mcpPathDot)) ? mcpPathDot : null; + if (mcpFile) { + const mcp = await readJsonFile(mcpFile, "MCP config"); if (mcp && !mcp.mcpServers) { - addError('.mcp.json missing "mcpServers" key'); + addError(`${path.basename(mcpFile)} missing "mcpServers" key`); } } else { - addWarning("No .mcp.json found"); + addWarning("No mcp.json found"); } // 8. 
Check hooks diff --git a/skills/assistant/SKILL.md b/skills/assistant/SKILL.md new file mode 100644 index 0000000..8449ad9 --- /dev/null +++ b/skills/assistant/SKILL.md @@ -0,0 +1,78 @@ +--- +name: assistant +description: Create, manage, and chat with Pinecone Assistants for document Q&A with citations. Handles all assistant operations - create, upload, sync, chat, context retrieval, and list. Recognizes natural language like "create an assistant from my docs", "ask my assistant about X", or "upload my docs to Pinecone". +--- + +# Pinecone Assistant + +Pinecone Assistant is a fully managed RAG service. Upload documents, ask questions, get cited answers. No embedding pipelines or infrastructure required. + +> All scripts are in `scripts/` relative to this skill directory. +> Run with: `uv run scripts/script_name.py [arguments]` + +## Operations + +| What to do | Script | Key args | +|---|---|---| +| Create an assistant | `scripts/create.py` | `--name` `--instructions` `--region` | +| Upload files | `scripts/upload.py` | `--assistant` `--source` `--patterns` | +| Sync files (incremental) | `scripts/sync.py` | `--assistant` `--source` `--delete-missing` `--dry-run` | +| Chat / ask a question | `scripts/chat.py` | `--assistant` `--message` | +| Get context snippets | `scripts/context.py` | `--assistant` `--query` `--top-k` | +| List assistants | `scripts/list.py` | `--files` `--json` | + +For full workflow details on any operation, read the relevant file in `references/`. 
+ +--- + +## Natural Language Recognition + +Proactively handle these patterns without requiring explicit commands: + +**Create:** "create an assistant", "make an assistant called X", "set up an assistant for my docs" +→ See [references/create.md](references/create.md) + +**Upload:** "upload my docs", "add files to my assistant", "index my documentation" +→ See [references/upload.md](references/upload.md) + +**Sync:** "sync my docs", "update my assistant", "keep assistant in sync", "refresh from ./docs" +→ See [references/sync.md](references/sync.md) + +**Chat:** "ask my assistant about X", "what does my assistant know about X", "chat with X" +→ See [references/chat.md](references/chat.md) + +**Context:** "search my assistant for X", "find context about X" +→ See [references/context.md](references/context.md) + +**List:** "show my assistants", "what assistants do I have" +→ Run `uv run scripts/list.py` + +--- + +## Conversation Memory + +Track the last assistant used within the conversation: +- When a user creates or first uses an assistant, remember its name +- If user says "my assistant", "it", or "the assistant" → use the last one +- Briefly confirm which assistant you're using: "Asking docs-bot..." +- If ambiguous and multiple exist → ask the user to clarify + +--- + +## Multi-Step Requests + +Handle chained requests naturally. Example: + +> "Create an assistant called docs-bot, upload my ./docs folder, and ask what the main features are" + +1. `uv run scripts/create.py --name docs-bot` +2. `uv run scripts/upload.py --assistant docs-bot --source ./docs` +3. 
`uv run scripts/chat.py --assistant docs-bot --message "what are the main features?"` + +--- + +## Prerequisites + +- `PINECONE_API_KEY` must be available — terminal: `export PINECONE_API_KEY="your-key"`, or add to a `.env` file and run scripts with `uv run --env-file .env scripts/...` +- `uv` must be installed — [install uv](https://docs.astral.sh/uv/getting-started/installation/) +- Get a free API key at: https://app.pinecone.io/?sessionType=signup diff --git a/skills/assistant/references/chat.md b/skills/assistant/references/chat.md new file mode 100644 index 0000000..d4a984f --- /dev/null +++ b/skills/assistant/references/chat.md @@ -0,0 +1,33 @@ +# Chat with Assistant + +Send a message to an assistant and receive a cited response. + +## Arguments + +- `--assistant` (required): Assistant name +- `--message` (required): The question or message +- `--stream` (optional flag): Enable streaming for faster perceived response + +## Workflow + +1. Parse arguments. If assistant missing, run `uv run scripts/list.py --json` and ask the user to select. +2. If message missing, prompt user for their question. +3. Execute: + ```bash + uv run scripts/chat.py \ + --assistant "assistant-name" \ + --message "user's question" + ``` +4. Display: + - Assistant's response + - Citations table: citation number, source file, page numbers, position + - Token usage statistics + +**Note:** File URLs in citations are temporary signed links (~1 hour). They are not displayed in output. + +## Troubleshooting + +**Assistant not found** — run list command, check for typos. +**No response or timeout** — verify assistant has files uploaded and status is "ready" (not "indexing"). +**Empty or poor responses** — assistant may lack relevant documents; suggest upload. +**PINECONE_API_KEY not set** — export the variable or add to a `.env` file, then restart your IDE/agent session. 
diff --git a/skills/assistant/references/context.md b/skills/assistant/references/context.md new file mode 100644 index 0000000..5208922 --- /dev/null +++ b/skills/assistant/references/context.md @@ -0,0 +1,39 @@ +# Retrieve Context Snippets + +Get raw context snippets from an assistant's knowledge base without generating a full chat response. Useful for debugging, custom RAG workflows, or quick lookups. + +## Arguments + +- `--assistant` (required): Assistant name +- `--query` (required): Search query text +- `--top-k` (optional): Number of snippets — default `5`, max `16` +- `--snippet-size` (optional): Max tokens per snippet — default `2048` +- `--json` (optional flag): JSON output + +## Workflow + +1. Parse arguments. If missing, list assistants and prompt for selection. +2. Execute: + ```bash + uv run scripts/context.py \ + --assistant "assistant-name" \ + --query "search text" \ + --top-k 5 + ``` +3. Display snippets: file name, page numbers, relevance score, content. + +## Context vs Chat + +**Use context when:** you want raw snippets, are debugging knowledge, need source material, or are building custom workflows. +**Use chat when:** you want synthesized answers, citations in a conversational response, or multi-turn Q&A. + +## Interpreting Results + +- **Score:** Higher (closer to 1.0) = more relevant +- **Low scores (<0.5):** Weak match, assistant may need more relevant documents, or query is too broad/specific + +## Troubleshooting + +**No results** — try broader search terms; suggest uploading more documents. +**context method not available** — update SDK: `pip install --upgrade pinecone` (requires v8.0.0+). +**Assistant not found** — check name for typos, run list command. 
diff --git a/skills/assistant/references/create.md b/skills/assistant/references/create.md new file mode 100644 index 0000000..16d8c57 --- /dev/null +++ b/skills/assistant/references/create.md @@ -0,0 +1,43 @@ +# Create Assistant + +Create a new Pinecone Assistant with custom configuration. + +## Arguments + +- `--name` (required): Unique name for the assistant +- `--instructions` (optional): Behavior directive (tone, format, language) +- `--region` (optional): `us` or `eu` — default `us` +- `--timeout` (optional): Seconds to wait for ready status — default `30` + +## Workflow + +1. Parse arguments. If name is missing, prompt the user. +2. Ask the user about region preference — US or EU. +3. Ask if user wants custom instructions. Offer examples: + - "Use professional technical tone and cite sources" + - "Respond in Spanish with formal language" +4. Execute: + ```bash + uv run scripts/create.py \ + --name "assistant-name" \ + --instructions "instructions" \ + --region "us" + ``` +5. Show assistant name, status, and host URL. +6. Offer to run upload next. + +## Naming Conventions + +Suggest: `{purpose}-{type}` — e.g. `docs-qa`, `support-bot`, `api-helper` +Avoid: `test`, `assistant1`, `my-assistant` + +## Post-Creation + +- Save the assistant host URL shown in output (needed for MCP config) +- View and manage at: https://app.pinecone.io/organizations/-/projects/-/assistant/ + +## Troubleshooting + +**Assistant name already exists** — list assistants and suggest a different name or delete the existing one. +**Timeout** — increase `--timeout 60`, check network connectivity. +**PINECONE_API_KEY not set** — export the variable or add to a `.env` file, then restart your IDE/agent session. 
diff --git a/skills/assistant/references/list.md b/skills/assistant/references/list.md new file mode 100644 index 0000000..078b387 --- /dev/null +++ b/skills/assistant/references/list.md @@ -0,0 +1,31 @@ +# List Assistants + +List all Pinecone Assistants in the account with optional file details. + +## Arguments + +- `--files` (optional flag): Show file details for each assistant +- `--json` (optional flag): JSON output + +## Usage + +```bash +# Basic listing +uv run scripts/list.py + +# With file details +uv run scripts/list.py --files + +# JSON output +uv run scripts/list.py --json + +# JSON with files (useful for scripting) +uv run scripts/list.py --files --json +``` + +## Output + +**Without `--files`:** Table with name, region, status, host. +**With `--files`:** Adds file count column, plus detailed file tables per assistant showing file name, status, and ID. + +File status is color-coded: green = available, yellow = processing. diff --git a/skills/assistant/references/sync.md b/skills/assistant/references/sync.md new file mode 100644 index 0000000..504f257 --- /dev/null +++ b/skills/assistant/references/sync.md @@ -0,0 +1,51 @@ +# Sync Files + +Incrementally sync local files to an assistant — only uploads new or changed files. Uses mtime and size to detect changes. + +## Arguments + +- `--assistant` (required): Assistant name +- `--source` (required): Local file or directory path +- `--delete-missing` (optional flag): Delete files from assistant that no longer exist locally +- `--dry-run` (optional flag): Preview changes without executing +- `--yes` / `-y` (optional flag): Skip confirmation prompt + +## Workflow + +1. Parse arguments. If missing, list assistants and prompt for selection. +2. Execute: + ```bash + uv run scripts/sync.py \ + --assistant "assistant-name" \ + --source "./docs" \ + [--delete-missing] \ + [--dry-run] \ + [--yes] + ``` +3. Script compares local files against stored metadata, shows summary, asks for confirmation (unless `--yes`). 
+ +## Flags + +- **`--delete-missing`** — removes files from the assistant that no longer exist locally. Use when cleaning up removed content. +- **`--dry-run`** — shows exactly what would change with no side effects. Always recommend this first. +- **`--yes`** — skips confirmation. Useful for automation; combine with `--dry-run` to verify first. + +## Common Workflow + +```bash +# Preview first +uv run scripts/sync.py --assistant my-docs --source ./docs --dry-run + +# Then apply +uv run scripts/sync.py --assistant my-docs --source ./docs + +# Keep in sync after git pull +git pull +uv run scripts/sync.py --assistant my-docs --source ./docs --delete-missing +``` + +## Troubleshooting + +**Files showing as changed but content unchanged** — mtime updates on save even without content changes; harmless, file will be re-uploaded. +**Sync is slow** — each update = delete + re-upload (2 operations); use `--dry-run` first to check scope. +**No supported files found** — check source contains `.md`, `.txt`, `.pdf`, `.docx`, or `.json` files not in excluded directories. diff --git a/skills/assistant/references/upload.md b/skills/assistant/references/upload.md new file mode 100644 index 0000000..0c24292 --- /dev/null +++ b/skills/assistant/references/upload.md @@ -0,0 +1,49 @@ +# Upload Files + +Upload files or directory contents to a Pinecone Assistant. + +**Supported formats:** `.md`, `.txt`, `.pdf`, `.docx`, `.json` +**Not supported:** Source code (`.py`, `.js`, `.ts`, etc.) — Assistant is optimized for natural language documents. + +## Arguments + +- `--assistant` (required): Assistant name +- `--source` (required): File path or directory to upload +- `--patterns` (optional): Comma-separated glob patterns — default: `*.md,*.txt,*.pdf,*.docx,*.json` +- `--exclude` (optional): Directories to exclude — default: `node_modules,.venv,.git,build,dist` +- `--metadata` (optional): JSON string of additional metadata + +## Workflow + +1. Parse arguments. 
If missing, list assistants and prompt for selection. +2. Use Glob to preview files. Show count and types. +3. **If code files detected:** Warn user and automatically filter them out: + ``` + ⚠️ Found 50 Python files. Assistant works with documents only — I'll skip the code files. + Found 25 Markdown and 8 PDF files to upload instead. + ``` +4. Confirm with the user before proceeding. +5. Execute: + ```bash + uv run scripts/upload.py \ + --assistant "assistant-name" \ + --source "./docs" \ + --patterns "*.md,*.pdf" + ``` +6. Show progress and results. Remind user files are being indexed. + +## Default Exclusions + +`node_modules`, `.venv`, `venv`, `.git`, `build`, `dist`, `__pycache__`, `.next`, `.cache` + +## Metadata Best Practices + +```bash +--metadata '{"source":"github","repo":"owner/repo","branch":"main"}' +``` + +## Troubleshooting + +**No files found** — check patterns match file types in directory; verify path exists. +**Upload failures** — check file format is supported; try smaller batches. +**>100 files** — ask user if they want to be more selective; suggest `./docs` subdirectory. diff --git a/skills/assistant/scripts/chat.py b/skills/assistant/scripts/chat.py new file mode 100755 index 0000000..637f858 --- /dev/null +++ b/skills/assistant/scripts/chat.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +# /// script +# dependencies = [ +# "pinecone>=8.0.0", +# "typer>=0.15.0", +# "rich>=13.0.0", +# ] +# /// +""" +Chat with a Pinecone Assistant and receive cited responses. 
+ +Usage: + uv run chat.py --assistant NAME --message "Your question" [--stream] + +Environment Variables: + PINECONE_API_KEY: Required Pinecone API key + +Output: + Assistant's response with citations to source documents +""" + +import os +import typer +from rich.console import Console +from rich.panel import Panel +from rich.table import Table +from pinecone import Pinecone +from pinecone_plugins.assistant.models.chat import Message + +app = typer.Typer() +console = Console() + + +@app.command() +def main( + assistant: str = typer.Option(..., "--assistant", "-a", help="Name of the assistant to chat with"), + message: str = typer.Option(..., "--message", "-m", help="Your question or message"), +): + """Chat with a Pinecone Assistant and receive answers with source citations.""" + + # Check for API key + api_key = os.environ.get("PINECONE_API_KEY") + if not api_key: + console.print("[red]Error: PINECONE_API_KEY environment variable not set[/red]") + console.print("\nGet your API key from: https://app.pinecone.io/?sessionType=signup") + raise typer.Exit(1) + + try: + # Initialize Pinecone client + pc = Pinecone(api_key=api_key,source_tag="cursor_plugin:assistant") + asst = pc.assistant.Assistant(assistant_name=assistant) + + # Create message + user_msg = Message(role="user", content=message) + + # Display user question + console.print(Panel(f"[bold cyan]Question:[/bold cyan] {message}", border_style="cyan")) + + # Get response + with console.status("[bold blue]Thinking...[/bold blue]"): + response = asst.chat(messages=[user_msg], stream=False) + + answer_content = response.message.content + citations = response.citations if hasattr(response, 'citations') else [] + usage = response.usage if hasattr(response, 'usage') else None + + # Display assistant's response (same for both modes) + console.print("\n[bold green]Answer:[/bold green]\n") + + if answer_content: + console.print(Panel(answer_content, border_style="green", title="Assistant Response")) + else: + 
console.print("[yellow]No response content received[/yellow]") + + # Display citations if available + if citations and len(citations) > 0: + + console.print("\n[bold yellow]Citations:[/bold yellow]\n") + + citations_table = Table(show_header=True, header_style="bold yellow") + citations_table.add_column("#", style="dim", width=4) + citations_table.add_column("File", style="cyan", width=40) + citations_table.add_column("Pages", style="blue", width=15) + citations_table.add_column("Position", style="green", width=10) + + citation_num = 0 + for citation in citations: + # Each citation has a list of references + if hasattr(citation, 'references') and citation.references: + for reference in citation.references: + citation_num += 1 + + # Get file name + file_name = "Unknown" + if hasattr(reference, 'file') and hasattr(reference.file, 'name'): + file_name = reference.file.name + + # Get pages + pages = [] + if hasattr(reference, 'pages') and reference.pages: + pages = reference.pages + + # Format pages + if pages: + pages_str = ", ".join(str(p) for p in pages) + else: + pages_str = "N/A" + + # Get position from citation + position = getattr(citation, 'position', 'N/A') + + citations_table.add_row( + str(citation_num), + file_name, + pages_str, + str(position) + ) + + console.print(citations_table) + + # Optionally show download links + console.print("\n[dim]Tip: File URLs are temporary signed links valid for ~1 hour[/dim]") + + # Display token usage + if usage: + usage_info = f"""[dim]Tokens used:[/dim] +• Prompt: {getattr(usage, 'prompt_tokens', 'N/A')} +• Completion: {getattr(usage, 'completion_tokens', 'N/A')} +• Total: {getattr(usage, 'total_tokens', 'N/A')}""" + console.print(Panel(usage_info, border_style="dim", title="Usage Stats")) + + # Follow-up suggestion + console.print(f"\n[dim]Continue the conversation with another message using the same command[/dim]") + + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + raise typer.Exit(1) + + +if 
__name__ == "__main__": + app() diff --git a/skills/assistant/scripts/context.py b/skills/assistant/scripts/context.py new file mode 100755 index 0000000..438c13f --- /dev/null +++ b/skills/assistant/scripts/context.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# /// script +# dependencies = [ +# "pinecone>=8.0.0", +# "typer>=0.15.0", +# "rich>=13.0.0", +# ] +# /// +""" +Retrieve context snippets from a Pinecone Assistant's knowledge base. + +Usage: + uv run context.py --assistant NAME --query "search text" [--top-k 5] [--json] + +Environment Variables: + PINECONE_API_KEY: Required Pinecone API key + +Output: + Relevant context snippets with file sources, page numbers, and relevance scores +""" + +import os +import json as json_module +import typer +from rich.console import Console +from rich.panel import Panel +from rich.table import Table +from rich.text import Text +from pinecone import Pinecone + +app = typer.Typer() +console = Console() + + +@app.command() +def main( + assistant: str = typer.Option(..., "--assistant", "-a", help="Name of the assistant"), + query: str = typer.Option(..., "--query", "-q", help="Search query text"), + top_k: int = typer.Option(5, "--top-k", "-k", help="Number of results to return (max 16)"), + snippet_size: int = typer.Option(1024, "--snippet-size", "-s", help="Maximum tokens per snippet"), + json: bool = typer.Option(False, "--json", help="Output in JSON format"), +): + """Retrieve relevant context snippets from an assistant's knowledge base.""" + + # Check for API key + api_key = os.environ.get("PINECONE_API_KEY") + if not api_key: + console.print("[red]Error: PINECONE_API_KEY environment variable not set[/red]") + console.print("\nGet your API key from: https://app.pinecone.io/?sessionType=signup") + raise typer.Exit(1) + + try: + # Initialize Pinecone client + pc = Pinecone(api_key=api_key, source_tag="cursor_plugin:assistant") + asst = pc.assistant.Assistant(assistant_name=assistant) + + # Display query + if not json: + 
console.print(Panel(f"[bold cyan]Query:[/bold cyan] {query}", border_style="cyan")) + + # Retrieve context + with console.status("[bold blue]Searching knowledge base...[/bold blue]", spinner="dots"): + response = asst.context(query=query, top_k=top_k, snippet_size=snippet_size) + + # Get snippets from response + snippets = response.snippets if hasattr(response, 'snippets') else [] + + if json: + # JSON output + results = [] + for snippet in snippets: + file_name = "Unknown" + pages = [] + if hasattr(snippet, 'reference') and snippet.reference: + ref = snippet.reference + if hasattr(ref, 'file') and hasattr(ref.file, 'name'): + file_name = ref.file.name + if hasattr(ref, 'pages') and ref.pages: + pages = ref.pages + + results.append({ + "file_name": file_name, + "pages": pages, + "content": getattr(snippet, 'content', ''), + "score": getattr(snippet, 'score', 0.0), + "type": getattr(snippet, 'type', 'text'), + }) + print(json_module.dumps({"snippets": results, "count": len(results)}, indent=2)) + else: + # Rich formatted output + if not snippets or len(snippets) == 0: + console.print("[yellow]No context found for this query[/yellow]") + return + + console.print(f"\n[bold]Found {len(snippets)} relevant snippet(s):[/bold]\n") + + for idx, snippet in enumerate(snippets, 1): + # Extract file info from reference + file_name = "Unknown" + pages = [] + if hasattr(snippet, 'reference') and snippet.reference: + ref = snippet.reference + if hasattr(ref, 'file') and hasattr(ref.file, 'name'): + file_name = ref.file.name + if hasattr(ref, 'pages') and ref.pages: + pages = ref.pages + + score = getattr(snippet, 'score', 0.0) + content = getattr(snippet, 'content', '') + + # Create header + header = f"#{idx} - {file_name}" + if pages: + pages_str = ", ".join(str(p) for p in pages) + header += f" (Page {pages_str})" + header += f" - Score: {score:.3f}" if isinstance(score, (int, float)) else f" - Score: {score}" + + console.print(Panel( + content, + title=header, + 
border_style="blue", + subtitle=f"[dim]Relevance: {score:.2%}[/dim]" if isinstance(score, (int, float)) else None + )) + console.print() + + # Suggest next action + next_action = f"""[bold]Next steps:[/bold] +• Ask a question: [cyan]/pinecone:assistant-chat assistant {assistant} message [your question][/cyan] +• Upload more files: [cyan]/pinecone:assistant-upload assistant {assistant} source [path][/cyan]""" + console.print(Panel(next_action, title="What's Next?", border_style="green")) + + except AttributeError as e: + # Handle case where context method doesn't exist or response structure is different + console.print(f"[red]Error: Context retrieval failed[/red]") + console.print(f"[dim]Details: {e}[/dim]") + console.print("\n[yellow]Note:[/yellow] Context API requires SDK version with assistant.context() support") + console.print("\n[yellow]Try using chat instead:[/yellow]") + console.print(f" /pinecone:assistant-chat assistant {assistant} message \"{query}\"") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + raise typer.Exit(1) + + +if __name__ == "__main__": + app() diff --git a/skills/assistant/scripts/create.py b/skills/assistant/scripts/create.py new file mode 100755 index 0000000..724aa1c --- /dev/null +++ b/skills/assistant/scripts/create.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +# /// script +# dependencies = [ +# "pinecone>=8.0.0", +# "typer>=0.15.0", +# "rich>=13.0.0", +# ] +# /// +""" +Create a new Pinecone Assistant. 
@app.command()
def main(
    name: str = typer.Option(..., "--name", "-n", help="Unique name for the assistant"),
    instructions: str = typer.Option(
        "",
        "--instructions",
        "-i",
        help="Instructions for assistant behavior (max 16KB)",
    ),
    region: str = typer.Option(
        "us",
        "--region",
        "-r",
        help="Deployment region: 'us' or 'eu'",
    ),
    timeout: int = typer.Option(
        30,
        "--timeout",
        "-t",
        help="Seconds to wait for ready status",
    ),
):
    """Create a new Pinecone Assistant for document Q&A with citations.

    Validates the region and the PINECONE_API_KEY environment variable,
    creates the assistant, then prints its details, the MCP endpoint
    configuration, and suggested next steps.

    Raises:
        typer.Exit(1): on invalid region, missing API key, or API failure.
    """

    # Validate region
    if region not in ["us", "eu"]:
        console.print("[red]Error: Region must be 'us' or 'eu'[/red]")
        raise typer.Exit(1)

    # Check for API key
    api_key = os.environ.get("PINECONE_API_KEY")
    if not api_key:
        console.print("[red]Error: PINECONE_API_KEY environment variable not set[/red]")
        console.print("\nGet your API key from: https://app.pinecone.io/?sessionType=signup")
        raise typer.Exit(1)

    try:
        # Initialize Pinecone client; source_tag attributes API traffic to this plugin.
        with console.status(f"[bold blue]Creating assistant '{name}'...[/bold blue]"):
            pc = Pinecone(api_key=api_key, source_tag="cursor_plugin:assistant")

            # Create assistant; empty instructions are sent as None so the
            # service applies its default behavior.
            assistant = pc.assistant.create_assistant(
                assistant_name=name,
                instructions=instructions if instructions else None,
                region=region,
                timeout=timeout,
                metadata={"agentic-ide-source": "cursor-plugin"}
            )

        # Success message
        console.print(f"\n[bold green]✓ Assistant '{name}' created successfully![/bold green]\n")

        # Display assistant details in a table
        table = Table(show_header=False, box=None)
        table.add_column("Property", style="cyan")
        table.add_column("Value", style="white")

        table.add_row("Name", assistant.name)
        table.add_row("Region", region)
        table.add_row("Status", f"[yellow]{assistant.status}[/yellow]")
        table.add_row("Host", getattr(assistant, "host", "N/A"))
        if instructions:
            # Parentheses are required: without them "+" binds tighter than the
            # conditional expression, so short instructions were rendered twice
            # (instructions[:80] + instructions).
            instructions_preview = (
                (instructions[:80] + "...") if len(instructions) > 80 else instructions
            )
            table.add_row("Instructions", instructions_preview)

        console.print(table)

        # MCP configuration info (only shown when the API returned a host)
        host = getattr(assistant, "host", "")
        if host:
            mcp_info = f"""[bold]MCP Endpoint:[/bold]
{host}/mcp/assistants/{name}

[bold]Set environment variable:[/bold]
export PINECONE_ASSISTANT_HOST="{host}"
"""
            console.print(Panel(mcp_info, title="MCP Configuration", border_style="blue"))

        # Next steps
        next_steps = f"""[bold]Next steps:[/bold]
1. Upload files: [cyan]/pinecone:assistant-upload assistant {name} source [path][/cyan]
2. Chat: [cyan]/pinecone:assistant-chat assistant {name} message [your question][/cyan]
3. Get context: [cyan]/pinecone:assistant-context assistant {name} query [search][/cyan]"""

        console.print(Panel(next_steps, title="What's Next?", border_style="green"))

    except Exception as e:
        console.print(f"[red]Error creating assistant: {e}[/red]")
        raise typer.Exit(1)
@app.command()
def main(
    json_output: bool = typer.Option(False, "--json", help="Output in JSON format"),
    files: bool = typer.Option(False, "--files", "-f", help="Include file listing for each assistant"),
):
    """List all Pinecone Assistants in your account.

    With --json, prints machine-readable output; otherwise renders rich
    tables. With --files, each assistant's uploaded files are included.

    Raises:
        typer.Exit(1): when the API key is missing or the listing fails.
    """

    # Check for API key
    api_key = os.environ.get('PINECONE_API_KEY')
    if not api_key:
        console.print("[red]Error: PINECONE_API_KEY environment variable not set[/red]")
        console.print("\nGet your API key from: https://app.pinecone.io/?sessionType=signup")
        raise typer.Exit(1)

    try:
        # Initialize Pinecone client
        pc = Pinecone(api_key=api_key, source_tag="cursor_plugin:assistant")

        # List assistants
        assistants = pc.assistant.list_assistants()

        if not assistants:
            if json_output:
                print(json.dumps({"assistants": [], "count": 0}))
            else:
                console.print("[yellow]No assistants found.[/yellow]\n")
                console.print("Create your first assistant with:")
                console.print(" [cyan]/pinecone:assistant-create name [assistant-name][/cyan]")
            return

        # Fetch each assistant's file list at most once. Previously the
        # table view and the detail view each made their own list_files()
        # API call per assistant (2N round-trips for N assistants).
        file_lists = {}
        file_errors = {}
        if files:
            for asst in assistants:
                try:
                    assistant_instance = pc.assistant.Assistant(assistant_name=asst.name)
                    file_lists[asst.name] = assistant_instance.list_files()
                except Exception as e:
                    file_errors[asst.name] = str(e)

        if json_output:
            # JSON output
            assistants_data = []
            for asst in assistants:
                asst_data = {
                    "name": asst.name,
                    "region": getattr(asst, 'region', 'unknown'),
                    "status": asst.status,
                    "host": getattr(asst, 'host', ''),
                }

                if files:
                    file_list = file_lists.get(asst.name)
                    if file_list is not None:
                        asst_data["files"] = [
                            {
                                "name": f.name,
                                "id": f.id,
                                "status": f.status,
                                "metadata": getattr(f, 'metadata', {}),
                            }
                            for f in file_list
                        ]
                        asst_data["file_count"] = len(file_list)
                    else:
                        # list_files() failed for this assistant
                        asst_data["files"] = []
                        asst_data["file_count"] = 0
                        asst_data["file_error"] = file_errors.get(asst.name, "unknown error")

                assistants_data.append(asst_data)

            result = {
                "assistants": assistants_data,
                "count": len(assistants)
            }
            print(json.dumps(result, indent=2))
        else:
            # Rich table output
            console.print(f"\n[bold]Found {len(assistants)} assistant(s):[/bold]\n")

            # Assistants table
            table = Table(show_header=True, header_style="bold cyan")
            table.add_column("Name", style="green", width=30)
            table.add_column("Region", style="blue", width=10)
            table.add_column("Status", style="yellow", width=15)
            if files:
                table.add_column("Files", style="magenta", width=10)
            table.add_column("Host", style="dim", width=40 if files else 50)

            for asst in assistants:
                name = asst.name
                region = getattr(asst, 'region', 'unknown')
                status = asst.status
                host = getattr(asst, 'host', '')

                # Color code status
                if status == 'ready':
                    status_display = f"[green]{status}[/green]"
                elif status == 'indexing':
                    status_display = f"[yellow]{status}[/yellow]"
                else:
                    status_display = status

                if files:
                    # "?" marks assistants whose file list could not be fetched
                    file_list = file_lists.get(asst.name)
                    file_count = "?" if file_list is None else str(len(file_list))
                    table.add_row(name, region, status_display, file_count, host)
                else:
                    table.add_row(name, region, status_display, host)

            console.print(table)
            console.print()

            # If --files flag is set, show detailed file listing for each assistant
            if files:
                console.print("[bold]File Details:[/bold]\n")
                for asst in assistants:
                    if asst.name in file_errors:
                        console.print(f"[red]Error listing files for {asst.name}: {file_errors[asst.name]}[/red]\n")
                        continue

                    file_list = file_lists.get(asst.name, [])
                    if file_list:
                        # Create a table for this assistant's files
                        file_table = Table(show_header=True, header_style="bold blue", title=f"[cyan]{asst.name}[/cyan]")
                        file_table.add_column("#", style="dim", width=4)
                        file_table.add_column("File Name", style="green", width=50)
                        file_table.add_column("Status", style="yellow", width=15)
                        file_table.add_column("ID", style="dim", width=30)

                        for idx, file_obj in enumerate(file_list, 1):
                            file_name = file_obj.name
                            file_id = file_obj.id
                            file_status = file_obj.status

                            # Color code file status
                            if file_status == 'available':
                                file_status_display = f"[green]{file_status}[/green]"
                            elif file_status == 'processing':
                                file_status_display = f"[yellow]{file_status}[/yellow]"
                            else:
                                file_status_display = file_status

                            file_table.add_row(str(idx), file_name, file_status_display, file_id)

                        console.print(file_table)
                        console.print()
                    else:
                        console.print(f"[dim]{asst.name}: No files uploaded[/dim]\n")

            # Next steps panel
            next_steps = """[bold]Next steps:[/bold]
• List with files: [cyan]/pinecone:assistant-list --files[/cyan]
• Chat: [cyan]/pinecone:assistant-chat assistant [name] message [your question][/cyan]
• Upload: [cyan]/pinecone:assistant-upload assistant [name] source [path][/cyan]
• Context: [cyan]/pinecone:assistant-context assistant [name] query [search][/cyan]"""

            console.print(Panel(next_steps, title="Available Commands", border_style="blue"))

    except Exception as e:
        console.print(f"[red]Error listing assistants: {e}[/red]")
        raise typer.Exit(1)
# File types the Assistant upload API accepts.
SUPPORTED_EXTENSIONS = {'.md', '.txt', '.pdf', '.docx', '.json'}

# Directory names that never contain user documentation.
EXCLUDE_DIRS = {'node_modules', '.venv', '.git', 'build', 'dist', '__pycache__', '.pytest_cache'}


def should_exclude_path(path: Path, source_root: Path) -> bool:
    """Return True when *path* sits under an excluded or hidden component relative to *source_root*."""
    try:
        parts = path.relative_to(source_root).parts
    except ValueError:
        # Not located under the source root at all -> never sync it.
        return True
    return any(part in EXCLUDE_DIRS or part.startswith('.') for part in parts)


def find_files(source_path: Path) -> list[Path]:
    """Collect every supported file below *source_path* (or the file itself), sorted.

    A single-file source is returned as a one-element list when its
    extension is supported, otherwise an empty list. Directories are
    walked recursively with excluded/hidden components skipped.
    """
    if source_path.is_file():
        return [source_path] if source_path.suffix.lower() in SUPPORTED_EXTENSIONS else []

    return sorted(
        candidate
        for candidate in source_path.rglob('*')
        if candidate.is_file()
        and candidate.suffix.lower() in SUPPORTED_EXTENSIONS
        and not should_exclude_path(candidate, source_path)
    )


def get_file_info(file_path: Path):
    """Snapshot the local metadata (mtime, size) used for change detection."""
    st = file_path.stat()
    return {'mtime': st.st_mtime, 'size': st.st_size}


def file_changed(local_info: dict, remote_metadata: dict) -> bool:
    """Decide whether the local copy differs from the remote one via mtime and size."""
    remote_mtime = remote_metadata.get('mtime')
    remote_size = remote_metadata.get('size')

    if remote_mtime is None or remote_size is None:
        # Nothing recorded remotely: treat as changed so the file is re-uploaded.
        return True

    same = (local_info['mtime'] == float(remote_mtime)
            and local_info['size'] == int(remote_size))
    return not same
@app.command()
def main(
    assistant: str = typer.Option(..., "--assistant", "-a", help="Name of the assistant"),
    source: str = typer.Option(..., "--source", "-s", help="Local file or directory path"),
    delete_missing: bool = typer.Option(False, "--delete-missing", help="Delete files from assistant that don't exist locally"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Show what would change without making changes"),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"),
):
    """Sync local files to Pinecone Assistant, only uploading new or changed files.

    Compares local files against the assistant's uploaded files using the
    mtime and size stored in remote metadata, then uploads new files,
    re-uploads changed ones, and (with --delete-missing) deletes remote
    files that no longer exist locally. --dry-run previews the plan.

    Raises:
        typer.Exit(0): when no supported local files are found.
        typer.Exit(1): on missing API key, bad source path, or API errors.
    """

    # Check for API key
    api_key = os.environ.get("PINECONE_API_KEY")
    if not api_key:
        console.print("[red]Error: PINECONE_API_KEY environment variable not set[/red]")
        console.print("\nGet your API key from: https://app.pinecone.io/?sessionType=signup")
        raise typer.Exit(1)

    # Validate source path
    source_path = Path(source).resolve()
    if not source_path.exists():
        console.print(f"[red]Error: Source path does not exist: {source}[/red]")
        raise typer.Exit(1)

    try:
        # Initialize Pinecone client
        pc = Pinecone(api_key=api_key, source_tag="cursor_plugin:assistant")
        asst = pc.assistant.Assistant(assistant_name=assistant)

        console.print(Panel(
            f"[bold cyan]Assistant:[/bold cyan] {assistant}\n"
            f"[bold cyan]Source:[/bold cyan] {source_path}",
            title="Sync Configuration",
            border_style="cyan"
        ))

        # Step 1: Get current files in assistant
        with console.status("[bold blue]Fetching assistant files...[/bold blue]", spinner="dots"):
            remote_files = asst.list_files()

        # Build map of file_path -> file object. Files uploaded without a
        # 'file_path' metadata key fall back to their display name.
        remote_file_map = {}
        for f in remote_files:
            metadata = getattr(f, 'metadata', {}) or {}
            file_path = metadata.get('file_path', f.name)
            remote_file_map[file_path] = {
                'file_obj': f,
                'metadata': metadata
            }

        console.print(f"[dim]Found {len(remote_files)} file(s) in assistant[/dim]\n")

        # Step 2: Find local files
        with console.status("[bold blue]Scanning local files...[/bold blue]", spinner="dots"):
            local_files = find_files(source_path)

        if not local_files:
            console.print("[yellow]No supported files found in source path[/yellow]")
            console.print(f"Supported extensions: {', '.join(sorted(SUPPORTED_EXTENSIONS))}")
            raise typer.Exit(0)

        console.print(f"[dim]Found {len(local_files)} local file(s)[/dim]\n")

        # Step 3: Determine what needs syncing
        to_upload = []   # New files
        to_update = []   # Changed files (delete + re-upload)
        to_delete = []   # Files in assistant but not local
        unchanged = []   # Files that match

        # Track which remote files we've seen
        seen_remote_paths = set()

        for local_file in local_files:
            # Get relative path from source root; a single-file source keys
            # on the bare file name (same convention upload uses).
            if source_path.is_file():
                rel_path = local_file.name
            else:
                rel_path = str(local_file.relative_to(source_path))

            local_info = get_file_info(local_file)

            if rel_path in remote_file_map:
                # File exists remotely, check if changed
                seen_remote_paths.add(rel_path)
                remote_info = remote_file_map[rel_path]

                if file_changed(local_info, remote_info['metadata']):
                    to_update.append({
                        'local_path': local_file,
                        'rel_path': rel_path,
                        'remote_file_id': remote_info['file_obj'].id,
                        'local_info': local_info
                    })
                else:
                    unchanged.append(rel_path)
            else:
                # New file
                to_upload.append({
                    'local_path': local_file,
                    'rel_path': rel_path,
                    'local_info': local_info
                })

        # Find files to delete (in remote but not local)
        if delete_missing:
            for rel_path, remote_info in remote_file_map.items():
                if rel_path not in seen_remote_paths:
                    to_delete.append({
                        'rel_path': rel_path,
                        'remote_file_id': remote_info['file_obj'].id
                    })

        # Step 4: Show summary
        console.print("[bold]Sync Summary:[/bold]\n")

        summary_table = Table(show_header=True, header_style="bold cyan")
        summary_table.add_column("Action", style="yellow", width=15)
        summary_table.add_column("Count", style="green", width=10)

        summary_table.add_row("New files", str(len(to_upload)))
        summary_table.add_row("Updated files", str(len(to_update)))
        if delete_missing:
            summary_table.add_row("Deleted files", str(len(to_delete)))
        summary_table.add_row("Unchanged", str(len(unchanged)))

        console.print(summary_table)
        console.print()

        # Show details if there are changes (first 10 of each category)
        if to_upload:
            console.print("[bold green]Files to upload:[/bold green]")
            for item in to_upload[:10]:
                console.print(f" + {item['rel_path']}")
            if len(to_upload) > 10:
                console.print(f" ... and {len(to_upload) - 10} more")
            console.print()

        if to_update:
            console.print("[bold yellow]Files to update:[/bold yellow]")
            for item in to_update[:10]:
                console.print(f" ~ {item['rel_path']}")
            if len(to_update) > 10:
                console.print(f" ... and {len(to_update) - 10} more")
            console.print()

        if to_delete:
            console.print("[bold red]Files to delete:[/bold red]")
            for item in to_delete[:10]:
                console.print(f" - {item['rel_path']}")
            if len(to_delete) > 10:
                console.print(f" ... and {len(to_delete) - 10} more")
            console.print()

        # If no changes, exit early
        if not (to_upload or to_update or to_delete):
            console.print("[green]✓ All files are up to date![/green]")
            return

        # Dry run mode
        if dry_run:
            console.print("[yellow]Dry run mode: No changes made[/yellow]")
            return

        # Confirmation prompt
        if not yes:
            proceed = typer.confirm("\nProceed with sync?")
            if not proceed:
                console.print("[yellow]Sync cancelled[/yellow]")
                return

        console.print()

        # Step 5: Execute sync
        uploaded_count = 0
        updated_count = 0
        deleted_count = 0

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console
        ) as progress:

            # Upload new files
            if to_upload:
                task = progress.add_task(f"Uploading {len(to_upload)} new file(s)...", total=len(to_upload))
                for item in to_upload:
                    try:
                        asst.upload_file(
                            file_path=str(item['local_path']),
                            metadata={
                                'file_path': item['rel_path'],
                                'mtime': item['local_info']['mtime'],
                                'size': item['local_info']['size'],
                                'uploaded_at': datetime.now(timezone.utc).isoformat(),
                                'source': 'sync_script',
                            },
                            timeout=None
                        )
                        uploaded_count += 1
                        progress.advance(task)
                    except Exception as e:
                        console.print(f"[red]Failed to upload {item['rel_path']}: {e}[/red]")

            # Update changed files (delete old + upload new)
            if to_update:
                task = progress.add_task(f"Updating {len(to_update)} file(s)...", total=len(to_update) * 2)
                for item in to_update:
                    try:
                        # Delete old version. NOTE(review): if the re-upload
                        # below fails, the remote copy is gone until the next
                        # sync retries it as a new file.
                        asst.delete_file(file_id=item['remote_file_id'])
                        progress.advance(task)

                        # Upload new version
                        asst.upload_file(
                            file_path=str(item['local_path']),
                            metadata={
                                'file_path': item['rel_path'],
                                'mtime': item['local_info']['mtime'],
                                'size': item['local_info']['size'],
                                'uploaded_at': datetime.now(timezone.utc).isoformat(),
                                'source': 'sync_script',
                            },
                            timeout=None
                        )
                        updated_count += 1
                        progress.advance(task)
                    except Exception as e:
                        console.print(f"[red]Failed to update {item['rel_path']}: {e}[/red]")

            # Delete missing files
            if to_delete:
                task = progress.add_task(f"Deleting {len(to_delete)} file(s)...", total=len(to_delete))
                for item in to_delete:
                    try:
                        asst.delete_file(file_id=item['remote_file_id'])
                        deleted_count += 1
                        progress.advance(task)
                    except Exception as e:
                        console.print(f"[red]Failed to delete {item['rel_path']}: {e}[/red]")

        # Final summary
        console.print()
        console.print(Panel(
            f"[green]✓ Sync complete![/green]\n\n"
            f"Uploaded: {uploaded_count}\n"
            f"Updated: {updated_count}\n"
            + (f"Deleted: {deleted_count}\n" if delete_missing else "")
            + f"Unchanged: {len(unchanged)}",
            title="Results",
            border_style="green"
        ))

    except typer.Exit:
        # Deliberate exits above (e.g. the "no supported files" Exit(0))
        # must pass through untouched: typer.Exit subclasses Exception via
        # click.exceptions.Exit, so the generic handler below would
        # otherwise report it as an error and force exit code 1.
        raise
    except Exception as e:
        console.print(f"[red]Error: {e}[/red]")
        raise typer.Exit(1)
def find_files(source_path: str, patterns: List[str], excludes: List[str]) -> List[Path]:
    """Find files matching glob *patterns* under *source_path*, skipping excluded directories.

    Args:
        source_path: File or directory to scan. A single file is returned
            as-is without pattern or exclusion filtering.
        patterns: Glob patterns (may contain ``**``) applied relative to the
            source directory.
        excludes: Directory names to skip, matched as exact path components.

    Returns:
        Sorted, de-duplicated list of matching paths.

    Raises:
        typer.Exit(1): when *source_path* does not exist.
    """
    source = Path(source_path)

    if not source.exists():
        console.print(f"[red]Error: Path '{source_path}' does not exist[/red]")
        raise typer.Exit(1)

    # If it's a single file, return it
    if source.is_file():
        return [source]

    # Otherwise, scan directory
    files = []
    for pattern in patterns:
        matched = glob.glob(str(source / pattern), recursive=True)
        files.extend(Path(f) for f in matched)

    # Exclude by exact path component. The previous substring test
    # (`excl in str(file_path)`) also dropped unrelated files such as
    # "distribution.md" (contains "dist") or "rebuild.md" (contains "build").
    filtered_files = [
        file_path
        for file_path in files
        if not any(excl in file_path.parts for excl in excludes)
    ]

    return sorted(set(filtered_files))
@app.command()
def main(
    assistant: str = typer.Option(..., "--assistant", "-a", help="Name of the assistant to upload to"),
    source: str = typer.Option(..., "--source", "-s", help="File or directory path to upload"),
    patterns: str = typer.Option(
        ",".join(DEFAULT_PATTERNS),
        "--patterns",
        "-p",
        help="Comma-separated glob patterns for documentation files (e.g., '*.md,*.pdf')",
    ),
    exclude: str = typer.Option(
        ",".join(DEFAULT_EXCLUDES),
        "--exclude",
        "-e",
        help="Comma-separated directories to exclude",
    ),
    metadata_json: str = typer.Option(
        "",
        "--metadata",
        "-m",
        help="Additional metadata as JSON string",
    ),
):
    """Upload documentation files to a Pinecone Assistant.

    NOTE: Only documentation files (markdown, text, PDF) are supported.
    Code files are not recommended for Pinecone Assistant.

    Raises:
        typer.Exit(1): on missing API key, invalid --metadata JSON,
            missing source path, or API errors.
    """

    # Check for API key
    api_key = os.environ.get("PINECONE_API_KEY")
    if not api_key:
        console.print("[red]Error: PINECONE_API_KEY environment variable not set[/red]")
        console.print("\nGet your API key from: https://app.pinecone.io/?sessionType=signup")
        raise typer.Exit(1)

    # Parse patterns and excludes
    pattern_list = [p.strip() for p in patterns.split(",")]
    exclude_list = [e.strip() for e in exclude.split(",")]

    # Parse additional metadata if provided
    extra_metadata = {}
    if metadata_json:
        import json
        try:
            extra_metadata = json.loads(metadata_json)
        except json.JSONDecodeError:
            console.print("[red]Error: Invalid JSON in --metadata parameter[/red]")
            raise typer.Exit(1)

    try:
        # Initialize Pinecone client
        pc = Pinecone(api_key=api_key, source_tag="cursor_plugin:assistant")
        asst = pc.assistant.Assistant(assistant_name=assistant)

        # Find files to upload
        console.print(f"\n[bold]Scanning for documentation files in:[/bold] {source}")
        console.print(f"[dim]Patterns: {', '.join(pattern_list)}[/dim]\n")

        files = find_files(source, pattern_list, exclude_list)

        if not files:
            console.print("[yellow]No documentation files found matching the specified patterns[/yellow]")
            console.print("\n[dim]Tip: Pinecone Assistant works with .md, .txt, and .pdf files[/dim]")
            return

        console.print(f"[green]Found {len(files)} documentation file(s) to upload[/green]\n")

        # Upload files with progress bar
        uploaded = 0
        failed = 0
        failed_files = []

        # When --source points at a single file, os.path.relpath(file, file)
        # yields "."; record the file's own name instead (matches the
        # file_path convention used by sync.py).
        source_is_file = Path(source).is_file()

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("[cyan]Uploading files...", total=len(files))

            for file_path in files:
                try:
                    # Build metadata
                    if source_is_file:
                        rel_path = file_path.name
                    else:
                        rel_path = os.path.relpath(str(file_path), source)
                    stat = file_path.stat()
                    metadata = {
                        "source": "upload_script",
                        "file_path": rel_path,
                        "file_type": file_path.suffix,
                        "content_type": "documentation",
                        "mtime": stat.st_mtime,
                        "size": stat.st_size,
                        "uploaded_at": datetime.now(timezone.utc).isoformat(),
                        **extra_metadata,
                    }

                    # Upload file
                    asst.upload_file(
                        file_path=str(file_path),
                        metadata=metadata,
                        timeout=None,
                    )
                    uploaded += 1
                    progress.update(task, advance=1, description=f"[cyan]Uploaded: {rel_path}")

                except Exception as e:
                    failed += 1
                    failed_files.append((str(file_path), str(e)))
                    progress.update(task, advance=1)

        # Summary table
        console.print()
        summary = Table(show_header=False, box=None)
        summary.add_column("Status", style="bold")
        summary.add_column("Count")

        summary.add_row("[green]✓ Uploaded[/green]", str(uploaded))
        if failed > 0:
            summary.add_row("[red]✗ Failed[/red]", str(failed))

        console.print(Panel(summary, title="Upload Summary", border_style="blue"))

        # Show failed files if any
        if failed_files:
            console.print("\n[bold red]Failed uploads:[/bold red]")
            for file_path, error in failed_files:
                console.print(f" • {file_path}: [red]{error}[/red]")

        # Next steps
        if uploaded > 0:
            next_steps = f"""[bold]Next steps:[/bold]
• Chat: [cyan]/pinecone:assistant-chat assistant {assistant} message [your question][/cyan]
• Context: [cyan]/pinecone:assistant-context assistant {assistant} query [search][/cyan]

[dim]Note: Files are being processed and will be available shortly[/dim]"""
            console.print(Panel(next_steps, title="What's Next?", border_style="green"))

    except typer.Exit:
        # find_files raises typer.Exit(1) for a missing path; typer.Exit
        # subclasses Exception (via click.exceptions.Exit), so without this
        # re-raise the generic handler below would double-report it.
        raise
    except Exception as e:
        console.print(f"[red]Error: {e}[/red]")
        raise typer.Exit(1)
+ +### Authenticate + +```bash +# Interactive (recommended for local dev) +pc login +pc target -o "my-org" -p "my-project" + +# Service account (recommended for CI/CD) +pc auth configure --client-id "$PINECONE_CLIENT_ID" --client-secret "$PINECONE_CLIENT_SECRET" + +# API key (quick testing) +pc config set-api-key $PINECONE_API_KEY +``` + +Check status: `pc auth status` · `pc target --show` + +> **Note for agent sessions**: If you need to run `pc login` inside an agent loop, the browser auth link may not surface correctly. It's best to authenticate **before** starting an agent session. Run `pc login` in your terminal directly, then invoke the agent once you're authenticated. + +### Authenticating the CLI does not set `PINECONE_API_KEY` + +`pc login` authenticates the CLI tool itself — it does **not** set `PINECONE_API_KEY` in your environment. Python scripts, Node.js SDKs, and other tools that use the Pinecone SDK need `PINECONE_API_KEY` set separately. + +Use the CLI to create a key and export it in one step: + +```bash +KEY=$(pc api-key create --name agent-sdk-key --json | jq -r '.value') +export PINECONE_API_KEY="$KEY" +``` + +Without `jq`: run `pc api-key create --name agent-sdk-key --json` and copy the `"value"` field manually. 
+ +--- + +## Common Commands + +| Task | Command | +|---|---| +| List indexes | `pc index list` | +| Create serverless index | `pc index create -n my-index -d 1536 -m cosine -c aws -r us-east-1` | +| Index stats | `pc index stats -n my-index` | +| Upload vectors from file | `pc index vector upsert -n my-index --file ./vectors.json` | +| Query by vector | `pc index vector query -n my-index --vector '[0.1, ...]' -k 10 --include-metadata` | +| Query by vector ID | `pc index vector query -n my-index --id "doc-123" -k 10` | +| Fetch vectors by ID | `pc index vector fetch -n my-index --ids '["vec1","vec2"]'` | +| List vector IDs | `pc index vector list -n my-index` | +| Delete vectors by filter | `pc index vector delete -n my-index --filter '{"genre":"classical"}'` | +| List namespaces | `pc index namespace list -n my-index` | +| Create backup | `pc backup create -i my-index -n "my-backup"` | +| JSON output (for scripting) | Add `-j` to any command | + +--- + +## Interesting Things You Can Do + +### Query with custom vectors (not just text) +Unlike the MCP, the CLI lets you query any index with raw vector values — useful when you generate embeddings externally (OpenAI, HuggingFace, etc.): +```bash +pc index vector query -n my-index \ + --vector '[0.1, 0.2, ..., 0.9]' \ + --filter '{"source":{"$eq":"docs"}}' \ + -k 20 --include-metadata +``` + +### Pipe embeddings directly into queries +```bash +jq -c '.embedding' doc.json | pc index vector query -n my-index --vector - -k 10 +``` + +### Bulk metadata update with preview +```bash +# Preview first +pc index vector update -n my-index \ + --filter '{"env":{"$eq":"staging"}}' \ + --metadata '{"env":"production"}' \ + --dry-run + +# Apply +pc index vector update -n my-index \ + --filter '{"env":{"$eq":"staging"}}' \ + --metadata '{"env":"production"}' +``` + +### Backup and restore +```bash +# Snapshot before a migration +pc backup create -i my-index -n "pre-migration" + +# Restore to a new index if something goes wrong +pc 
backup restore -i -n my-index-restored +``` + +### Automate in CI/CD +```bash +export PINECONE_CLIENT_ID="..." +export PINECONE_CLIENT_SECRET="..." +pc auth configure --client-id "$PINECONE_CLIENT_ID" --client-secret "$PINECONE_CLIENT_SECRET" +pc index vector upsert -n my-index --file ./vectors.jsonl --batch-size 1000 +``` + +### Script against JSON output +```bash +# Get all index names as a list +pc index list -j | jq -r '.[] | .name' + +# Check if an index exists before creating +if ! pc index describe -n my-index -j 2>/dev/null | jq -e '.name' > /dev/null; then + pc index create -n my-index -d 1536 -m cosine -c aws -r us-east-1 +fi +``` + +--- + +## Reference Files + +- [Full command reference](references/command-reference.md) — all commands with flags and examples +- [Troubleshooting & best practices](references/troubleshooting.md) + +## Documentation + +- [CLI Quickstart](https://docs.pinecone.io/reference/cli/quickstart) +- [Command Reference](https://docs.pinecone.io/reference/cli/command-reference) +- [Authentication](https://docs.pinecone.io/reference/cli/authentication) +- [Target Context](https://docs.pinecone.io/reference/cli/target-context) +- [GitHub Releases](https://github.com/pinecone-io/cli/releases) diff --git a/skills/cli/references/command-reference.md b/skills/cli/references/command-reference.md new file mode 100644 index 0000000..0091ad2 --- /dev/null +++ b/skills/cli/references/command-reference.md @@ -0,0 +1,237 @@ +# Pinecone CLI — Full Command Reference + +## Index Management + +### Create Index +```bash +# Serverless index +pc index create -n my-index -d 1536 -m cosine -c aws -r us-east-1 + +# With integrated embedding model +pc index create -n my-index -m cosine -c aws -r us-east-1 \ + --model multilingual-e5-large \ + --field-map text=chunk_text + +# Sparse vector index +pc index create -n sparse-index -m dotproduct -c aws -r us-east-1 --vector-type sparse + +# With deletion protection +pc index create -n my-index -d 1536 -m cosine -c 
aws -r us-east-1 --deletion-protection enabled + +# From collection +pc index create -n my-index -d 1536 -m cosine -c aws -r us-east-1 --source-collection my-collection +``` + +### List / Describe / Stats +```bash +pc index list # Summary view +pc index list --wide # Additional columns (host, embed, tags) +pc index list -j # JSON output + +pc index describe -n my-index +pc index describe -n my-index -j + +pc index stats -n my-index +pc index stats -n my-index --filter '{"genre":{"$eq":"rock"}}' +``` + +### Configure / Delete +```bash +# Enable deletion protection +pc index configure -n my-index --deletion-protection enabled + +# Add tags +pc index configure -n my-index --tags environment=production,team=ml + +# Switch to dedicated read capacity +pc index configure -n my-index \ + --read-mode dedicated \ + --read-node-type b1 \ + --read-shards 2 \ + --read-replicas 2 + +pc index delete -n my-index +``` + +--- + +## Vector Operations + +### Upsert +```bash +# From JSON file (with "vectors" array) +pc index vector upsert -n my-index --file ./vectors.json + +# From JSONL file (one vector per line) +pc index vector upsert -n my-index --file ./vectors.jsonl + +# Inline JSON +pc index vector upsert -n my-index --file '{"vectors": [{"id": "vec1", "values": [0.1, 0.2, 0.3]}]}' + +# From stdin +cat vectors.json | pc index vector upsert -n my-index --file - + +# With namespace, custom batch size +pc index vector upsert -n my-index --namespace tenant-a --file ./vectors.json --batch-size 1000 +``` + +**File formats:** +```json +// JSON (vectors.json) +{"vectors": [{"id": "vec1", "values": [0.1, 0.2, 0.3], "metadata": {"genre": "comedy"}}]} + +// JSONL (vectors.jsonl) +{"id": "vec1", "values": [0.1, 0.2, 0.3], "metadata": {"genre": "comedy"}} +{"id": "vec2", "values": [0.4, 0.5, 0.6], "metadata": {"genre": "drama"}} +``` + +### Query +```bash +# By vector values +pc index vector query -n my-index --vector '[0.1, 0.2, 0.3]' -k 10 --include-metadata + +# By vector ID +pc index 
vector query -n my-index --id "doc-123" -k 10 --include-metadata + +# With metadata filter +pc index vector query -n my-index \ + --vector '[0.1, 0.2, 0.3]' \ + --filter '{"genre":{"$eq":"sci-fi"}}' \ + --include-metadata + +# Sparse vectors +pc index vector query -n my-index \ + --sparse-indices '[0, 5, 12]' \ + --sparse-values '[0.5, 0.3, 0.8]' \ + -k 15 + +# From stdin +jq -c '.embedding' doc.json | pc index vector query -n my-index --vector - -k 10 +``` + +### Fetch +```bash +pc index vector fetch -n my-index --ids '["vec1","vec2","vec3"]' +pc index vector fetch -n my-index --filter '{"genre":{"$eq":"rock"}}' +pc index vector fetch -n my-index --namespace tenant-a --ids '["doc-123"]' +pc index vector fetch -n my-index --filter '{"genre":{"$eq":"rock"}}' --limit 100 +``` + +### List / Update / Delete +```bash +# List vector IDs +pc index vector list -n my-index +pc index vector list -n my-index --namespace tenant-a --limit 50 + +# Update metadata or values +pc index vector update -n my-index --id "vec1" --metadata '{"category":"updated"}' +pc index vector update -n my-index --id "vec1" --values '[0.2, 0.3, 0.4]' + +# Bulk update with dry-run +pc index vector update -n my-index \ + --filter '{"genre":{"$eq":"sci-fi"}}' \ + --metadata '{"genre":"fantasy"}' \ + --dry-run + +# Delete by IDs or filter +pc index vector delete -n my-index --ids '["vec1","vec2"]' +pc index vector delete -n my-index --filter '{"genre":"classical"}' +pc index vector delete -n my-index --namespace old-data --all-vectors +``` + +--- + +## Namespace Management + +```bash +pc index namespace create -n my-index --name tenant-a +pc index namespace create -n my-index --name tenant-b --schema "category,brand" +pc index namespace list -n my-index +pc index namespace list -n my-index --prefix "tenant-" +pc index namespace describe -n my-index --name tenant-a +pc index namespace delete -n my-index --name tenant-a # WARNING: deletes all vectors +``` + +--- + +## Backup and Restore + +```bash +# 
Create / list / describe
+pc backup create -i my-index -n "nightly-backup" -d "Backup before deployment"
+pc backup list
+pc backup list --index-name my-index
+pc backup describe -i <backup-id>
+
+# Restore (creates a new index)
+pc backup restore -i <backup-id> -n restored-index
+pc backup restore -i <backup-id> -n restored-index --deletion-protection enabled
+
+# Check restore job status
+pc backup restore list
+pc backup restore describe -i rj-abc123
+
+# Delete backup
+pc backup delete -i <backup-id>
+```
+
+---
+
+## Project Management
+
+```bash
+pc project list
+pc project create -n "demo-project"
+pc project create -n "demo-project" --target
+pc project describe -i proj-abc123
+pc project update -i proj-abc123 -n "new-name"
+pc project delete -i proj-abc123
+```
+
+---
+
+## Organization Management
+
+```bash
+pc organization list
+pc organization describe -i org-abc123
+pc organization update -i org-abc123 -n "new-name"
+pc organization delete -i org-abc123 # WARNING: highly destructive
+```
+
+---
+
+## API Key Management
+
+```bash
+pc api-key create -n "my-key"
+pc api-key create -n "my-key" --store
+pc api-key create -n "my-key" -i proj-abc123
+pc api-key list
+pc api-key describe -i key-abc123
+pc api-key update -i key-abc123 --roles ProjectEditor
+pc api-key delete -i key-abc123
+```
+
+---
+
+## Global Flags
+
+Available on all commands:
+- `-h, --help` — Show help
+- `-j, --json` — JSON output (great for scripting)
+- `-q, --quiet` — Suppress output
+- `--timeout` — Command timeout (default: 60s, 0 to disable)
+
+## Exit Codes
+
+- `0` — success
+- `1` — error
+
+```bash
+if pc index describe -n my-index 2>/dev/null; then
+  echo "Index exists"
+else
+  pc index create -n my-index -d 1536 -m cosine -c aws -r us-east-1
+fi
+```
diff --git a/skills/cli/references/troubleshooting.md b/skills/cli/references/troubleshooting.md
new file mode 100644
index 0000000..6458bd8
--- /dev/null
+++ b/skills/cli/references/troubleshooting.md
@@ -0,0 +1,136 @@
+# Pinecone CLI — Troubleshooting & Best Practices
+
+## Troubleshooting + +### Authentication Issues + +**"Not authenticated" or "Invalid credentials"** +```bash +pc auth status +pc logout +pc login +pc target -o "my-org" -p "my-project" +``` + +**Service account can't access resources** +```bash +pc target --show # Verify correct project is targeted +``` + +### API Key Issues + +**API key not working** +```bash +pc config get-api-key # Verify key is set +# API keys are scoped to org + project — get a new one if needed +pc api-key create -n "new-key" --store +``` + +### Target Context Issues + +**"Project not found" or "Organization not found"** +```bash +pc target --show +pc target --clear +pc target -o "my-org" -p "my-project" +``` + +### Index Issues + +**Index operations failing** +```bash +pc index describe -n my-index +# "Initializing" → wait and retry +# "Terminating" → recreate it +``` + +**Can't delete index** +```bash +# Check if deletion protection is on +pc index describe -n my-index +pc index configure -n my-index --deletion-protection disabled +pc index delete -n my-index +``` + +### Vector Upload Issues + +**Upsert fails with dimension mismatch** +```bash +pc index describe -n my-index # Check configured dimension +# Ensure all vectors have exactly that many values +``` + +**Large file upload is slow** +```bash +# Use max batch size +pc index vector upsert -n my-index --file ./large.json --batch-size 1000 + +# Or split JSONL and loop +split -l 10000 large.jsonl chunk- +for file in chunk-*; do + pc index vector upsert -n my-index --file "$file" +done +``` + +### Query Issues + +**Query returns no results** +```bash +pc index stats -n my-index # Check if data exists +pc index namespace list -n my-index # Verify namespace +# Filters use MongoDB query syntax — double-check filter format +``` + +### Backup Issues + +**Backup creation fails** +```bash +pc index describe -n my-index +# Backups are only supported for serverless indexes in "Ready" state +``` + +**Can't find backup ID** +```bash +pc backup list 
--index-name my-index +# Use the UUID (e.g. c84725e5-...) not the name for restore/delete +``` + +--- + +## Best Practices + +### Use the right auth method +- **Interactive dev**: `pc login` +- **CI/CD pipelines**: service accounts +- **Quick testing**: `pc api-key create -n "my-key" --store` + +### Check status before operating +```bash +pc auth status +pc target --show +pc index describe -n my-index +``` + +### Use JSON output for scripts +```bash +pc index list -j | jq -r '.[] | .name' +``` + +### Preview destructive operations +```bash +pc index vector update -n my-index \ + --filter '{"genre":{"$eq":"old"}}' \ + --metadata '{"genre":"new"}' \ + --dry-run +``` + +### Protect production indexes +```bash +pc index create -n prod-index -d 1536 -m cosine -c aws -r us-east-1 \ + --deletion-protection enabled +``` + +### Automate backups +```bash +pc backup create -i my-index -n "daily-backup-$(date +%Y%m%d)" +``` diff --git a/skills/docs/SKILL.md b/skills/docs/SKILL.md new file mode 100644 index 0000000..63d65f5 --- /dev/null +++ b/skills/docs/SKILL.md @@ -0,0 +1,85 @@ +--- +name: docs +description: Curated documentation reference for developers building with Pinecone. Contains links to official docs organized by topic and data format references. Use when writing Pinecone code, looking up API parameters, or needing the correct format for vectors or records. +--- + +# Pinecone Developer Reference + +A curated index of Pinecone documentation. Fetch the relevant page(s) for the task at hand rather than relying on training data. + +--- + +## NOTE TO AGENT +Please attempt to fetch the url listed when relevant. If you run into an error, please attempt to append ".md" to the url to retrieve the markdown version of the Docs page. + +In case you need it: A full reference to ALL relevant URLs can be found here: https://docs.pinecone.io/llms.txt + +Use this as a last resort if you cannot find the relevant page below. 
+ +--- + +## Getting Started + +| Topic | URL | +|---|---| +| Quickstart for all languages and coding environments (Cursor, n8n, Python, JavaScript, Java, Go, C#) | https://docs.pinecone.io/guides/get-started/quickstart | +| Pinecone concepts — namespaces, terminology, and key database concepts | https://docs.pinecone.io/guides/get-started/concepts | +| Data modeling for text and vectors | https://docs.pinecone.io/guides/index-data/data-modeling | +| Architecture of Pinecone | https://docs.pinecone.io/guides/get-started/database-architecture | +| Pinecone Assistant overview | https://docs.pinecone.io/guides/assistant/overview | + +--- + +## Indexes + +| Topic | URL | +|---|---| +| Create an index | https://docs.pinecone.io/guides/index-data/create-an-index | +| Index types and conceptual overview | https://docs.pinecone.io/guides/index-data/indexing-overview | +| Integrated inference (built-in embedding models) | https://docs.pinecone.io/guides/index-data/indexing-overview#integrated-embedding | +| Dedicated read nodes — predictable low-latency performance at high query volumes | https://docs.pinecone.io/guides/index-data/dedicated-read-nodes | + +--- + +## Upsert & Data + +| Topic | URL | +|---|---| +| Upsert vectors and text | https://docs.pinecone.io/guides/index-data/upsert-data | +| Multitenancy with namespaces | https://docs.pinecone.io/guides/index-data/implement-multitenancy | + +--- + +## Search + +| Topic | URL | +|---|---| +| Semantic search | https://docs.pinecone.io/guides/search/semantic-search | +| Hybrid search | https://docs.pinecone.io/guides/search/hybrid-search | +| Lexical search | https://docs.pinecone.io/guides/search/lexical-search | +| Metadata filtering — narrow results and speed up searches | https://docs.pinecone.io/guides/search/filter-by-metadata | + +--- + +## API & SDK Reference + +| Topic | URL | +|---|---| +| Python SDK reference | https://docs.pinecone.io/reference/sdks/python/overview | +| Example Colab notebooks | 
https://docs.pinecone.io/examples/notebooks | + +--- + +## Production + +| Topic | URL | +|---|---| +| Production checklist — preparing your index for production | https://docs.pinecone.io/guides/production/production-checklist | +| Common errors and what they mean | https://docs.pinecone.io/guides/production/error-handling | +| Targeting indexes correctly — don't use index names in prod | https://docs.pinecone.io/guides/manage-data/target-an-index#target-by-index-host-recommended | + +--- + +## Data Formats + +See [references/data-formats.md](references/data-formats.md) for vector and record schemas. diff --git a/skills/docs/references/data-formats.md b/skills/docs/references/data-formats.md new file mode 100644 index 0000000..2a55f45 --- /dev/null +++ b/skills/docs/references/data-formats.md @@ -0,0 +1,81 @@ +# Data Formats + +## Integrated Index Records + +Used with `upsert_records()` (Python SDK) or `upsert-records` (MCP). Records are automatically embedded using the index's configured model. + +**JSON** +```json +[ + { + "_id": "rec1", + "chunk_text": "Your text content here.", + "category": "example" + }, + { + "_id": "rec2", + "chunk_text": "Another piece of text.", + "category": "example" + } +] +``` + +- `_id` — unique record identifier (required) +- The text field name must match the index's `fieldMap` (e.g. `chunk_text` if `fieldMap: {text: "chunk_text"}`) +- All other fields are stored as metadata and can be used for filtering +- Do **not** nest extra fields under a `metadata` key — put them directly on the record + +--- + +## Standard Index Vectors + +Used with `upsert()` (Python SDK) or `pc index vector upsert` (CLI). 
+ +**JSON (with `vectors` array)** +```json +{ + "vectors": [ + { + "id": "vec1", + "values": [0.1, 0.2, 0.3], + "metadata": { "genre": "comedy", "year": 2021 } + }, + { + "id": "vec2", + "values": [0.4, 0.5, 0.6], + "metadata": { "genre": "drama", "year": 2019 } + } + ] +} +``` + +**JSONL (one vector per line)** +```jsonl +{"id": "vec1", "values": [0.1, 0.2, 0.3], "metadata": {"genre": "comedy"}} +{"id": "vec2", "values": [0.4, 0.5, 0.6], "metadata": {"genre": "drama"}} +``` + +- `id` — unique vector identifier (required) +- `values` — dense vector as float array, length must match index dimension (required) +- `metadata` — arbitrary key-value pairs for filtering (optional) + +--- + +## Sparse Vectors + +Used for keyword or hybrid search with sparse indexes. + +```json +{ + "id": "vec1", + "values": [0.1, 0.2, 0.3], + "sparse_values": { + "indices": [10, 45, 316], + "values": [0.5, 0.3, 0.8] + }, + "metadata": { "genre": "comedy" } +} +``` + +- `sparse_values.indices` — non-zero dimension indices +- `sparse_values.values` — corresponding float values, same length as `indices` diff --git a/skills/help/SKILL.md b/skills/help/SKILL.md new file mode 100644 index 0000000..a545893 --- /dev/null +++ b/skills/help/SKILL.md @@ -0,0 +1,61 @@ +--- +name: help +description: Overview of all available Pinecone skills and what a user needs to get started. Invoke when a user asks what skills are available, how to get started with Pinecone, or what they need to set up before using any Pinecone skill. +--- + +# Pinecone Skills — Help & Overview + +Pinecone is the leading vector database for building accurate and performant AI applications at scale in production. It's useful for building semantic search, retrieval augmented generation, recommendation systems, and agentic applications. + +Here's everything you need to get started and a summary of all available skills. 
+ +--- + +## What You Need + +### Required +- **Pinecone account** — free to create at https://app.pinecone.io/?sessionType=signup +- **API key** — create one in the Pinecone console after signing up, then either export it in your terminal: + ```bash + export PINECONE_API_KEY="your-key" + ``` + Or add it to a `.env` file if your IDE doesn't inherit shell variables: `PINECONE_API_KEY=your-key` + +### Optional (unlock more capabilities) + +| Tool | What it enables | Install | +|---|---|---| +| **Pinecone MCP server** | Use Pinecone directly inside your AI agent/IDE without writing code | [Setup guide](https://docs.pinecone.io/guides/operations/mcp-server#tools) | +| **Pinecone CLI (`pc`)** | Manage all index types from the terminal, batch operations, backups, CI/CD | `brew tap pinecone-io/tap && brew install pinecone-io/tap/pinecone` | +| **uv** | Run the packaged Python scripts included in these skills | [Install uv](https://docs.astral.sh/uv/getting-started/installation/) | + +--- + +## Available Skills + +| Skill | What it does | +|---|---| +| `quickstart` | Step-by-step onboarding — create an index, upload data, and run your first search | +| `query` | Search integrated indexes using natural language text via the Pinecone MCP | +| `cli` | Use the Pinecone CLI (`pc`) for terminal-based index and vector management | +| `assistant` | Create, manage, and chat with Pinecone Assistants for document Q&A with citations | +| `mcp` | Reference for all Pinecone MCP server tools and their parameters | +| `docs` | Curated links to official Pinecone documentation, organized by topic | + +--- + +## Which skill should I use? 
+ +**Just getting started?** → `quickstart` + +**Want to search an index you already have?** +- Integrated index (built-in embedding model) → `query` (uses MCP) +- Any other index type → `cli` + +**Working with documents and Q&A?** → `assistant` + +**Need to manage indexes, bulk upload vectors, or automate workflows?** → `cli` + +**Looking up API parameters or SDK usage?** → `docs` + +**Need to understand what MCP tools are available?** → `mcp` diff --git a/skills/mcp/SKILL.md b/skills/mcp/SKILL.md new file mode 100644 index 0000000..354fcd9 --- /dev/null +++ b/skills/mcp/SKILL.md @@ -0,0 +1,106 @@ +--- +name: mcp +description: Reference for the Pinecone MCP server tools. Documents all available tools - list-indexes, describe-index, describe-index-stats, create-index-for-model, upsert-records, search-records, cascading-search, and rerank-documents. Use when an agent needs to understand what Pinecone MCP tools are available, how to use them, or what parameters they accept. +--- + +# Pinecone MCP Tools Reference + +The Pinecone MCP server exposes the following tools to AI agents and IDEs. For setup and installation instructions, see the [MCP server guide](https://docs.pinecone.io/guides/operations/mcp-server#tools). + +> **Key Limitation:** The Pinecone MCP only supports **integrated indexes** — indexes created with a built-in Pinecone embedding model. It does not work with standard indexes using external embedding models. For those, use the Pinecone CLI. + +--- + +## `list-indexes` + +List all indexes in the current Pinecone project. + +--- + +## `describe-index` + +Get configuration details for a specific index — cloud, region, dimension, metric, embedding model, field map, and status. + +**Parameters:** +- `name` (required) — Index name + +--- + +## `describe-index-stats` + +Get statistics for an index including total record count and per-namespace breakdown. 
+ +**Parameters:** +- `name` (required) — Index name + +--- + +## `create-index-for-model` + +Create a new serverless index with an integrated embedding model. Pinecone handles embedding automatically — no external model needed. + +**Parameters:** +- `name` (required) — Index name +- `cloud` (required) — `aws`, `gcp`, or `azure` +- `region` (required) — Cloud region (e.g. `us-east-1`) +- `embed.model` (required) — Embedding model: `llama-text-embed-v2`, `multilingual-e5-large`, or `pinecone-sparse-english-v0` +- `embed.fieldMap.text` (required) — The record field that contains text to embed (e.g. `chunk_text`) + +--- + +## `upsert-records` + +Insert or update records in an integrated index. Records are automatically embedded using the index's configured model. + +**Parameters:** +- `name` (required) — Index name +- `namespace` (required) — Namespace to upsert into +- `records` (required) — Array of records. Each record must have an `id` or `_id` field and contain the text field specified in the index's `fieldMap`. Do not nest fields under `metadata` — put them directly on the record. + +**Example record:** +```json +{ "_id": "rec1", "chunk_text": "The Eiffel Tower was built in 1889.", "category": "architecture" } +``` + +--- + +## `search-records` + +Semantic text search against an integrated index. Pass plain text — the MCP embeds the query automatically using the index's model. + +**Parameters:** +- `name` (required) — Index name +- `namespace` (required) — Namespace to search +- `query.inputs.text` (required) — The text query +- `query.topK` (required) — Number of results to return +- `query.filter` (optional) — Metadata filter using MongoDB-style operators (`$eq`, `$ne`, `$in`, `$gt`, `$gte`, `$lt`, `$lte`) +- `rerank.model` (optional) — Reranking model: `bge-reranker-v2-m3`, `cohere-rerank-3.5`, or `pinecone-rerank-v0` +- `rerank.rankFields` (optional) — Fields to rerank on (e.g. 
`["chunk_text"]`) +- `rerank.topN` (optional) — Number of results to return after reranking + +--- + +## `cascading-search` + +Search across multiple indexes simultaneously, then deduplicate and rerank results into a single ranked list. + +**Parameters:** +- `indexes` (required) — Array of `{ name, namespace }` objects to search across +- `query.inputs.text` (required) — The text query +- `query.topK` (required) — Number of results to retrieve per index before reranking +- `rerank.model` (required) — Reranking model: `bge-reranker-v2-m3`, `cohere-rerank-3.5`, or `pinecone-rerank-v0` +- `rerank.rankFields` (required) — Fields to rerank on +- `rerank.topN` (optional) — Final number of results to return after reranking + +--- + +## `rerank-documents` + +Rerank a set of documents or records against a query without performing a vector search first. + +**Parameters:** +- `model` (required) — `bge-reranker-v2-m3`, `cohere-rerank-3.5`, or `pinecone-rerank-v0` +- `query` (required) — The query to rerank against +- `documents` (required) — Array of strings or records to rerank +- `options.topN` (required) — Number of results to return +- `options.rankFields` (optional) — If documents are records, the field(s) to rerank on diff --git a/skills/query/SKILL.md b/skills/query/SKILL.md new file mode 100644 index 0000000..d722839 --- /dev/null +++ b/skills/query/SKILL.md @@ -0,0 +1,84 @@ +--- +name: query +description: Query integrated indexes using text with Pinecone MCP. IMPORTANT - This skill ONLY works with integrated indexes (indexes with built-in Pinecone embedding models like multilingual-e5-large). For standard indexes or advanced vector operations, use the CLI skill instead. Requires PINECONE_API_KEY environment variable and Pinecone MCP server to be configured. 
+argument-hint: query [q] index [indexName] namespace [ns] topK [k] reranker [rerankModel] +--- + +# Pinecone Query Skill + +Search for records in Pinecone integrated indexes using natural language text queries via the Pinecone MCP server. + +## What is this skill for? + +This skill provides a simple way to query **integrated indexes** (indexes with built-in Pinecone embedding models) using text queries. The MCP server automatically converts your text into embeddings and searches the index. + +### Prerequisites + +**Required:** +1. ✅ **Pinecone MCP server must be configured** - Check if MCP tools are available +2. ✅ **PINECONE_API_KEY environment variable must be set** - Get a free API key at https://app.pinecone.io/?sessionType=signup +3. ✅ **Index must be an integrated index** - Uses Pinecone embedding models (e.g., multilingual-e5-large, llama-text-embed-v2, pinecone-sparse-english-v0) + +### When NOT to use this skill + +**Use the CLI skill instead if:** +- ❌ Your index is a standard index (no integrated embedding model) +- ❌ You need to query with custom vector values (not text) +- ❌ You need advanced vector operations (fetch by ID, list vectors, bulk operations) +- ❌ Your index uses third-party embedding models (OpenAI, HuggingFace, Cohere) + +**MCP Limitation**: The Pinecone MCP currently only supports integrated indexes. For all other use cases, use the Pinecone CLI skill. + +## How it works + +Utilize Pinecone MCP's `search-records` tool to search for records within a specified Pinecone integrated index using a text query. + +## Workflow + +**IMPORTANT: Before proceeding, verify the Pinecone MCP tools are available.** If MCP tools are not accessible: +- Inform the user that the Pinecone MCP server needs to be configured +- Check if `PINECONE_API_KEY` environment variable is set +- Direct them to the MCP setup documentation or the `help` skill + +1. Parse the user's input for: + - `query` (required): The text to search for. 
- `index` (required): The name of the Pinecone index to search.
+   - `namespace` (optional): The namespace within the index.
+   - `topK` (optional): The number of results to return.
+   - `reranker` (optional): The reranking model to use for improved relevance.
+
+2. If the user omits required arguments:
+   - If only the index name is provided, use the `describe-index` tool to retrieve available namespaces and ask the user to choose.
+   - If only a query is provided, use `list-indexes` to get available indexes, ask the user to pick one, then use `describe-index` for namespaces if needed.
+
+3. Call the `search-records` tool with the gathered arguments to perform the search.
+
+4. Format and display the returned results in a clear, readable table including field highlights (such as ID, score, and relevant metadata).
+
+---
+
+## Troubleshooting
+
+**`PINECONE_API_KEY` is required.** Get a free key at https://app.pinecone.io/?sessionType=signup
+
+If you get an access error, the key is likely missing. Ask the user to set it and restart their IDE or agent session:
+- Terminal: `export PINECONE_API_KEY="your-key"`
+- IDE without shell inheritance: add `PINECONE_API_KEY=your-key` to a `.env` file
+
+**IMPORTANT** At the moment, the /query command can only be used with integrated indexes, which use hosted Pinecone embedding models to embed and search for data.
+If a user attempts to query an index that uses a third-party embedding model such as OpenAI or Hugging Face, remind them that this capability is not available yet
+with the Pinecone MCP server.
+
+- If required arguments are missing, prompt the user to supply them, using Pinecone MCP tools as needed (e.g., `list-indexes`, `describe-index`).
+- Guide the user interactively through argument selection until the search can be completed.
+- If an invalid value is provided for any argument (e.g., nonexistent index or namespace), surface the error and suggest valid options. 
+ +## Tools Reference + +- `search-records`: Search records in a given index with optional metadata filtering and reranking. +- `list-indexes`: List all available Pinecone indexes. +- `describe-index`: Get index configuration and namespaces. +- `describe-index-stats`: Get stats including record counts and namespaces. +- `rerank-documents`: Rerank returned documents using a specified reranking model. +- Ask the user interactively to clarify missing information when needed. + +--- diff --git a/skills/quickstart/SKILL.md b/skills/quickstart/SKILL.md new file mode 100644 index 0000000..9d2426e --- /dev/null +++ b/skills/quickstart/SKILL.md @@ -0,0 +1,230 @@ +--- +name: quickstart +description: Interactive Pinecone quickstart for new developers. Choose between two paths - Database (create an integrated index, upsert data, and query using Pinecone MCP + Python) or Assistant (create a Pinecone Assistant for document Q&A). Use when a user wants to get started with Pinecone for the first time or wants a guided tour of Pinecone's tools. +--- + +# Pinecone Quickstart + +Welcome! This skill walks you through your first Pinecone experience using the tools available to you. In this quickstart, +you will learn how to do a simple form of semantic search over some example data. + +## Prerequisites + +Before starting either path, verify the API key works by calling `list-indexes` via the Pinecone MCP. If it succeeds, proceed. If it fails, ask the user to set their key: + +- Terminal: `export PINECONE_API_KEY="your-key"` +- Or create a `.env` file in the project root: `PINECONE_API_KEY=your-key` + +Then retry `list-indexes` to confirm. + +## Step 0: Choose Your Path + +Ask the user which path they want: + +- **Database** – Build a vector search index. Best for developers who want to store and search embeddings. Uses the Pinecone MCP + a Python upsert script. +- **Assistant** – Build a document Q&A assistant. 
Best for users who want to upload files and ask questions with cited answers. No code required. + +--- + +## Path A: Database Quickstart + +For each step, explain to the user what will happen. An overview is here: + +1. Check if MCP is set +2. Create an integrated index with MCP +3. Upsert sample data using the bundled script (9 sentences across productivity, health, and nature themes) +4. Run a semantic search query and explore further queries +5. Optionally try reranking +6. Offer the complete standalone script + +### Step 1 – Verify MCP is Available + +The prerequisite check already called `list-indexes`. If it succeeded, the MCP is working — proceed to Step 2. + +If it failed because MCP tools were unavailable (not an auth error): +- Tell the user the MCP server needs to be configured +- Point them to: https://docs.pinecone.io/reference/tools/mcp + +### Step 2 – Create an Integrated Index + +Use the MCP `create-index-for-model` tool to create a serverless index with integrated embeddings: + +``` +name: quickstart-skills +cloud: aws +region: us-east-1 +embed: + model: llama-text-embed-v2 + fieldMap: + text: chunk_text +``` + +**Explain to the user what's happening:** +- An *integrated index* uses a built-in Pinecone embedding model (`llama-text-embed-v2`) +- This means you send plain text and Pinecone handles the embedding automatically +- The `field_map` tells Pinecone which field in your records contains the text to embed + +Wait for the index to become ready before proceeding. Waiting a few seconds is sufficient. + +### Step 3 – Upsert Sample Data + +Run the bundled upsert script to seed the index with sample records. 
+ +If `PINECONE_API_KEY` is set in the environment: +```bash +uv run scripts/upsert.py --index quickstart-skills +``` + +If using a `.env` file: +```bash +uv run --env-file .env scripts/upsert.py --index quickstart-skills +``` + +**Explain to the user what's happening:** +- The script uploads 9 sample records across three themes: **productivity** (getting work done), **health** (feeling unwell), and **nature** (outdoors/wildlife) +- The dataset is intentionally varied so semantic search can show its value — the queries below use completely different words than the records, but the right ones still surface +- Each record has an `_id`, a `chunk_text` field (the text that gets embedded), and a `category` field +- This is the same structure you'd use for your own data — just replace the records + +### Step 4 – Query with the MCP + +Use the MCP `search-records` tool to run the first semantic search: + +``` +index: quickstart-skills +namespace: example-namespace +query: + topK: 3 + inputs: + text: "getting things done efficiently" +``` + +Display the results in a clean table: ID, score, and `chunk_text`. + +**Explain to the user what's happening:** +- Notice the query shares no keywords with the records — but it surfaces the productivity sentences +- That's semantic search: it finds meaning, not just matching words +- You sent plain text — Pinecone embedded the query using the same model as the index + +**Offer to explore further:** Ask the user if they'd like to try another query to see the effect more clearly: +- Option A: `"feeling under the weather"` — should surface the health records +- Option B: `"wildlife spotting outside"` — should surface the nature records +- Option C: No thanks, move on + +Run whichever query they choose and display the results the same way. If they want to try both, do both. After each result, point out which theme surfaced and why. + +If they decline or are done exploring, proceed to Step 5 or offer to skip ahead to the complete script. 
+ +### Step 5 – Try Reranking (Optional) + +Ask the user if they want to try reranking. + +If yes, use `search-records` again with reranking enabled: + +``` +rerank: + model: bge-reranker-v2-m3 + rankFields: [chunk_text] + topN: 3 +``` + +**Explain**: Reranking runs a second-pass model over the results to improve relevance ordering. + +### Step 6 – Wrap Up + +Congratulate the user on completing the quickstart. Ask if they'd like a standalone Python script that does everything in one go — create index, upsert, query, and rerank. + +If yes, copy it to their working directory: + +```bash +cp scripts/quickstart_complete.py ./pinecone_quickstart.py +``` + +Tell the user: +- The script is at `./pinecone_quickstart.py` +- Run it with: `uv run pinecone_quickstart.py` +- It uses `uv` inline dependencies — no separate install needed +- They can swap in their own `records` list to build something real + +--- + +## Path B: Assistant Quickstart + +Guide the user through the Pinecone Assistant workflow using the existing assistant skills: + +### Step 1 – Check for Documents + +Before anything else, ask the user if they have files to upload. Pinecone Assistant accepts `.pdf`, `.md`, `.txt`, and `.docx` files — a single file or a folder of files both work. + +**If they have files:** ask for the path and proceed to Step 2. + +**If they don't have files:** offer two options: +- **Generate sample docs** — create a few short markdown files in `./sample-docs/` so they can complete the quickstart right now. Ask what topics they'd like (or default to: a product FAQ, a short how-to guide, and a brief company overview). Write 3 files, each 150–250 words. +- **Come back later** — let them know they can return once they have documents and pick up from Step 2. 
+ +### Step 2 – Create an Assistant + +Invoke `assistant` or run (add `--env-file .env` if using a `.env` file): +```bash +uv run ../assistant/scripts/create.py --name my-assistant +``` + +Explain: The assistant is a fully managed RAG service — upload documents, ask questions, get cited answers. + +### Step 3 – Upload Documents + +Invoke `assistant` or run (add `--env-file .env` if using a `.env` file): +```bash +uv run ../assistant/scripts/upload.py --assistant my-assistant --source ./your-docs +``` + +Explain: Pinecone handles chunking, embedding, and indexing automatically — no configuration needed. + +### Step 4 – Chat with the Assistant + +Invoke `assistant` or run (add `--env-file .env` if using a `.env` file): +```bash +uv run ../assistant/scripts/chat.py --assistant my-assistant --message "What are the main topics in these documents?" +``` + +Explain: Responses include citations with source file and page number. + +### Next Steps for Assistant + +- Invoke `assistant` to keep the assistant up to date as documents change +- Use the `assistant` skill to retrieve raw context snippets for custom workflows +- Every assistant is also an MCP server — see https://docs.pinecone.io/guides/assistant/mcp-server + +--- + +## Troubleshooting + +**`PINECONE_API_KEY` not set** + +Terminal environments: +```bash +export PINECONE_API_KEY="your-key" +``` +IDEs that don't inherit shell variables: create a `.env` file in the project root: +``` +PINECONE_API_KEY=your-key +``` +Then use `uv run --env-file .env` when running scripts. Restart your IDE/agent session after setting. 
+ +**MCP tools not available** +- Verify the Pinecone MCP server is configured in your IDE's MCP settings +- Check that `PINECONE_API_KEY` is set before the MCP server starts + +**Index already exists** +- The upsert script is safe to re-run — it will upsert over existing records +- Or delete and recreate: use `pc index delete -n quickstart-skills` via the CLI + +**`uv` not installed** +See the [uv installation guide](https://docs.astral.sh/uv/getting-started/installation/). + +## Further Reading + +- Quickstart docs: https://docs.pinecone.io/guides/get-started/quickstart +- Integrated indexes: https://docs.pinecone.io/guides/index-data/create-an-index +- Python SDK: https://docs.pinecone.io/guides/get-started/python-sdk +- MCP server: https://docs.pinecone.io/reference/tools/mcp diff --git a/skills/quickstart/scripts/quickstart_complete.py b/skills/quickstart/scripts/quickstart_complete.py new file mode 100644 index 0000000..0404873 --- /dev/null +++ b/skills/quickstart/scripts/quickstart_complete.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +# /// script +# dependencies = [ +# "pinecone>=8.0.0", +# ] +# /// + +import os +from pinecone import Pinecone + +api_key = os.environ.get("PINECONE_API_KEY") +if not api_key: + raise ValueError("PINECONE_API_KEY environment variable not set") + +pc = Pinecone(api_key=api_key, source_tag="cursor_plugin:index_quickstart") + +# 1. Create a serverless index with an integrated embedding model +index_name = "quickstart" + +if not pc.has_index(index_name): + pc.create_index_for_model( + name=index_name, + cloud="aws", + region="us-east-1", + embed={ + "model": "llama-text-embed-v2", + "field_map": {"text": "chunk_text"} + } + ) + +# 2. Upsert records +# Three distinct themes — notice the queries below use different words than the records. +# That's semantic search: finding meaning, not just matching keywords. 
+records = [ + # Health / feeling unwell + {"_id": "rec1", "chunk_text": "I've been sneezing all day and my nose won't stop running.", "category": "health"}, + {"_id": "rec2", "chunk_text": "She stayed home with a pounding headache and a low-grade fever.", "category": "health"}, + {"_id": "rec3", "chunk_text": "He felt completely drained after waking up with a sore throat and chills.", "category": "health"}, + # Productivity / work + {"_id": "rec4", "chunk_text": "She blocked off two hours in the morning to focus without interruptions.", "category": "productivity"}, + {"_id": "rec5", "chunk_text": "He finished all his tasks ahead of schedule by prioritizing the hardest ones first.", "category": "productivity"}, + {"_id": "rec6", "chunk_text": "Turning off notifications helped her get into a deep flow state.", "category": "productivity"}, + # Outdoors / nature + {"_id": "rec7", "chunk_text": "A red fox darted across the trail and disappeared into the underbrush.", "category": "nature"}, + {"_id": "rec8", "chunk_text": "The hikers paused to watch a bald eagle circle lazily over the valley.", "category": "nature"}, + {"_id": "rec9", "chunk_text": "Fireflies lit up the meadow as the sun dipped below the treeline.", "category": "nature"}, +] + +dense_index = pc.Index(index_name) +dense_index.upsert_records("example-namespace", records) + +# 3. Search records +# The query uses different words than the records — semantic search finds meaning, not keywords. +query = "feeling ill and run down" + +results = dense_index.search( + namespace="example-namespace", + query={"top_k": 3, "inputs": {"text": query}} +) + +print("Search results:") +for hit in results["result"]["hits"]: + print(f" id: {hit['_id']} | score: {round(hit['_score'], 2)} | text: {hit['fields']['chunk_text']}") + +# 4. 
Search with reranking
+# Second pass: the bge reranker re-scores the same top_k hits and returns the
+# top_n best, improving relevance ordering over the first-stage vector search.
+reranked_results = dense_index.search(
+    namespace="example-namespace",
+    query={"top_k": 3, "inputs": {"text": query}},
+    rerank={"model": "bge-reranker-v2-m3", "top_n": 3, "rank_fields": ["chunk_text"]}
+)
+
+print("\nReranked results:")
+for hit in reranked_results["result"]["hits"]:
+    print(f"  id: {hit['_id']} | score: {round(hit['_score'], 2)} | text: {hit['fields']['chunk_text']}")
diff --git a/skills/quickstart/scripts/upsert.py b/skills/quickstart/scripts/upsert.py
new file mode 100644
index 0000000..ac428ee
--- /dev/null
+++ b/skills/quickstart/scripts/upsert.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# /// script
+# dependencies = [
+#     "pinecone>=8.0.0",
+#     "typer>=0.15.0",
+# ]
+# ///
+
+# Upsert nine themed sample records (health / productivity / nature) into an
+# existing Pinecone index. The records carry raw text only — assumes the target
+# index uses an integrated embedding model mapped to "chunk_text" (as created
+# by quickstart_complete.py); confirm before pointing this at another index.
+
+import os
+import typer
+from pinecone import Pinecone
+
+app = typer.Typer()
+
+@app.command()
+def main(
+    index: str = typer.Option(..., "--index", help="Name of the Pinecone index to upsert into"),
+    namespace: str = typer.Option("example-namespace", "--namespace", help="Namespace to upsert into"),
+):
+    """Upsert a fixed set of sample records into INDEX under NAMESPACE."""
+    api_key = os.environ.get("PINECONE_API_KEY")
+    if not api_key:
+        # Fail fast with a non-zero exit so agent/CLI callers see the error.
+        typer.echo("Error: PINECONE_API_KEY environment variable not set", err=True)
+        raise typer.Exit(1)
+
+    # source_tag attributes these API calls to the Cursor plugin integration.
+    pc = Pinecone(api_key=api_key, source_tag="cursor_plugin:upsert")
+
+    # Re-running is safe: upserting the same _id values overwrites in place.
+    records = [
+        # Health / feeling unwell
+        {"_id": "rec1", "chunk_text": "I've been sneezing all day and my nose won't stop running.", "category": "health"},
+        {"_id": "rec2", "chunk_text": "She stayed home with a pounding headache and a low-grade fever.", "category": "health"},
+        {"_id": "rec3", "chunk_text": "He felt completely drained after waking up with a sore throat and chills.", "category": "health"},
+        # Productivity / work
+        {"_id": "rec4", "chunk_text": "She blocked off two hours in the morning to focus without interruptions.", "category": "productivity"},
+        {"_id": "rec5", "chunk_text": "He finished all his tasks ahead of schedule by prioritizing the hardest ones first.", "category": "productivity"},
+        {"_id": "rec6", "chunk_text": "Turning off notifications helped her get into a deep flow state.", "category": "productivity"},
+        # Outdoors / nature
+        {"_id": "rec7", "chunk_text": "A red fox darted across the trail and disappeared into the underbrush.", "category": "nature"},
+        {"_id": "rec8", "chunk_text": "The hikers paused to watch a bald eagle circle lazily over the valley.", "category": "nature"},
+        {"_id": "rec9", "chunk_text": "Fireflies lit up the meadow as the sun dipped below the treeline.", "category": "nature"},
+    ]
+
+    idx = pc.Index(index)
+    idx.upsert_records(namespace, records)
+    typer.echo(f"Upserted {len(records)} records into '{index}' (namespace: '{namespace}')")
+
+if __name__ == "__main__":
+    app()