crewAIInc · ChrisCates · May 19, 2025
diff --git a/gestell-crew/.env.sample b/gestell-crew/.env.sample
@@ -0,0 +1,2 @@
+OPENAI_API_KEY="sk-..."
+GESTELL_API_KEY="gestell-...-..."
diff --git a/gestell-crew/.gitignore b/gestell-crew/.gitignore
@@ -0,0 +1,4 @@
+files
+__pycache__
+agent
+db
diff --git a/gestell-crew/.python-version b/gestell-crew/.python-version
@@ -0,0 +1 @@
+3.11
diff --git a/gestell-crew/README.md b/gestell-crew/README.md
@@ -0,0 +1,58 @@
+# Gestell Crew Orchestration Guide
+
+In this guide, you’ll learn how to use the Gestell MCP Server to create Crew Agents that:
+
+1. Create and configure a new collection
+2. Upload documents into your collection
+3. Navigate table categories in the Gestell Console
+4. Use an AI agent to research your collection and generate summaries
+
+View a video walkthrough of this end-to-end orchestration on [Gestell’s YouTube channel](https://youtu.be/V73zXKUuQHc).
+
+## Why use Gestell?
+
+Gestell is an ETL for AI search and reasoning. Connect any unstructured data source and Gestell will enable at-scale, efficient and accurate search and reasoning for your application. No need for complex and costly processing and RAG pipelines – simply instruct Gestell in natural language and get production-scale databases set up instantly.
+
+### Advantages
+
+- Ease: Add Gestell to your CrewAI implementation in just a few lines of code to get your unstructured datasets integrated into your Crew
+- Scale: Process massive unstructured databases with best-in-class performance
+- Simplicity: No need for complex RAG or agentic search architectures - simply instruct in natural language how you want your data structured
+- Customization: Create specific categories tailored to your business workflows using natural language instructions. Extract features, overlay concepts, and build specialized knowledge bases
+
+### How Gestell Works with CrewAI
+
+CrewAI excels at orchestrating AI agents to tackle complex tasks, while Gestell transforms raw data into AI-ready, structured information. Together, they create a powerful system where:
+
+1. Context Delivery: Gestell's search and reasoning platform provides CrewAI agents with richer, more accurate context for decision-making
+2. Specialized Knowledge Bases: Create domain-specific knowledge structures that CrewAI agents can efficiently query and reason with
+3. Scalable Reasoning: As your data grows, Gestell's ETL pipeline ensures your CrewAI implementation maintains high performance without degradation
+4. Seamless Integration: The integration process is straightforward, allowing you to focus on building your AI application rather than wrestling with data preparation
+5. End-to-End Solution: From data ingestion to agent execution, get a complete workflow that handles the complexity of unstructured data and multi-agent coordination
+
+## Running The Examples
+
+- You need `node` and `uv` installed
+
+- You also need a Gestell API Key which you can get from the [Gestell Platform](https://platform.gestell.ai)
+
+- Copy the `.env.sample` file to `.env` and fill in the values
+
+### Usage
+
+```bash
+# Edit the URLS list in src/download.py to choose which Wikipedia articles to download
+uv run src/download.py
+# NOTE: If you don't want Wikipedia articles, you can instead put video, audio,
+# or any other file in `files/`. Gestell will process it for you.
+
+# After you have items in `files/`, start the document analysis and collection creation Crew
+uv run src/create.py
+
+# Review the status of documents being processed on https://platform.gestell.ai
+# Once all documents are processed, run the research Crew
+uv run src/research.py
+
+# You will be given a research report in `agent/research_report.md`
+cat agent/research_report.md
+```
diff --git a/gestell-crew/config/crew.yaml b/gestell-crew/config/crew.yaml
@@ -0,0 +1,36 @@
+collection_planner_agent:
+  role: >
+    Document Collection Planner
+  goal: >
+    Organize and manage document collections efficiently
+  backstory: >
+    You are an expert in document management and organization. Your role is to
+    analyze, categorize, and structure document collections to make them easily
+    searchable and accessible. You understand document relationships and can
+    create intuitive organizational systems.
+
+document_analyzer_agent:
+  role: >
+    Document Analysis Specialist
+  goal: >
+    Extract meaningful insights and information from documents
+  backstory: >
+    You specialize in analyzing document contents to identify key information,
+    patterns, and relationships. Your expertise helps in understanding the
+    context and significance of documents within a collection. You're skilled at
+    summarizing, categorizing, and extracting structured data from various
+    document formats.
+
+research_agent:
+  role: >
+    Research Specialist
+  goal: >
+    Conduct thorough research and generate comprehensive reports
+  backstory: >
+    You are an expert researcher with a keen eye for detail and a talent for
+    synthesizing information from multiple sources. Your ability to analyze,
+    interpret, and present complex information in a clear and concise manner
+    makes you invaluable for generating high-quality research reports. You excel
+    at identifying key insights, drawing connections between different pieces
+    of information, and presenting findings in an organized, professional format.
+
diff --git a/gestell-crew/config/task.yaml b/gestell-crew/config/task.yaml
@@ -0,0 +1,74 @@
+analyze_document_task:
+  description: >
+    Analyze all provided documents in the directory and extract key information, including:
+    - Main topics and themes
+    - Key entities (people, places, organizations)
+    - Important dates and events
+    - Relationships between entities
+    - Any specific requirements or actions needed
+
+    Provide concise and clear markdown that segments summaries of each document.
+    Make sure to provide the **exact** file path when summarizing each document.
+  expected_output: >
+    A concise GFM markdown analysis of each document including all key information.
+    This is to be used as input for the collection creation task.
+  output_file: "agent/analysis.md"
+
+create_collection_task:
+  description: >
+    Create a collection based on the provided analysis from the analyzed document task.
+
+    Ensure that the instructions are comprehensive and based off of the analysis provided.
+    You are only allowed to create **table** categories, any other category is not allowed.
+
+    - Tables must have bullet point list of what each column should contain
+    - All instructions should be clear concise bullet points
+  expected_output: >
+    Provide a concise GFM markdown summary of what the collection is about.
+    After running the createCollection tool, make sure to explicitly state the collection id.
+  output_file: "agent/collection.md"
+
+upload_document_task:
+  description: >
+    After a collection has been created. Upload all documents to the collection just created.
+
+    To upload a document you are required to do the following:
+    - You are required to provide the **exact** path to the tool for the "file" field.
+    - You are required to provide the "name" field with an extension.
+    - Additional instructions are NOT needed for each file uploaded and should be an empty string.
+  expected_output: >
+    A concise GFM markdown list confirmation of successful upload including:
+    - List of uploaded documents
+    - Any warnings or issues encountered
+    - Summary of the upload process
+  output_file: "agent/upload_summary.md"
+
+research_task:
+  description: >
+    Analyze the collection and then conduct research and generate a comprehensive GFM markdown report.
+
+    Identify the types of questions to ask using the file provided. As well as to retrieve the collection id.
+
+    Use the searchCollectionSimple and promptCollectionSimple tool calls to research the collection.
+    The prompt field should be used to ask questions about the collection.
+
+    The report should be well-structured and include:
+    - Executive summary of key findings
+    - Main topics and themes from the documents
+    - Important data points and statistics
+    - Key insights and recommendations
+    - Any relevant quotes or references
+
+    Format the report using GitHub Flavored Markdown with proper:
+    - Headers and subheaders
+    - Bullet points and numbered lists
+    - Tables for data comparison
+    - Code blocks for any technical details
+    - Links to sources where applicable
+
+    Keep the language clear, concise, and professional.
+  expected_output: >
+    A well-formatted GFM markdown report that provides a comprehensive overview
+    of the research findings, ready for sharing and collaboration.
+  output_file: "agent/research_report.md"
+
diff --git a/gestell-crew/pyproject.toml b/gestell-crew/pyproject.toml
@@ -0,0 +1,12 @@
+[project]
+name = "gestell-crew"
+version = "1.0.0"
+description = "Gestell CrewAI Example Workflow"
+readme = "README.md"
+authors = [{ name = "Chris Cates", email = "[email protected]" }]
+requires-python = ">=3.11"
+dependencies = [
+    "crewai-tools[mcp]>=0.45.0",
+    "mcp>=1.9.0",
+    "python-dotenv>=1.1.0",
+]
diff --git a/gestell-crew/src/__init__.py b/gestell-crew/src/__init__.py
diff --git a/gestell-crew/src/create.py b/gestell-crew/src/create.py
@@ -0,0 +1,7 @@
+from crewai_tools import MCPServerAdapter
+from src.gestell_mcp import server_params
+from src.crews.create import GestellCreateCrew
+
+# The crew is built and run inside the adapter's context so the tools it
+# yields are only used while that context is open.
+with MCPServerAdapter(server_params) as tools:
+    create_crew = GestellCreateCrew(tools=tools).crew()
+    result = create_crew.kickoff()
+    print(result)
diff --git a/gestell-crew/src/crews/__init__.py b/gestell-crew/src/crews/__init__.py
diff --git a/gestell-crew/src/crews/create.py b/gestell-crew/src/crews/create.py
@@ -0,0 +1,86 @@
+from crewai import Agent, Crew, Process, Task
+from crewai.project import CrewBase, agent, crew, task
+from typing import List, Optional
+from crewai.tools import BaseTool
+from pathlib import Path
+from crewai_tools import DirectoryReadTool, PDFSearchTool
+
+# Directory handed to DirectoryReadTool below. It is resolved against the
+# current working directory when this module is imported.
+files_dir = Path.cwd() / "files"
+
+
+@CrewBase
+class GestellCreateCrew:
+    """Crew that analyzes local files, creates a collection, and uploads the files.
+
+    Tools supplied by the caller are filtered by name and attached to the
+    collection-creation and upload tasks; the analysis task uses local
+    directory/PDF reading tools instead.
+    """
+
+    agents_config = "../../config/crew.yaml"
+    tasks_config = "../../config/task.yaml"
+
+    def __init__(self, tools: Optional[List[BaseTool]] = None):
+        """Keep the caller-supplied tools, falling back to an empty list."""
+        super().__init__()
+        self.tools = tools if tools else []
+
+    @agent
+    def document_analyzer_agent(self) -> Agent:
+        """Build the agent from the ``document_analyzer_agent`` config entry."""
+        settings = self.agents_config["document_analyzer_agent"]
+        return Agent(
+            config=settings,
+            llm="gpt-4.1-mini",
+            verbose=True,
+            allow_delegation=False,
+        )
+
+    @agent
+    def collection_planner_agent(self) -> Agent:
+        """Build the agent from the ``collection_planner_agent`` config entry."""
+        settings = self.agents_config["collection_planner_agent"]
+        return Agent(
+            config=settings,
+            llm="gpt-4.1-mini",
+            verbose=True,
+            allow_delegation=False,
+        )
+
+    @task
+    def analyze_document_task(self) -> Task:
+        """Task that reads ``files_dir`` with directory and PDF tools."""
+        task_settings = self.tasks_config["analyze_document_task"]
+        analyzer = self.document_analyzer_agent()
+        reading_tools = [DirectoryReadTool(directory=str(files_dir)), PDFSearchTool()]
+        return Task(config=task_settings, agent=analyzer, tools=reading_tools)
+
+    @task
+    def create_collection_task(self) -> Task:
+        """Task limited to the collection-creation and listing tools."""
+        wanted_names = ("createCollection", "listOrganizations", "listCollections")
+        # Tools lacking a ``name`` attribute resolve to None and are skipped.
+        create_collection_tools = [
+            candidate
+            for candidate in self.tools
+            if getattr(candidate, "name", None) in wanted_names
+        ]
+        task_settings = self.tasks_config["create_collection_task"]
+        planner = self.collection_planner_agent()
+        return Task(config=task_settings, agent=planner, tools=create_collection_tools)
+
+    @task
+    def upload_document_task(self) -> Task:
+        """Task limited to the document-upload and listing tools."""
+        wanted_names = ("uploadDocument", "listCollections", "listDocuments")
+        # Tools lacking a ``name`` attribute resolve to None and are skipped.
+        upload_doc_tool = [
+            candidate
+            for candidate in self.tools
+            if getattr(candidate, "name", None) in wanted_names
+        ]
+        task_settings = self.tasks_config["upload_document_task"]
+        uploader = self.document_analyzer_agent()
+        return Task(config=task_settings, agent=uploader, tools=upload_doc_tool)
+
+    @crew
+    def crew(self) -> Crew:
+        """Assemble the agents and tasks into a sequential crew.
+
+        ``self.agents`` and ``self.tasks`` are not assigned in this class;
+        presumably ``@CrewBase`` populates them from the decorated methods
+        (TODO confirm).
+        """
+        crew_options = {
+            "agents": self.agents,
+            "tasks": self.tasks,
+            "process": Process.sequential,
+            "verbose": True,
+        }
+        return Crew(**crew_options)
diff --git a/gestell-crew/src/crews/research.py b/gestell-crew/src/crews/research.py
@@ -0,0 +1,45 @@
+from crewai import Agent, Crew, Process, Task
+from crewai.project import CrewBase, agent, crew, task
+from typing import List, Optional
+from crewai.tools import BaseTool
+
+
+@CrewBase
+class GestellResearchCrew:
+    """Single-agent crew that runs the ``research_task`` with the supplied tools."""
+
+    agents_config = "../../config/crew.yaml"
+    tasks_config = "../../config/task.yaml"
+
+    def __init__(self, tools: Optional[List[BaseTool]] = None):
+        """Keep the caller-supplied tools, falling back to an empty list."""
+        super().__init__()
+        self.tools = tools if tools else []
+
+    @agent
+    def research_agent(self) -> Agent:
+        """Build the research agent and give it every tool held by the crew."""
+        settings = self.agents_config["research_agent"]
+        return Agent(
+            config=settings,
+            tools=self.tools,
+            llm="gpt-4.1-mini",
+            verbose=True,
+            allow_delegation=False,
+        )
+
+    @task
+    def research_task(self) -> Task:
+        """Task from the ``research_task`` config entry, run by the research agent."""
+        task_settings = self.tasks_config["research_task"]
+        researcher = self.research_agent()
+        return Task(config=task_settings, agent=researcher)
+
+    @crew
+    def crew(self) -> Crew:
+        """Assemble the agents and tasks into a sequential crew.
+
+        ``self.agents`` and ``self.tasks`` are not assigned in this class;
+        presumably ``@CrewBase`` populates them from the decorated methods
+        (TODO confirm).
+        """
+        crew_options = {
+            "agents": self.agents,
+            "tasks": self.tasks,
+            "process": Process.sequential,
+            "verbose": True,
+        }
+        return Crew(**crew_options)
diff --git a/gestell-crew/src/download.py b/gestell-crew/src/download.py
@@ -0,0 +1,51 @@
+import os
+from typing import List
+import requests
+
+# Wikipedia article URLs to fetch. main() extracts the page title from each
+# one and saves that page as a PDF.
+URLS: List[str] = [
+    "https://en.wikipedia.org/wiki/Babylon",
+    "https://en.wikipedia.org/wiki/Mesopotamia",
+    "https://en.wikipedia.org/wiki/Achaemenid_Empire",
+]
+
+
+def download_wikipedia_pdf(title: str, output_dir: str, timeout: float = 60.0) -> None:
+    """
+    Download the Wikipedia page with the given title as a PDF and save it.
+
+    The PDF is streamed to ``<output_dir>/<title>.pdf`` in 8 KiB chunks.
+    ``output_dir`` is created if it does not exist.
+
+    :param title: The title of the Wikipedia page to download.
+    :param output_dir: Directory where the PDF file will be saved.
+    :param timeout: Seconds to wait for the server before giving up.
+    :raises requests.HTTPError: If the server answers with an error status.
+    :raises requests.Timeout: If the server does not respond within ``timeout``.
+    """
+    endpoint = f"https://en.wikipedia.org/api/rest_v1/page/pdf/{title}"
+    # Without a timeout a stalled server would hang the script forever.
+    # With stream=True the connection is only released once the response is
+    # closed, so the context manager guarantees that even if writing fails.
+    with requests.get(endpoint, stream=True, timeout=timeout) as response:
+        response.raise_for_status()
+
+        os.makedirs(output_dir, exist_ok=True)
+        filepath = os.path.join(output_dir, f"{title}.pdf")
+        with open(filepath, "wb") as pdf_file:
+            for chunk in response.iter_content(chunk_size=8192):
+                pdf_file.write(chunk)
+
+
+def extract_title_from_url(url: str) -> str:
+    """
+    Extract the page title from a Wikipedia URL.
+
+    Everything after the first ``/wiki/`` is taken as the title, minus any
+    query string (``?...``) or fragment (``#...``), so a link to a section
+    still resolves to its page.
+
+    :param url: Full URL of the Wikipedia page.
+    :return: The page title suitable for the PDF-API endpoint.
+    :raises ValueError: If the URL has no ``/wiki/`` segment or no title after it.
+    """
+    _, marker, remainder = url.partition("/wiki/")
+    if not marker:
+        raise ValueError(f"Invalid Wikipedia URL: {url}")
+    # Neither the query string nor the fragment is part of the page title.
+    title = remainder.split("?", 1)[0].split("#", 1)[0]
+    if not title:
+        # e.g. "https://en.wikipedia.org/wiki/" names no page at all.
+        raise ValueError(f"Invalid Wikipedia URL: {url}")
+    return title
+
+
+def main() -> None:
+    """Download each page in ``URLS`` as a PDF into ``files`` and print every saved path."""
+    output_dir = "files"
+    # map() is lazy, so each title is still extracted right before its download.
+    for title in map(extract_title_from_url, URLS):
+        download_wikipedia_pdf(title, output_dir)
+        print(f"✔ Saved: {os.path.join(output_dir, title)}.pdf")
+
+
+if __name__ == "__main__":
+    main()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		OPENAI_API_KEY="sk-..."
		GESTELL_API_KEY="gestell-...-..."