From 0147d2f052d1a3be3de4a2e446c431b08694c5a4 Mon Sep 17 00:00:00 2001 From: Acuspeedster Date: Sat, 31 May 2025 14:03:52 +0530 Subject: [PATCH 1/3] Add documentation for adding examples to the vector database Signed-off-by: Acuspeedster --- README.md | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/README.md b/README.md index 478a1de..c47d5a5 100644 --- a/README.md +++ b/README.md @@ -436,6 +436,75 @@ Process repeats until successful or max attempts reached --- +## 📊 Adding to the Vector Database + +The system uses vector embeddings to find similar projects and error examples, which helps improve code generation quality. Here's how to add your own examples: + +### Method 1: Using Python API Directly + +```python +from app.llm_client import LlamaEdgeClient +from app.vector_store import QdrantStore + +# Initialize the components +llm_client = LlamaEdgeClient() +vector_store = QdrantStore() + +# Ensure collections exist +vector_store.create_collection("project_examples") # or "error_examples" + +# 1. Prepare your data +project_data = { + "query": "A command-line calculator in Rust", + "example": "Your full project example with code here..." +} + +# 2. Get embedding for the query text +embedding = llm_client.get_embeddings([project_data["query"]])[0] + +# 3. 
Add to vector database +vector_store.add_item( + collection_name="project_examples", + vector=embedding, + item=project_data +) +``` + +### Method 2: Adding Multiple Examples from JSON Files +Place JSON files in the appropriate directories: + +Project examples: ```project_examples``` +Error examples: ```error_examples``` +Format for project examples: +``` +{ + "query": "Description of the project", + "example": "Full example code or description" +} +``` +Format for error examples: +``` +{ + "error": "Rust compiler error message", + "solution": "How to fix the error", + "context": "Additional explanation (optional)" +} +``` +Then run the data loading script: +``` +python -c "from app.load_data import load_project_examples, load_error_examples; load_project_examples(); load_error_examples()" +``` + +### Method 3: Using the ```parse_and_save_qna.py``` Script +For bulk importing from a Q&A format text file: + +Place your Q&A pairs in a text file with format similar to ```QnA_pair.txt``` +Modify the ```parse_and_save_qna.py``` script to point to your file +Run the script: +``` +python parse_and_save_qna.py +``` + ## 🤝 Contributing Contributions are welcome! This project uses the Developer Certificate of Origin (DCO) to certify that contributors have the right to submit their code. Follow these steps: @@ -458,3 +527,6 @@ This certifies that you wrote or have the right to submit the code you're contri ## 📜 License Licensed under [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html). 
+ + + From eead3aac72b4a535fdeb11584cc6f84718bb9482 Mon Sep 17 00:00:00 2001 From: Acuspeedster Date: Sat, 31 May 2025 14:47:28 +0530 Subject: [PATCH 2/3] Add instructions for creating vector collections and updating environment variables in README Signed-off-by: Acuspeedster --- README.md | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c47d5a5..63af9f7 100644 --- a/README.md +++ b/README.md @@ -440,6 +440,33 @@ Process repeats until successful or max attempts reached The system uses vector embeddings to find similar projects and error examples, which helps improve code generation quality. Here's how to add your own examples: +### 🔧 Creating Vector Collections + +First, you need to create the necessary collections in Qdrant using these curl commands: + +```bash +# Create project_examples collection with 1536 dimensions (default) +curl -X PUT "http://localhost:6333/collections/project_examples" \ + -H "Content-Type: application/json" \ + -d '{ + "vectors": { + "size": 1536, + "distance": "Cosine" + } + }' + +# Create error_examples collection with 1536 dimensions (default) +curl -X PUT "http://localhost:6333/collections/error_examples" \ + -H "Content-Type: application/json" \ + -d '{ + "vectors": { + "size": 1536, + "distance": "Cosine" + } + }' +``` +Note: If you've configured a different embedding size via ```LLM_EMBED_SIZE``` environment variable, replace 1536 with that value. + ### Method 1: Using Python API Directly ```python @@ -470,16 +497,50 @@ vector_store.add_item( ) ``` +For Error Examples: +```python +from app.llm_client import LlamaEdgeClient +from app.vector_store import QdrantStore + +# Initialize the components +llm_client = LlamaEdgeClient() +vector_store = QdrantStore() + +# Ensure collection exists +vector_store.create_collection("error_examples") + +# 1. 
Prepare your error data +error_data = { + "error": "error[E0502]: cannot borrow `*self` as mutable because it is also borrowed as immutable", + "solution": "Ensure mutable and immutable borrows don't overlap by using separate scopes", + "context": "This error occurs when you try to borrow a value mutably while an immutable borrow exists" +} + +# 2. Get embedding for the error message +embedding = llm_client.get_embeddings([error_data["error"]])[0] + +# 3. Add to vector database +vector_store.add_item( + collection_name="error_examples", + vector=embedding, + item=error_data +) +``` + ### Method 2: Adding Multiple Examples from JSON Files Place JSON files in the appropriate directories: Project examples: ```project_examples``` Error examples: ```error_examples``` -Format for project examples: -``` +Format for project examples (with optional project_files field): +```json { "query": "Description of the project", - "example": "Full example code or description" + "example": "Full example code or description", + "project_files": { + "src/main.rs": "// File content here", + "Cargo.toml": "// File content here" + } } ``` Format for error examples: @@ -487,7 +548,8 @@ Format for error examples: { "error": "Rust compiler error message", "solution": "How to fix the error", - "context": "Additional explanation (optional)" + "context": "Additional explanation (optional)", + "example": "// Code example showing the fix (optional)" } ``` Then run the data loading script: @@ -505,6 +567,20 @@ Run the script: python parse_and_save_qna.py ``` +## ⚙️ Environment Variables for Vector Search +The `SKIP_VECTOR_SEARCH` environment variable controls whether the system uses vector search: +- `SKIP_VECTOR_SEARCH=true` - Disables vector search functionality +- `SKIP_VECTOR_SEARCH=false` (or not set) - Enables vector search + +If your `.env` file contains: +``` +SKIP_VECTOR_SEARCH=true +``` +This setting disables vector search. 
To enable it: +- Change this value to false or remove the line completely +- Ensure you have a running Qdrant instance (via Docker Compose or standalone) +- Create the collections as shown above + ## 🤝 Contributing Contributions are welcome! This project uses the Developer Certificate of Origin (DCO) to certify that contributors have the right to submit their code. Follow these steps: From fe723d920bdc33e47708a036b5dcef699472562a Mon Sep 17 00:00:00 2001 From: Acuspeedster Date: Sat, 31 May 2025 15:09:11 +0530 Subject: [PATCH 3/3] Add project examples and error handling context to README Signed-off-by: Acuspeedster --- README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 63af9f7..3024b24 100644 --- a/README.md +++ b/README.md @@ -469,6 +469,7 @@ Note: If you've configured a different embedding size via ```LLM_EMBED_SIZE``` e ### Method 1: Using Python API Directly +#### For Project Examples ```python from app.llm_client import LlamaEdgeClient from app.vector_store import QdrantStore @@ -477,13 +478,17 @@ from app.vector_store import QdrantStore llm_client = LlamaEdgeClient() vector_store = QdrantStore() -# Ensure collections exist -vector_store.create_collection("project_examples") # or "error_examples" +# Ensure collection exists +vector_store.create_collection("project_examples") # 1. Prepare your data project_data = { "query": "A command-line calculator in Rust", - "example": "Your full project example with code here..." + "example": "Your full project example with code here...", + "project_files": { + "src/main.rs": "fn main() {\n println!(\"Hello, calculator!\");\n}", + "Cargo.toml": "[package]\nname = \"calculator\"\nversion = \"0.1.0\"\nedition = \"2021\"\n\n[dependencies]" + } } # 2. 
Get embedding for the query text @@ -513,7 +518,8 @@ vector_store.create_collection("error_examples") error_data = { "error": "error[E0502]: cannot borrow `*self` as mutable because it is also borrowed as immutable", "solution": "Ensure mutable and immutable borrows don't overlap by using separate scopes", - "context": "This error occurs when you try to borrow a value mutably while an immutable borrow exists" + "context": "This error occurs when you try to borrow a value mutably while an immutable borrow exists", + "example": "// Before (error)\nfn main() {\n let mut v = vec![1, 2, 3];\n let first = &v[0];\n v.push(4); // Error: cannot borrow `v` as mutable\n println!(\"{}\", first);\n}\n\n// After (fixed)\nfn main() {\n let mut v = vec![1, 2, 3];\n {\n let first = &v[0];\n println!(\"{}\", first);\n } // immutable borrow ends here\n v.push(4); // Now it's safe to borrow mutably\n}" } # 2. Get embedding for the error message