Add .gitignore and enhance response parsing logic

Acuspeedster · Acuspeedster · commit 541ef1755a43 · 2025-05-31T12:24:56.000+05:30
- Introduced .gitignore to exclude environment and data files.
- Improved response parsing in ResponseParser to handle various file formats and ensure essential files are created if missing.
- Added error handling for vector store initialization in main.py.
- Updated JSON responses for better clarity on build success and failure.

Signed-off-by: Acuspeedster <arnavrajsingh@gmail.com>
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+.env
+qdrant_data
diff --git a/app/main.py b/app/main.py
@@ -38,18 +38,25 @@
 compiler = RustCompiler()
 
 # Initialize vector store
-vector_store = QdrantStore(embedding_size=llm_embed_size)
-vector_store.create_collection("project_examples")
-vector_store.create_collection("error_examples")
-
-# After initializing vector store
-from app.load_data import load_project_examples, load_error_examples
-
-# Check if collections are empty and load data if needed
-if vector_store.count("project_examples") == 0:
-    load_project_examples()
-if vector_store.count("error_examples") == 0:
-    load_error_examples()
+try:
+    vector_store = QdrantStore(embedding_size=llm_embed_size)
+    if os.getenv("SKIP_VECTOR_SEARCH", "").lower() != "true":
+        vector_store.create_collection("project_examples")
+        vector_store.create_collection("error_examples")
+        
+        # After initializing vector store
+        from app.load_data import load_project_examples, load_error_examples
+
+        # Check if collections are empty and load data if needed
+        if vector_store.count("project_examples") == 0:
+            load_project_examples()
+        if vector_store.count("error_examples") == 0:
+            load_error_examples()
+except Exception as e:
+    print(f"Warning: Vector store initialization failed: {e}")
+    print("Continuing without vector store functionality...")
+    # Create a dummy vector store
+    vector_store = None
 
 # Project generation request
 class ProjectRequest(BaseModel):
@@ -161,10 +168,6 @@ async def compile_and_fix_rust(request: dict):
     
     # Pre-process code to fix common syntax errors
     code = request["code"]
-    # Fix missing parenthesis in println! macro
-    # if "println!(" in code and ");" not in code:
-    #     code = code.replace("println!(\"", "println!(\"") 
-    #     code = code.replace("\" //", "\"); //")
     
     # Create temp directory
     with tempfile.TemporaryDirectory() as temp_dir:
@@ -201,8 +204,17 @@ async def compile_and_fix_rust(request: dict):
                 for filename, content in current_files.items():
                     output_text += f"[filename: {filename}]\n{content}\n\n"
                 
-                # For successful fixes, return a text response with the combined code
-                return PlainTextResponse(content=output_text.strip())
+                # Return JSON response instead of plain text
+                return JSONResponse(content={
+                    "status": "success",
+                    "message": "Code fixed and compiled successfully",
+                    "attempts": attempts,
+                    "combined_text": output_text.strip(),
+                    "files": current_files,
+                    "build_output": output or "Build successful",
+                    "run_output": run_output if run_success else None,
+                    "build_success": True
+                })
             
             # If we've reached max attempts without success, stop
             if attempt == max_attempts - 1:
@@ -211,14 +223,15 @@ async def compile_and_fix_rust(request: dict):
             # Extract error context for LLM
             error_context = compiler.extract_error_context(output)
             
-            # Find similar errors in vector DB (commented out for now)
+            # Find similar errors in vector DB
             similar_errors = []
-            try:
-                # Find similar errors in vector DB
-                error_embedding = llm_client.get_embeddings([error_context["full_error"]])[0]
-                similar_errors = vector_store.search("error_examples", error_embedding, limit=3)
-            except Exception as e:
-                print(f"Vector search error (non-critical): {e}")
+            if vector_store is not None and os.getenv("SKIP_VECTOR_SEARCH", "").lower() != "true":
+                try:
+                    # Find similar errors in vector DB
+                    error_embedding = llm_client.get_embeddings([error_context["full_error"]])[0]
+                    similar_errors = vector_store.search("error_examples", error_embedding, limit=3)
+                except Exception as e:
+                    print(f"Vector search error (non-critical): {e}")
             
             # Generate fix prompt
             fix_examples = ""
@@ -257,15 +270,26 @@ async def compile_and_fix_rust(request: dict):
         for filename, content in current_files.items():
             output_text += f"[filename: {filename}]\n{content}\n\n"
         
+        # Add explanation for build failure
+        output_text += "\n# Build failed\n"
+        output_text += f"\n# Note: The build failed after {max_attempts} fix attempts. Common reasons include:\n"
+        output_text += "# - Complex syntax errors that are difficult to fix automatically\n"
+        output_text += "# - Dependencies that cannot be resolved\n"
+        output_text += "# - Logical errors in the code structure\n"
+        if len(attempts) > 0:
+            output_text += f"# The final error was: {attempts[-1]['output'].splitlines()[0] if attempts[-1]['output'] else 'Unknown error'}\n"
+        
         # If we've exhausted all attempts, return error
         return JSONResponse(content={
-            "status": "error",
-            "message": f"Failed to fix code: {attempts[-1]['output']}",
+            "status": "failed",
+            "message": f"Failed to fix code after {max_attempts} attempts",
             "attempts": attempts,
             "combined_text": output_text.strip(),
-            "final_files": current_files
+            "files": current_files,
+            "build_output": attempts[-1]['output'] if attempts else "No compilation attempts were made",
+            "build_success": False
         })
-
+        
 async def handle_project_generation(
     project_id: str, 
     project_dir: str, 
@@ -391,7 +415,7 @@ async def handle_project_generation(
                 fix_examples = "Here are some examples of similar errors and their fixes:\n\n"
                 for i, err in enumerate(similar_errors):
                     fix_examples += f"Example {i+1}:\n{err['error']}\nFix: {err['solution']}\n\n"
-            
+        
             fix_prompt = f"""
 Here is a Rust project that failed to compile. Help me fix the compilation errors.
 
@@ -592,7 +616,6 @@ async def generate_project_sync(request: ProjectRequest):
                 })
                 
                 # DON'T save status here - remove this line
-                # save_status(temp_dir, status)
                 
                 # Extract error context
                 error_context = compiler.extract_error_context(output)
@@ -654,32 +677,26 @@ async def generate_project_sync(request: ProjectRequest):
                 except Exception as e:
                     print(f"Error reading file {f}: {e}")
             
-            if success:
-                # Project compiled successfully
-                status.update({
-                    "status": "completed",
-                    "message": "Project generated successfully",
-                    "build_output": output
-                })
-                
-                # Add build status
-                all_files_content += "\n# Build succeeded\n"
-            else:
-                # Build failed
-                status.update({
-                    "status": "failed",
-                    "message": "Failed to generate working project",
-                    "build_output": output
-                })
-                
-                # Add build status
-                all_files_content += "\n# Build failed\n"
-            
-            # DON'T save status here - remove this line
-            # save_status(temp_dir, status)
+            # Add build status to the combined text
+            all_files_content += "\n# Build " + ("succeeded" if success else "failed") + "\n"
             
-            # Return the response while still inside the with block
-            return PlainTextResponse(content=all_files_content)
+            # Add explanation when build fails
+            if not success:
+                all_files_content += f"\n# Note: The build failed because of errors in the generated code. Common reasons include:\n"
+                all_files_content += "# - Incorrect or non-existent crate versions specified in Cargo.toml\n"
+                all_files_content += "# - Improper API usage in the generated code\n"
+                all_files_content += "# - Missing or incompatible dependencies\n"
+                all_files_content += f"# The specific error was: {output.splitlines()[0] if output else 'Unknown error'}\n"
+            
+            # Return JSON response instead of plain text
+            return JSONResponse(content={
+                "status": "success" if success else "failed",
+                "message": "Project generated successfully" if success else "Failed to generate working project",
+                "combined_text": all_files_content.strip(),
+                "files": {f: open(os.path.join(temp_dir, f), 'r').read() for f in file_paths if os.path.exists(os.path.join(temp_dir, f))},
+                "build_output": output,
+                "build_success": success
+            })
                 
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error generating project: {str(e)}")
diff --git a/app/response_parser.py b/app/response_parser.py
@@ -12,40 +12,120 @@ def parse_response(self, response: str) -> Dict[str, str]:
         """Parse response into a dictionary of files"""
         files = {}
         
-        # Pattern to match both [filename: path] and filename: path formats
-        file_matches = re.findall(r'\[filename: (.+?)\]|\*\*(.+?):\*\*|```(.+?)\s', response, re.DOTALL)
-        content_blocks = re.split(r'\[filename: .+?\]|\*\*(.+?):\*\*|```(.+?)\s', response)
+        # First, try to extract using explicit filename markers
+        file_blocks = re.findall(r'\[filename:\s*(.*?)\](.*?)(?=\[filename:|$)', response, re.DOTALL)
+        if file_blocks:
+            for filename, content in file_blocks:
+                # Clean the filename and content
+                clean_filename = filename.strip()
+                # Remove leading/trailing backticks and language identifiers from content
+                clean_content = self._clean_code_block(content)
+                
+                if clean_filename and clean_content:
+                    files[clean_filename] = clean_content
         
-        # Clean up content blocks
-        cleaned_blocks = []
-        for block in content_blocks:
-            if block and block.strip():
-                # Find code block content
-                code_match = re.search(r'```(?:\w+)?\s*(.*?)```', block, re.DOTALL)
-                if code_match:
-                    cleaned_blocks.append(code_match.group(1).strip())
-                else:
-                    cleaned_blocks.append(block.strip())
-        
-        # Match filenames with content
-        for i, match in enumerate(file_matches):
-            if i < len(cleaned_blocks):
-                # Find first non-empty group in the match
-                filename = next((name for name in match if name), "").strip()
-                if filename and not filename.startswith("```"):
-                    files[filename] = cleaned_blocks[i]
-        
-        # If nothing was parsed but there are code blocks, try simpler parsing
+        # If no files found with explicit markers, try to identify standard Rust project files
         if not files:
+            # Look for code blocks and try to identify their file type by content
             code_blocks = re.findall(r'```(?:\w+)?\s*(.*?)```', response, re.DOTALL)
-            file_headers = re.findall(r'(?:^|\n)#+\s*(.+\.rs|Cargo\.toml|README\.md)', response)
             
-            if len(code_blocks) == len(file_headers):
-                for i, header in enumerate(file_headers):
-                    files[header.strip()] = code_blocks[i].strip()
+            cargo_toml = None
+            main_rs = None
+            readme_md = None
+            
+            for block in code_blocks:
+                clean_block = block.strip()
+                if "[package]" in clean_block and "name =" in clean_block and "version =" in clean_block:
+                    cargo_toml = clean_block
+                elif "fn main()" in clean_block:
+                    main_rs = clean_block
+                elif clean_block.startswith("# ") or "# " in clean_block[:20]:
+                    readme_md = clean_block
+            
+            if cargo_toml:
+                files["Cargo.toml"] = cargo_toml
+            if main_rs:
+                files["src/main.rs"] = main_rs
+            if readme_md:
+                files["README.md"] = readme_md
+        
+        # If still no files found, use a more aggressive approach to extract content
+        if not files:
+            # Last resort: extract based on common patterns in the response
+            if "Cargo.toml" in response:
+                cargo_section = self._extract_section(response, "Cargo.toml")
+                if cargo_section:
+                    files["Cargo.toml"] = cargo_section
+            
+            if "main.rs" in response:
+                main_section = self._extract_section(response, "main.rs")
+                if main_section:
+                    files["src/main.rs"] = main_section
+            
+            if "README" in response:
+                readme_section = self._extract_section(response, "README")
+                if readme_section:
+                    files["README.md"] = readme_section
+        
+        # Ensure we don't have language identifiers as filenames
+        common_lang_identifiers = ["toml", "rust", "markdown", "bash"]
+        for lang in common_lang_identifiers:
+            if lang in files and len(files[lang]) < 100:  # Only remove if it's short (likely a lang identifier)
+                del files[lang]
+        
+        # Ensure we have the essential files with proper content
+        if not files.get("Cargo.toml") or not files.get("src/main.rs"):
+            # Create fallback files if missing
+            if not files.get("Cargo.toml"):
+                files["Cargo.toml"] = """[package]
+name = "rust_project"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+"""
+            
+            if not files.get("src/main.rs"):
+                files["src/main.rs"] = """fn main() {
+    println!("Hello, world!");
+}
+"""
         
         return files
     
+    def _clean_code_block(self, text: str) -> str:
+        """Clean code blocks by removing backticks and language identifiers"""
+        # Remove leading/trailing whitespace
+        text = text.strip()
+        
+        # Remove triple backticks and language identifier at start
+        text = re.sub(r'^```\w*\s*', '', text)
+        
+        # Remove trailing triple backticks
+        text = re.sub(r'\s*```$', '', text)
+        
+        return text
+    
+    def _extract_section(self, response: str, identifier: str) -> str:
+        """Extract a section from the response based on an identifier"""
+        parts = response.split(identifier)
+        if len(parts) < 2:
+            return ""
+        
+        # Get the part after the identifier
+        section = parts[1]
+        
+        # Find the next code block or section
+        next_section = re.search(r'(\[filename:|```\w*)', section)
+        if next_section:
+            section = section[:next_section.start()]
+        
+        # Clean up the section
+        section = re.sub(r'^[^\w]*', '', section, flags=re.DOTALL)  # Remove non-word chars at start
+        section = section.strip()
+        
+        return section
+    
     def write_files(self, files: Dict[str, str], project_dir: str) -> List[str]:
         """Write files to the project directory"""
         file_paths = []