diff --git a/CLAUDE.md b/CLAUDE.md
index c0beba0..b480a57 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -11,9 +11,10 @@ Nova is an AI research and personal assistant written in Python that provides:
 - Chat history saved to markdown files
 - **Multi-provider AI integration** (OpenAI, Anthropic, Ollama)
 - **Custom prompt templating system** with built-in templates and user-defined prompts
+- **Enhanced web search with intelligent query optimization** using YAKE keyword extraction and semantic analysis
 - Modular architecture for extensibility
 
-**Current Status:** Phase 4 complete (Tools Integration), supports OpenAI, Anthropic, and Ollama with custom prompt templates and comprehensive tools system with profile-based configuration.
+**Current Status:** Enhanced Search implemented with intelligent query optimization; supports OpenAI, Anthropic, and Ollama with custom prompt templates and a comprehensive tools system with profile-based configuration.
 
 ## Package Management Commands
 
@@ -59,6 +60,87 @@ Use these commands:
 - Override specific settings per profile (permission mode, enabled modules, etc.)
 - Use "Global" or "Custom" tools configuration per profile
 
+## Enhanced Web Search Commands
+
+Nova includes intelligent search with context-aware query enhancement, available through both tools and chat commands.
+
+### /search Command (Enhanced)
+
+**Basic Usage:**
+```bash
+/search <query>    # Uses your configured default enhancement
+/s <query>         # Short form
+```
+
+**Advanced Usage:**
+```bash
+/search <query> --enhancement fast      # YAKE keyword extraction (~50ms)
+/search <query> --enhancement semantic  # KeyBERT semantic analysis (~200-500ms)
+/search <query> --enhancement hybrid    # Combined YAKE + KeyBERT (~300-600ms)
+/search <query> --enhancement disabled  # Direct search without enhancement
+
+/search <query> --provider google --max 3  # Existing options still work
+/search <query> --technical-level expert   # Adjust query complexity
+/search <query> --timeframe recent         # Prefer recent results
+```
+
+### Tool Usage (Alternative)
+
+```bash
+/tool web_search query="Python async programming" enhancement="fast"
+/tool web_search query="machine learning deployment" enhancement="semantic"
+```
+
+### Search Enhancement Modes
+
+- **auto**: Automatically choose the best enhancement (YAKE + context)
+- **disabled**: No enhancement, direct search
+- **fast**: YAKE-only enhancement (~50ms) - **Default**
+- **semantic**: KeyBERT semantic analysis (~200-500ms, requires additional dependencies)
+- **hybrid**: Combined YAKE + KeyBERT (~300-600ms)
+
+### Search Enhancement Configuration
+
+Configure default search behavior in your configuration file:
+
+```yaml
+search:
+  # Basic search settings
+  enabled: true
+  default_provider: "duckduckgo"
+  max_results: 5
+  use_ai_answers: true
+
+  # Enhancement defaults (users can override per search)
+  default_enhancement: "fast"              # auto, disabled, fast, semantic, hybrid
+  enable_conversation_context: true        # Use chat history for context
+  default_technical_level: "intermediate"  # beginner, intermediate, expert
+  default_timeframe: "any"                 # recent, past_year, any
+
+  # Performance settings
+  performance_mode: true     # Prioritize speed over accuracy
+  enhancement_timeout: 30.0  # Query enhancement timeout (seconds)
+  request_timeout: 10.0      # HTTP request timeout (seconds)
+
+  # Advanced: Enable semantic analysis (optional)
+  enable_keybert: false            # Set to true for KeyBERT
+  extraction_backend: "yake_only"  # yake_only, keybert_only, hybrid
+```
+
+### Performance Guidance
+
+- Use **fast** (the default) for most queries - the best speed/accuracy balance
+- Use **semantic** for complex technical topics or research (requires: `uv add keybert sentence-transformers`)
+- Use **disabled** for exact-phrase searches or when speed is critical
+- The system automatically uses conversation context to improve search relevance
+
+### Timeout Configuration
+
+- **enhancement_timeout**: Controls how long the AI-powered query enhancement phase can take before falling back to the original query (default: 30 seconds)
+- **request_timeout**: Sets the HTTP timeout for individual search engine requests (default: 10 seconds)
+- If query enhancement times out, the search continues with the original query and displays a warning (see the sketch below)
+- Increase `enhancement_timeout` if your AI responses are slow but you want more thorough enhancement
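+
+Conceptually, the timeout fallback behaves like the following sketch (names here are illustrative, not Nova's actual internals):
+
+```python
+import asyncio
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+async def enhance_with_fallback(enhancer, query: str, timeout: float = 30.0) -> str:
+    """Run query enhancement, falling back to the original query on timeout."""
+    try:
+        return await asyncio.wait_for(enhancer(query), timeout=timeout)
+    except asyncio.TimeoutError:
+        logger.warning("Query enhancement timed out; searching with the original query")
+        return query
+```
+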
 ## Testing Commands
 
 - Run all tests: `uv run pytest`
diff --git a/SEARCH_ENHANCEMENT_SIMPLIFICATION_PLAN.md b/SEARCH_ENHANCEMENT_SIMPLIFICATION_PLAN.md
new file mode 100644
index 0000000..3a8d21a
--- /dev/null
+++ b/SEARCH_ENHANCEMENT_SIMPLIFICATION_PLAN.md
@@ -0,0 +1,98 @@
+# Search Enhancement Simplification Plan
+
+## Current Problems - Brittleness Analysis
+
+### 1. **Over-Complex Pipeline Architecture**
+- Three-stage pipeline: NLP extraction → LLM planning → JSON execution
+- Each stage can fail independently, requiring complex error handling
+- Far too complex for basic search query enhancement
+
+### 2. **Multiple Fragile Dependencies**
+- spaCy (requires model download: `en_core_web_sm`)
+- YAKE keyword extraction
+- KeyBERT + sentence-transformers (optional but complex)
+- Multiple fallback chains when dependencies fail
+
+### 3. **Brittle LLM JSON Parsing**
+- Relies on the LLM returning perfectly formatted JSON
+- Complex parsing logic to handle markdown code blocks
+- A single malformed response breaks the entire enhancement
+- JSON schema validation adds unnecessary complexity
+
+### 4. **Configuration Explosion**
+- 6 enhancement modes (auto, disabled, fast, semantic, hybrid, adaptive)
+- 4 extraction backends (yake_only, keybert_only, hybrid, adaptive)
+- Technical levels, timeframes, performance modes, timeout configs
+- Each combination can behave differently and fail in unique ways
+
+### 5. **Cascading Failure Points**
+```
+spaCy fails → KeyBERT fails → AI client fails → LLM timeouts → JSON parsing fails
+```
+Each failure requires its own fallback logic, creating a maintenance nightmare.
+
+### 6. **Heavy Resource Overhead**
+- Loading multiple ML models on startup
+- Complex caching mechanisms
+- Multiple AI API calls per search
+
+## Proposed Simpler, More Resilient Design
+
+### **Single-Step Approach**
+Replace the entire pipeline with:
+1. A simple prompt: "Suggest 2-3 alternative search queries for: {original_query}"
+2. Parse the response as plain text (not JSON)
+3. Use the original query if anything fails
+
+### **Eliminate Dependencies**
+- Remove spaCy, YAKE, and KeyBERT entirely
+- Use basic string processing if keyword extraction is needed
+- Let the AI handle all intelligence
+
+### **Two-Mode Configuration**
+- `enhanced`: Use AI to suggest alternatives (default)
+- `disabled`: Use original query only
+- Remove all other complexity
+
+### **Robust Fallback**
+```python
+try:
+    enhanced_queries = await simple_ai_enhance(query)
+except Exception:
+    enhanced_queries = [query]  # Always fall back to the original
+```
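+
+A minimal sketch of what `simple_ai_enhance` could look like, assuming `client` is the existing AI client with an async `generate_response(messages)` method (names and prompt wording are illustrative):
+
+```python
+async def simple_ai_enhance(client, query: str, max_alternatives: int = 3) -> list[str]:
+    """Ask the AI for alternative queries and parse the reply as plain text."""
+    prompt = (
+        f"Suggest {max_alternatives} alternative search queries for: {query}\n"
+        "Return one query per line, with no numbering or extra text."
+    )
+    response = await client.generate_response([{"role": "user", "content": prompt}])
+    # Plain-text parsing: keep non-empty lines and cap the count.
+    alternatives = [line.strip() for line in response.splitlines() if line.strip()]
+    return [query] + alternatives[:max_alternatives]
+```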
+
+## Key Benefits of Simplified Approach
+
+1. **Massive reduction in complexity** - roughly 90% less code
+2. **Fewer failure points** - a single try/except instead of cascading failures
+3. **No external ML dependencies** - just use the existing AI client
+4. **Easier to debug and maintain**
+5. **More predictable behavior**
+6. **Faster startup time** - no model loading
+7. **Better resource usage** - no background ML processes
+
+## Implementation Strategy
+
+1. Create a new simple enhancement module alongside the existing one
+2. Add a feature flag to switch between the old and new systems (see the sketch below)
+3. Test the new system thoroughly
+4. Gradually migrate users to the new system
+5. Remove the old complex system once proven
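+
+One way the feature flag could work (the module path comes from this plan; class and flag names are illustrative):
+
+```python
+def create_enhancer(config: dict):
+    """Select the enhancement implementation behind a feature flag."""
+    if config.get("use_simple_enhancer", False):
+        from nova.search.enhancement.simple_enhancer import SimpleEnhancer
+        return SimpleEnhancer(config)
+    # The existing pipeline stays the default until the new system is proven.
+    from nova.search.enhancement.enhancer import QueryEnhancer
+    return QueryEnhancer(config)
+```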
+
+## Files to Modify/Remove
+
+**Remove entirely:**
+- `nova/search/enhancement/extractors.py`
+- `nova/search/enhancement/classifier.py`
+- Most of `nova/search/enhancement/enhancer.py`
+
+**Simplify:**
+- `nova/search/models.py` - Remove complex models
+- `nova/tools/built_in/web_search.py` - Simplify parameters (sketched below)
+- Configuration - Reduce options to just `enhanced`/`disabled`
+
+**Create:**
+- `nova/search/enhancement/simple_enhancer.py` - New minimal implementation
+
+The current system is a classic case of over-engineering - the complexity far exceeds the value delivered. A much simpler approach would be more reliable, more maintainable, and ultimately more resilient.
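+
+For illustration, the simplified `web_search` tool surface could shrink to something like this (the signature and the `run_search` helper are assumptions, not the final API):
+
+```python
+async def web_search(client, query: str, enhancement: str = "enhanced", max_results: int = 5):
+    """Two-mode search: 'enhanced' asks the AI for alternatives, 'disabled' does not."""
+    queries = [query]
+    if enhancement == "enhanced":
+        try:
+            queries = await simple_ai_enhance(client, query)
+        except Exception:
+            queries = [query]  # Always fall back to the original query
+    return await run_search(queries, max_results=max_results)  # run_search: hypothetical backend call
+```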
diff --git a/config/default.yaml b/config/default.yaml
index 2c150ed..d3ecea1 100644
--- a/config/default.yaml
+++ b/config/default.yaml
@@ -15,28 +15,28 @@ profiles:
     max_tokens: 2000
     temperature: 0.7
     # api_key will be set via environment variables
-  
+
   gpt4:
     name: "gpt4"
    provider: "openai"
     model_name: "gpt-4"
     max_tokens: 4000
     temperature: 0.7
-  
+
   claude:
     name: "claude"
     provider: "anthropic"
     model_name: "claude-sonnet-4-20250514"
     max_tokens: 4000
     temperature: 0.7
-  
+
   claude-opus:
     name: "claude-opus"
     provider: "anthropic"
     model_name: "claude-opus-4-20250514"
     max_tokens: 4000
     temperature: 0.7
-  
+
   llama:
     name: "llama"
     provider: "ollama"
@@ -48,6 +48,44 @@ profiles:
 # Active profile (defaults to "default" if not specified)
 active_profile: "default"
 
+# Enhanced Web Search Configuration
+search:
+  enabled: true
+  default_provider: "duckduckgo"
+  max_results: 5
+  use_ai_answers: true
+
+  # Enhancement Configuration
+  default_enhancement: "fast"              # auto, disabled, fast, semantic, hybrid
+  enable_conversation_context: true        # Use chat history for context
+  context_messages_count: 5                # Number of recent messages to use
+  default_technical_level: "intermediate"  # beginner, intermediate, expert
+  default_timeframe: "any"                 # recent, past_year, any
+
+  # Performance Settings
+  performance_mode: true     # Prioritize speed over accuracy
+  enhancement_timeout: 30.0  # Query enhancement timeout (seconds)
+  request_timeout: 10.0      # HTTP request timeout (seconds)
+
+  # Keyword Extraction Configuration
+  extraction_backend: "yake_only"  # yake_only, keybert_only, hybrid, adaptive
+  enable_keybert: false            # Set to true for KeyBERT (requires optional deps)
+  yake_max_keywords: 10
+  keybert_max_keywords: 6
+  keybert_model: "all-MiniLM-L6-v2"
+
+  # Provider API configurations
+  google: {}  # Add api_key and search_engine_id
+  bing: {}    # Add api_key
+
+# Tools configuration
+tools:
+  enabled: true
+  enabled_built_in_modules: ["file_ops", "web_search", "conversation"]
+  permission_mode: "prompt"
+  execution_timeout: 30
+  max_concurrent_tools: 3
+
 # Memory Management Features:
 # - Conversation summarization for long chats (/summarize command)
 # - Smart context optimization with token limit awareness
@@ -61,4 +99,8 @@ active_profile: "default"
 # - OPENAI_API_KEY: OpenAI-specific API key
 # - ANTHROPIC_API_KEY: Anthropic-specific API key
 # - NOVA_PROFILE: Override active profile
-# - OLLAMA_HOST: Ollama server URL override
\ No newline at end of file
+# - OLLAMA_HOST: Ollama server URL override
+
+# For Enhanced Search with KeyBERT (optional):
+# Install semantic search dependencies: uv add --optional search-semantic
+# Set search.enable_keybert: true and search.extraction_backend: "hybrid"
diff --git a/nova/core/ai_client.py b/nova/core/ai_client.py
index 30970fb..93037bf 100644
--- a/nova/core/ai_client.py
+++ b/nova/core/ai_client.py
@@ -116,6 +116,10 @@ async def list_models(self) -> list[str]:
         """List available models for this provider"""
         pass
 
+    async def close(self):
+        """Close the AI client and clean up resources"""
+        pass
+
 
 class OpenAIClient(BaseAIClient):
     """OpenAI API client"""
@@ -127,7 +131,7 @@ def __init__(self, config: AIModelConfig, function_registry=None):
             import openai
 
             self.client = openai.AsyncOpenAI(
-                api_key=config.api_key, base_url=config.base_url
+                api_key=config.api_key, base_url=config.base_url, timeout=config.timeout
             )
         except ImportError:
             raise AIError("OpenAI library not installed. Install with: uv add openai")
@@ -299,6 +307,14 @@ def _handle_api_error(self, error: Exception) -> None:
         else:
             raise AIError(f"OpenAI API error: {error}")
 
+    async def close(self):
+        """Close the OpenAI client and clean up resources"""
+        try:
+            await self.client.close()
+            logger.debug("OpenAI client closed successfully")
+        except Exception as e:
+            logger.warning(f"Error closing OpenAI client: {e}")
+
 
 class AnthropicClient(BaseAIClient):
     """Anthropic API client"""
@@ -310,7 +326,7 @@ def __init__(self, config: AIModelConfig, function_registry=None):
             import anthropic
 
             self.client = anthropic.AsyncAnthropic(
-                api_key=config.api_key, base_url=config.base_url
+                api_key=config.api_key, base_url=config.base_url, timeout=config.timeout
             )
         except ImportError:
             raise AIError(
@@ -401,6 +417,14 @@ def _handle_api_error(self, error: Exception) -> None:
         else:
             raise AIError(f"Anthropic API error: {error}")
 
+    async def close(self):
+        """Close the Anthropic client and clean up resources"""
+        try:
+            await self.client.close()
+            logger.debug("Anthropic client closed successfully")
+        except Exception as e:
+            logger.warning(f"Error closing Anthropic client: {e}")
+
 
 class OllamaClient(BaseAIClient):
     """Ollama API client for local models"""
@@ -425,14 +449,17 @@ def validate_config(self) -> bool:
     async def generate_response(self, messages: list[dict[str, str]], **kwargs) -> str:
         """Generate response using Ollama API"""
         try:
-            response = await self.client.chat(
-                model=self.config.model_name,
-                messages=messages,
-                options={
-                    "temperature": self.config.temperature,
-                    "num_predict": self.config.max_tokens,
-                },
-                **kwargs,
+            response = await asyncio.wait_for(
+                self.client.chat(
+                    model=self.config.model_name,
+                    messages=messages,
+                    options={
+                        "temperature": self.config.temperature,
+                        "num_predict": self.config.max_tokens,
+                    },
+                    **kwargs,
+                ),
+                timeout=self.config.timeout,
             )
 
             return response["message"]["content"]
@@ -444,15 +471,18 @@ async def generate_response_stream(
     ) -> AsyncGenerator[str, None]:
         """Generate streaming response using Ollama API"""
         try:
-            stream = await self.client.chat(
-                model=self.config.model_name,
-                messages=messages,
-                options={
-                    "temperature": self.config.temperature,
-                    "num_predict": self.config.max_tokens,
-                },
-                stream=True,
-                **kwargs,
+            stream = await asyncio.wait_for(
+                self.client.chat(
+                    model=self.config.model_name,
+                    messages=messages,
+                    options={
+                        "temperature": self.config.temperature,
+                        "num_predict": self.config.max_tokens,
+                    },
+                    stream=True,
+                    **kwargs,
+                ),
+                timeout=self.config.timeout,
             )
 
             async for chunk in stream:
@@ -481,6 +511,17 @@ def _handle_api_error(self, error: Exception) -> None:
         else:
             raise AIError(f"Ollama API error: {error}")
 
+    async def close(self):
+        """Close the Ollama client and clean up resources"""
+        try:
+            if hasattr(self.client, "_client") and hasattr(
+                self.client._client, "close"
+            ):
+                await self.client._client.close()
+            logger.debug("Ollama client closed successfully")
+        except Exception as e:
+            logger.warning(f"Error closing Ollama client: {e}")
+
 
 def create_ai_client(config: AIModelConfig, function_registry=None) -> BaseAIClient:
     """Factory function to create appropriate AI client"""
diff --git a/nova/core/chat.py b/nova/core/chat.py
index 9373055..f3b9c4d 100644
--- a/nova/core/chat.py
+++ b/nova/core/chat.py
@@ -13,7 +13,6 @@
 from nova.core.input_handler import ChatInputHandler
 from nova.core.memory import MemoryManager
 from nova.core.prompts import PromptManager
-from nova.core.search import SearchError, search_web
 from nova.core.tools import FunctionRegistry
 from nova.models.config import NovaConfig
 from nova.models.message import Conversation, MessageRole
@@ -22,7 +21,6 @@
     print_error,
     print_info,
     print_message,
-    print_search_results,
     print_success,
     print_warning,
 )
@@ -289,9 +287,6 @@ def _handle_command(self, command: str, session: ChatSession) -> None:
         print("  /tag <tag> - Add tag to conversation")
         print("  /tags - Show conversation tags")
         print("  /search, /s <query> - Search the web and get AI-powered answers")
-        print(
-            "  /search --provider <name> <query> - Search with specific provider"
-        )
         print("  /search --max <num> <query> - Limit number of results")
         print("  /prompt <name> - Apply a prompt template")
         print("  /prompts - List available prompt templates")
@@ -422,12 +417,13 @@ def _handle_command(self, command: str, session: ChatSession) -> None:
         print_info("Type '/help' for available commands")
 
     def _handle_search_command(self, search_args: str, session: ChatSession) -> None:
-        """Handle web search command and generate AI response"""
+        """Handle enhanced web search command using the web_search tool"""
         if not search_args:
             print_error("Please provide a search query")
             print_info(
-                "Usage: /search <query> [--provider <name>] [--max <num>]"
+                "Usage: /search <query> [--max <num>] [--enhancement <mode>] [--technical-level <level>] [--timeframe