diff --git a/Cargo.toml b/Cargo.toml index ada9402f..3153b042 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,6 +44,7 @@ tree-sitter-c-sharp = { version = "0.23.1" } tree-sitter-html = "0.23.2" tree-sitter-md = "0.3.2" tree-sitter-yaml = "0.6.1" +tree-sitter-solidity = "=1.2.10" ast-grep-core = "0.36.1" ast-grep-language = "0.36.1" clap = { version = "4.3", features = ["derive"] } diff --git a/README.md b/README.md index 80c6e26a..8fbfec03 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ npx -y @probelabs/probe@latest agent "Refactor the login function" --allow-edit - **Smart Ranking**: BM25, TF-IDF, and hybrid algorithms with optional BERT reranking - **Token-Aware**: `--max-tokens` budget, session-based dedup to avoid repeating context - **Built-in Agent**: Multi-provider (Anthropic, OpenAI, Google, Bedrock) with retry, fallback, and context compaction -- **Multi-Language**: Rust, Python, JavaScript, TypeScript, Go, C/C++, Java, Ruby, PHP, Swift, C#, and more +- **Multi-Language**: Rust, Python, JavaScript, TypeScript, Go, C/C++, Java, Ruby, PHP, Swift, Solidity, C#, and more --- @@ -468,6 +468,7 @@ cargo install --path . | Ruby | `.rb` | | PHP | `.php` | | Swift | `.swift` | +| Solidity | `.sol` | | C# | `.cs` | | Markdown | `.md` | diff --git a/docs/probe-cli/extraction-reference.md b/docs/probe-cli/extraction-reference.md index 2c3d5843..928f8f1c 100644 --- a/docs/probe-cli/extraction-reference.md +++ b/docs/probe-cli/extraction-reference.md @@ -177,6 +177,7 @@ Probe supports extraction for many other languages including: - **Ruby**: Methods, classes, modules, blocks - **PHP**: Functions, classes, namespaces, attributes - **Swift**: Functions, classes, structs, protocols, extensions +- **Solidity**: Contracts, interfaces, libraries, functions, modifiers, events, errors - **C#**: Methods, classes, interfaces, namespaces, attributes - **Markdown**: Sections, code blocks, lists, tables, frontmatter @@ -433,4 +434,4 @@ probe extract src/api.js:27 --context 10 - **INTELLIGENT FALLBACKS**: Gracefully handles cases where AST parsing isn't possible For more information on how Probe works internally, see [How Probe Works](how-it-works.md). -For details on search capabilities, see [Search Functionality](search-functionality.md). \ No newline at end of file +For details on search capabilities, see [Search Functionality](search-functionality.md). diff --git a/docs/probe-cli/query.md b/docs/probe-cli/query.md index 478dd892..b6efc2fb 100644 --- a/docs/probe-cli/query.md +++ b/docs/probe-cli/query.md @@ -138,6 +138,7 @@ Required for accurate parsing: | Ruby | `ruby`, `rb` | .rb | | PHP | `php` | .php | | Swift | `swift` | .swift | +| Solidity | `solidity`, `sol` | .sol | | C# | `csharp`, `cs` | .cs | ```bash diff --git a/docs/probe-cli/search.md b/docs/probe-cli/search.md index 18093942..41036373 100644 --- a/docs/probe-cli/search.md +++ b/docs/probe-cli/search.md @@ -208,7 +208,7 @@ probe search "api" ./ --reranker ms-marco-tinybert --question "How is the REST A | `-l`, `--language` | String | auto | Limit to programming language | **Supported Languages:** -`rust`, `javascript`, `typescript`, `python`, `go`, `c`, `cpp`, `java`, `ruby`, `php`, `swift`, `csharp`, `yaml`, `html`, `markdown` +`rust`, `javascript`, `typescript`, `python`, `go`, `c`, `cpp`, `java`, `ruby`, `php`, `swift`, `solidity`, `csharp`, `yaml`, `html`, `markdown` ```bash # Search only Python files diff --git a/docs/reference/adding-languages.md b/docs/reference/adding-languages.md index 168b02e8..1b3e1398 100644 --- a/docs/reference/adding-languages.md +++ b/docs/reference/adding-languages.md @@ -95,7 +95,7 @@ Add to language completion lists (2 locations): ### File 10: `npm/src/mcp/index.ts` Add to supported languages description: ```rust -'Supported languages: rust, javascript, typescript, python, go, c, cpp, java, ruby, php, swift, csharp, yourlang.', +'Supported languages: rust, javascript, typescript, python, go, c, cpp, java, ruby, php, swift, solidity, csharp, yourlang.', ``` ## Step 4: Documentation diff --git a/docs/reference/architecture.md b/docs/reference/architecture.md index 5c54cdd5..3324f7b0 100644 --- a/docs/reference/architecture.md +++ b/docs/reference/architecture.md @@ -104,7 +104,7 @@ pub trait LanguageImpl { } ``` -**Supported:** Rust, JavaScript, TypeScript, Python, Go, C, C++, Java, Ruby, PHP, Swift, C#, HTML, Markdown, YAML +**Supported:** Rust, JavaScript, TypeScript, Python, Go, C, C++, Java, Ruby, PHP, Swift, Solidity, C#, HTML, Markdown, YAML ### Performance Optimizations @@ -418,4 +418,3 @@ agent.events.on('toolCall', (event) => { - [API Reference](../probe-agent/sdk/api-reference.md) - SDK API - [MCP Protocol](../probe-agent/protocols/mcp.md) - MCP integration - [Performance](../probe-cli/performance.md) - Optimization guide - diff --git a/docs/reference/faq.md b/docs/reference/faq.md index 43978c95..1d6c7b06 100644 --- a/docs/reference/faq.md +++ b/docs/reference/faq.md @@ -62,7 +62,7 @@ Yes, Probe is open source under the Apache 2.0 license. ### What languages does Probe support? -Probe supports 15+ languages including Rust, JavaScript, TypeScript, Python, Go, C, C++, Java, Ruby, PHP, Swift, C#, HTML, Markdown, and YAML. See [Supported Languages](../supported-languages.md). +Probe supports 15+ languages including Rust, JavaScript, TypeScript, Python, Go, C, C++, Java, Ruby, PHP, Swift, Solidity, C#, HTML, Markdown, and YAML. See [Supported Languages](../supported-languages.md). --- diff --git a/docs/reference/language-support.md b/docs/reference/language-support.md index 92015cdf..c49686d6 100644 --- a/docs/reference/language-support.md +++ b/docs/reference/language-support.md @@ -107,6 +107,7 @@ Probe currently supports a wide range of programming languages, including: - Ruby - PHP - Swift +- Solidity - C# - Markdown @@ -165,4 +166,4 @@ Tree-sitter provides several advantages for Probe: - [Supported Languages](/supported-languages): Detailed information about each supported language - [Adding New Languages](/adding-languages): Guide for adding support for new languages - [Search Functionality](/search-functionality): How to use Probe's search features -- [Code Extraction](/code-extraction): Details on how Probe extracts code blocks \ No newline at end of file +- [Code Extraction](/code-extraction): Details on how Probe extracts code blocks diff --git a/docs/reference/supported-languages.md b/docs/reference/supported-languages.md index de34e9d1..b76d8783 100644 --- a/docs/reference/supported-languages.md +++ b/docs/reference/supported-languages.md @@ -18,6 +18,7 @@ Probe provides language-aware code search and extraction for a wide range of pro | Ruby | `.rb` | ✅ | ✅ | | PHP | `.php` | ✅ | ✅ | | Swift | `.swift` | ✅ | ✅ | +| Solidity | `.sol` | ✅ | ✅ | | C# | `.cs` | ✅ | ✅ | | Markdown | `.md`, `.markdown` | ✅ | ✅ | | YAML | `.yaml`, `.yml` | ✅ | ✅ | @@ -128,6 +129,14 @@ Go also implements special handling for nested struct types. - **Attribute Handling**: Properly handles C# attributes - **Test Detection**: Identifies test classes and methods using NUnit, MSTest, or xUnit conventions +### Solidity + +- **Contract Extraction**: Extracts contracts, interfaces, and libraries +- **Function Extraction**: Extracts functions, constructors, fallback/receive functions, and modifiers +- **Type Extraction**: Extracts structs, enums, user-defined value types, events, and custom errors +- **Comment Handling**: Associates NatSpec and regular comments with Solidity code blocks +- **Test Detection**: Identifies Foundry-style `.t.sol` files, `*Test.sol` contracts, `setUp`, `test*`, and `invariant_*` functions + ### Markdown - **Section Extraction**: Extracts complete sections based on headings @@ -212,4 +221,4 @@ func ($RECEIVER $TYPE) $NAME($$$PARAMS) $$$BODY ## Adding Support for New Languages -If you're interested in adding support for a language that's not currently supported, see the [Adding New Languages](/adding-languages) page for a detailed guide. \ No newline at end of file +If you're interested in adding support for a language that's not currently supported, see the [Adding New Languages](/adding-languages) page for a detailed guide. diff --git a/lsp-daemon/Cargo.toml b/lsp-daemon/Cargo.toml index 35dbcb20..1f4880d2 100644 --- a/lsp-daemon/Cargo.toml +++ b/lsp-daemon/Cargo.toml @@ -81,6 +81,7 @@ tree-sitter-c-sharp = "0.23.1" tree-sitter-ruby = "0.23.1" tree-sitter-swift = "0.7.0" tree-sitter-php = "0.23.11" +tree-sitter-solidity = "=1.2.10" [target.'cfg(unix)'.dependencies] libc = "0.2" diff --git a/lsp-daemon/src/analyzer/tree_sitter_analyzer.rs b/lsp-daemon/src/analyzer/tree_sitter_analyzer.rs index 3b762d73..03bbfd80 100644 --- a/lsp-daemon/src/analyzer/tree_sitter_analyzer.rs +++ b/lsp-daemon/src/analyzer/tree_sitter_analyzer.rs @@ -31,6 +31,7 @@ fn extension_to_language_name(extension: &str) -> Option<&'static str> { "php" => Some("php"), "swift" => Some("swift"), "cs" => Some("csharp"), + "sol" => Some("solidity"), _ => None, } } @@ -98,6 +99,7 @@ impl ParserPool { "java" => Some(tree_sitter_java::LANGUAGE), "c" => Some(tree_sitter_c::LANGUAGE), "cpp" | "c++" | "cxx" => Some(tree_sitter_cpp::LANGUAGE), + "solidity" | "sol" => Some(tree_sitter_solidity::LANGUAGE), _ => None, }; diff --git a/lsp-daemon/src/daemon.rs b/lsp-daemon/src/daemon.rs index e1e8a0b8..85e6bba5 100644 --- a/lsp-daemon/src/daemon.rs +++ b/lsp-daemon/src/daemon.rs @@ -7217,6 +7217,12 @@ impl LspDaemon { .ok()?; Some(()) } + "sol" => { + parser + .set_language(&tree_sitter_solidity::LANGUAGE.into()) + .ok()?; + Some(()) + } _ => None, }?; diff --git a/lsp-daemon/src/fqn.rs b/lsp-daemon/src/fqn.rs index 539bd31a..4a0eef0e 100644 --- a/lsp-daemon/src/fqn.rs +++ b/lsp-daemon/src/fqn.rs @@ -44,6 +44,7 @@ pub fn get_fqn_from_ast_with_content( "go" => Some(tree_sitter_go::LANGUAGE), "c" => Some(tree_sitter_c::LANGUAGE), "cpp" | "cc" | "cxx" => Some(tree_sitter_cpp::LANGUAGE), + "sol" => Some(tree_sitter_solidity::LANGUAGE), _ => None, }; @@ -360,7 +361,7 @@ fn find_declaration_in_descendants<'a>( fn get_language_separator(extension: &str) -> &str { match extension { "rs" | "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "rb" => "::", - "py" | "js" | "ts" | "jsx" | "tsx" | "java" | "go" | "cs" => ".", + "py" | "js" | "ts" | "jsx" | "tsx" | "java" | "go" | "cs" | "sol" => ".", "php" => "\\", _ => "::", // Default to Rust-style for unknown languages } @@ -381,6 +382,13 @@ fn is_method_node(node: &tree_sitter::Node, extension: &str) -> bool { "java" | "cs" => kind == "method_declaration", "go" => kind == "function_declaration", "cpp" | "cc" | "cxx" => matches!(kind, "function_definition" | "method_declaration"), + "sol" => matches!( + kind, + "function_definition" + | "constructor_definition" + | "modifier_definition" + | "fallback_receive_definition" + ), _ => kind.contains("function") || kind.contains("method"), } } @@ -403,6 +411,14 @@ fn is_namespace_node(node: &tree_sitter::Node, extension: &str) -> bool { kind, "class_specifier" | "struct_specifier" | "namespace_definition" ), + "sol" => matches!( + kind, + "contract_declaration" + | "interface_declaration" + | "library_declaration" + | "struct_declaration" + | "enum_declaration" + ), _ => { // Fallback for unknown languages: try to detect common node types kind.contains("class") || kind.contains("struct") || kind.contains("namespace") diff --git a/lsp-daemon/src/indexing/ast_extractor.rs b/lsp-daemon/src/indexing/ast_extractor.rs index 1edd4ddc..24a50abb 100644 --- a/lsp-daemon/src/indexing/ast_extractor.rs +++ b/lsp-daemon/src/indexing/ast_extractor.rs @@ -697,6 +697,9 @@ impl AstSymbolExtractor { crate::language_detector::Language::Java => Ok(tree_sitter_java::LANGUAGE.into()), crate::language_detector::Language::C => Ok(tree_sitter_c::LANGUAGE.into()), crate::language_detector::Language::Cpp => Ok(tree_sitter_cpp::LANGUAGE.into()), + crate::language_detector::Language::Solidity => { + Ok(tree_sitter_solidity::LANGUAGE.into()) + } _ => Err(anyhow::anyhow!("Unsupported language: {:?}", language)), } } @@ -834,6 +837,20 @@ impl AstSymbolExtractor { "field_declaration" => (SymbolKind::Variable, true), _ => (SymbolKind::Function, false), }, + crate::language_detector::Language::Solidity => match node_kind { + "function_definition" | "fallback_receive_definition" => { + (SymbolKind::Function, true) + } + "constructor_definition" | "modifier_definition" => (SymbolKind::Method, true), + "contract_declaration" | "library_declaration" => (SymbolKind::Class, true), + "interface_declaration" => (SymbolKind::Interface, true), + "struct_declaration" => (SymbolKind::Struct, true), + "enum_declaration" => (SymbolKind::Enum, true), + "event_definition" | "error_declaration" => (SymbolKind::Type, true), + "state_variable_declaration" => (SymbolKind::Variable, true), + "user_defined_type_definition" => (SymbolKind::Type, true), + _ => (SymbolKind::Function, false), + }, _ => { // For other languages, try some common patterns match node_kind { @@ -933,7 +950,17 @@ impl AstSymbolExtractor { } } - None + match node.kind() { + "constructor_definition" => Some("constructor".to_string()), + "fallback_receive_definition" => node.utf8_text(content).ok().map(|text| { + if text.trim_start().starts_with("receive") { + "receive".to_string() + } else { + "fallback".to_string() + } + }), + _ => None, + } } } diff --git a/lsp-daemon/src/indexing/config.rs b/lsp-daemon/src/indexing/config.rs index 983390ca..ebed2a15 100644 --- a/lsp-daemon/src/indexing/config.rs +++ b/lsp-daemon/src/indexing/config.rs @@ -1401,6 +1401,7 @@ fn load_language_configs_from_env() -> Result Result { + features.set_language_feature("extract_contracts".to_string(), true); + features.set_language_feature("extract_events".to_string(), true); + features.set_language_feature("extract_modifiers".to_string(), true); + } _ => {} } @@ -1540,6 +1546,7 @@ fn default_extensions_for_language(language: Language) -> Vec { "hpp".to_string(), "hxx".to_string(), ], + Language::Solidity => vec!["sol".to_string()], _ => vec![], } } @@ -1557,6 +1564,7 @@ impl FromStr for Language { "java" => Ok(Language::Java), "c" => Ok(Language::C), "cpp" | "c++" => Ok(Language::Cpp), + "solidity" | "sol" => Ok(Language::Solidity), _ => Err(anyhow!("Unknown language: {}", s)), } } diff --git a/lsp-daemon/src/indexing/file_detector.rs b/lsp-daemon/src/indexing/file_detector.rs index 34fd4399..446aaf17 100644 --- a/lsp-daemon/src/indexing/file_detector.rs +++ b/lsp-daemon/src/indexing/file_detector.rs @@ -122,6 +122,7 @@ impl Default for DetectionConfig { "rb", "php", "swift", + "sol", "cs", "kt", "scala", @@ -384,9 +385,9 @@ impl FileChangeDetector { // Check if this extension is supported based on our known languages let supported_languages = [ "rs", "js", "jsx", "ts", "tsx", "py", "go", "c", "h", "cpp", "cc", "cxx", "hpp", - "hxx", "java", "rb", "php", "swift", "cs", "kt", "scala", "clj", "ex", "exs", - "erl", "hrl", "hs", "lhs", "ml", "mli", "fs", "fsx", "fsi", "dart", "jl", "r", "R", - "m", "mm", "pl", "pm", "sh", "bash", "zsh", "fish", "lua", "vim", "sql", + "hxx", "java", "rb", "php", "swift", "sol", "cs", "kt", "scala", "clj", "ex", + "exs", "erl", "hrl", "hs", "lhs", "ml", "mli", "fs", "fsx", "fsi", "dart", "jl", + "r", "R", "m", "mm", "pl", "pm", "sh", "bash", "zsh", "fish", "lua", "vim", "sql", ]; if supported_languages.contains(&extension) { diff --git a/lsp-daemon/src/language_detector.rs b/lsp-daemon/src/language_detector.rs index d474f96b..7497c66e 100644 --- a/lsp-daemon/src/language_detector.rs +++ b/lsp-daemon/src/language_detector.rs @@ -20,6 +20,7 @@ pub enum Language { Ruby, Php, Swift, + Solidity, Kotlin, Scala, Haskell, @@ -45,6 +46,7 @@ impl Language { Language::Ruby => "ruby", Language::Php => "php", Language::Swift => "swift", + Language::Solidity => "solidity", Language::Kotlin => "kotlin", Language::Scala => "scala", Language::Haskell => "haskell", @@ -72,6 +74,7 @@ impl Language { "ruby" => Some(Language::Ruby), "php" => Some(Language::Php), "swift" => Some(Language::Swift), + "solidity" | "sol" => Some(Language::Solidity), "kotlin" => Some(Language::Kotlin), "scala" => Some(Language::Scala), "haskell" => Some(Language::Haskell), @@ -146,6 +149,9 @@ impl LanguageDetector { // Swift extension_map.insert("swift".to_string(), Language::Swift); + // Solidity + extension_map.insert("sol".to_string(), Language::Solidity); + // Kotlin extension_map.insert("kt".to_string(), Language::Kotlin); extension_map.insert("kts".to_string(), Language::Kotlin); diff --git a/lsp-daemon/src/lsp_database_adapter.rs b/lsp-daemon/src/lsp_database_adapter.rs index 6acb19fa..c0f29871 100644 --- a/lsp-daemon/src/lsp_database_adapter.rs +++ b/lsp-daemon/src/lsp_database_adapter.rs @@ -822,6 +822,10 @@ impl LspDatabaseAdapter { debug!("[TREE_SITTER] Using tree-sitter-cpp"); Some(tree_sitter_cpp::LANGUAGE.into()) } + "solidity" | "sol" => { + debug!("[TREE_SITTER] Using tree-sitter-solidity"); + Some(tree_sitter_solidity::LANGUAGE.into()) + } "php" => { debug!("[TREE_SITTER] Using tree-sitter-php"); Some(tree_sitter_php::LANGUAGE_PHP.into()) @@ -2400,7 +2404,7 @@ impl LspDatabaseAdapter { fn get_language_separator(extension: &str) -> &str { match extension { "rs" | "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "rb" => "::", - "py" | "js" | "ts" | "jsx" | "tsx" | "java" | "go" | "cs" => ".", + "py" | "js" | "ts" | "jsx" | "tsx" | "java" | "go" | "cs" | "sol" => ".", "php" => "\\", _ => "::", // Default to Rust-style for unknown languages } @@ -2419,6 +2423,13 @@ impl LspDatabaseAdapter { "java" | "cs" => kind == "method_declaration", "go" => kind == "function_declaration", "cpp" | "cc" | "cxx" => matches!(kind, "function_definition" | "method_declaration"), + "sol" => matches!( + kind, + "function_definition" + | "constructor_definition" + | "modifier_definition" + | "fallback_receive_definition" + ), _ => kind.contains("function") || kind.contains("method"), } } @@ -2442,6 +2453,14 @@ impl LspDatabaseAdapter { kind, "class_specifier" | "struct_specifier" | "namespace_definition" ), + "sol" => matches!( + kind, + "contract_declaration" + | "interface_declaration" + | "library_declaration" + | "struct_declaration" + | "enum_declaration" + ), _ => { kind.contains("class") || kind.contains("struct") diff --git a/lsp-daemon/src/lsp_registry.rs b/lsp-daemon/src/lsp_registry.rs index 695d09ba..ab4b854b 100644 --- a/lsp-daemon/src/lsp_registry.rs +++ b/lsp-daemon/src/lsp_registry.rs @@ -503,6 +503,7 @@ impl LspRegistry { "ruby" => Language::Ruby, "php" => Language::Php, "swift" => Language::Swift, + "solidity" => Language::Solidity, "kotlin" => Language::Kotlin, "scala" => Language::Scala, "haskell" => Language::Haskell, diff --git a/lsp-daemon/src/lsp_server.rs b/lsp-daemon/src/lsp_server.rs index 52396998..95865d73 100644 --- a/lsp-daemon/src/lsp_server.rs +++ b/lsp-daemon/src/lsp_server.rs @@ -2345,6 +2345,7 @@ impl LspServer { Some("rb") => "ruby", Some("php") => "php", Some("swift") => "swift", + Some("sol") => "solidity", Some("kt") | Some("kts") => "kotlin", Some("scala") | Some("sc") => "scala", Some("hs") => "haskell", diff --git a/lsp-daemon/src/relationship/tree_sitter_extractor.rs b/lsp-daemon/src/relationship/tree_sitter_extractor.rs index 886225dd..3893749d 100644 --- a/lsp-daemon/src/relationship/tree_sitter_extractor.rs +++ b/lsp-daemon/src/relationship/tree_sitter_extractor.rs @@ -67,6 +67,7 @@ impl RelationshipParserPool { "java" => Some(tree_sitter_java::LANGUAGE), "c" => Some(tree_sitter_c::LANGUAGE), "cpp" | "c++" | "cxx" => Some(tree_sitter_cpp::LANGUAGE), + "solidity" | "sol" => Some(tree_sitter_solidity::LANGUAGE), _ => None, }; diff --git a/lsp-daemon/src/symbol/language_support.rs b/lsp-daemon/src/symbol/language_support.rs index 978df1ef..4b73faf9 100644 --- a/lsp-daemon/src/symbol/language_support.rs +++ b/lsp-daemon/src/symbol/language_support.rs @@ -294,6 +294,43 @@ impl LanguageRules { } } + /// Create rules for Solidity + pub fn solidity() -> Self { + Self { + scope_separator: ".".to_string(), + anonymous_prefix: "anon".to_string(), + supports_overloading: false, + case_sensitive: true, + signature_normalization: SignatureNormalization::RemoveParameterNames, + visibility_affects_uid: false, + default_visibility: "internal".to_string(), + file_extensions: vec!["sol".to_string()], + signature_keywords: vec![ + "contract".to_string(), + "interface".to_string(), + "library".to_string(), + "function".to_string(), + "constructor".to_string(), + "modifier".to_string(), + "event".to_string(), + "error".to_string(), + "public".to_string(), + "external".to_string(), + "internal".to_string(), + "private".to_string(), + "view".to_string(), + "pure".to_string(), + "payable".to_string(), + "virtual".to_string(), + "override".to_string(), + ], + type_aliases: vec![ + ("uint".to_string(), "uint256".to_string()), + ("int".to_string(), "int256".to_string()), + ], + } + } + /// Check if this language supports a specific feature pub fn supports_feature(&self, feature: &str) -> bool { match feature { @@ -483,6 +520,7 @@ impl LanguageRulesFactory { "java" => Some(LanguageRules::java()), "c" => Some(LanguageRules::c()), "cpp" | "c++" | "cxx" => Some(LanguageRules::cpp()), + "solidity" | "sol" => Some(LanguageRules::solidity()), _ => None, } } @@ -498,6 +536,7 @@ impl LanguageRulesFactory { "java".to_string(), "c".to_string(), "cpp".to_string(), + "solidity".to_string(), ] } diff --git a/lsp-daemon/src/symbol/uid_generator.rs b/lsp-daemon/src/symbol/uid_generator.rs index a4d08d99..8061afae 100644 --- a/lsp-daemon/src/symbol/uid_generator.rs +++ b/lsp-daemon/src/symbol/uid_generator.rs @@ -27,6 +27,7 @@ fn extension_to_language_name(extension: &str) -> Option<&'static str> { "php" => Some("php"), "swift" => Some("swift"), "cs" => Some("csharp"), + "sol" => Some("solidity"), _ => None, } } @@ -101,6 +102,7 @@ impl SymbolUIDGenerator { rules.insert("c".to_string(), LanguageRules::c()); rules.insert("cpp".to_string(), LanguageRules::cpp()); rules.insert("c++".to_string(), LanguageRules::cpp()); + rules.insert("solidity".to_string(), LanguageRules::solidity()); rules } diff --git a/lsp-daemon/src/workspace/config.rs b/lsp-daemon/src/workspace/config.rs index 5fedd281..aa8f744f 100644 --- a/lsp-daemon/src/workspace/config.rs +++ b/lsp-daemon/src/workspace/config.rs @@ -243,6 +243,7 @@ impl Default for WorkspaceConfig { "java".to_string(), "c".to_string(), "cpp".to_string(), + "solidity".to_string(), ], git_integration: true, incremental_indexing: true, diff --git a/lsp-daemon/src/workspace/project.rs b/lsp-daemon/src/workspace/project.rs index a51a1a42..2d2be6b7 100644 --- a/lsp-daemon/src/workspace/project.rs +++ b/lsp-daemon/src/workspace/project.rs @@ -553,6 +553,7 @@ where "php" => Some("php"), "rb" => Some("ruby"), "swift" => Some("swift"), + "sol" => Some("solidity"), "kt" => Some("kotlin"), "cs" => Some("csharp"), "scala" => Some("scala"), diff --git a/lsp-daemon/src/workspace_resolver.rs b/lsp-daemon/src/workspace_resolver.rs index 4d0b195a..a9cbdde1 100644 --- a/lsp-daemon/src/workspace_resolver.rs +++ b/lsp-daemon/src/workspace_resolver.rs @@ -249,6 +249,7 @@ impl WorkspaceResolver { Language::Ruby => vec!["Gemfile", ".ruby-version"], Language::Php => vec!["composer.json", "composer.lock"], Language::Swift => vec!["Package.swift", "*.xcodeproj"], + Language::Solidity => vec!["foundry.toml", "hardhat.config.js", "hardhat.config.ts"], Language::Kotlin => vec!["build.gradle.kts", "build.gradle"], Language::Scala => vec!["build.sbt", "build.sc"], Language::Haskell => vec!["stack.yaml", "*.cabal", "cabal.project"], diff --git a/src/cli.rs b/src/cli.rs index e5f95741..ac57e953 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -168,6 +168,7 @@ pub enum Commands { "ruby", "rb", "php", "swift", + "solidity", "sol", "csharp", "cs", "yaml", "yml" ])] @@ -349,6 +350,7 @@ pub enum Commands { "ruby", "rb", "php", "swift", + "solidity", "sol", "csharp", "cs", "yaml", "yml" ])] diff --git a/src/debug_tree_sitter.rs b/src/debug_tree_sitter.rs index 2b551cb7..a92310d3 100644 --- a/src/debug_tree_sitter.rs +++ b/src/debug_tree_sitter.rs @@ -150,6 +150,7 @@ fn get_language_name(language: &TSLanguage) -> &str { _ if format!("{language:?}").contains("ruby") => "Ruby", _ if format!("{language:?}").contains("php") => "PHP", _ if format!("{language:?}").contains("swift") => "Swift", + _ if format!("{language:?}").contains("solidity") => "Solidity", _ if format!("{language:?}").contains("csharp") => "C#", _ => "Unknown", } diff --git a/src/extract/formatter.rs b/src/extract/formatter.rs index 7c096b5f..d1289857 100644 --- a/src/extract/formatter.rs +++ b/src/extract/formatter.rs @@ -1052,6 +1052,7 @@ pub fn get_language_from_extension(extension: &str) -> &'static str { "kt" | "kts" => "kotlin", "swift" => "swift", "cs" => "csharp", + "sol" => "solidity", "scala" => "scala", "dart" => "dart", "ex" | "exs" => "elixir", diff --git a/src/extract/symbols.rs b/src/extract/symbols.rs index 4b3f31a7..f3baf125 100644 --- a/src/extract/symbols.rs +++ b/src/extract/symbols.rs @@ -80,8 +80,12 @@ fn is_container_node(kind: &str) -> bool { | "interface_declaration" | "namespace_declaration" | "module_declaration" + | "contract_declaration" + | "library_declaration" | "enum_declaration" | "enum_item" + | "struct_declaration" + | "contract_body" | "declaration_list" | "class_body" | "block" @@ -185,6 +189,8 @@ fn collect_children_symbols( | "block" | "field_declaration_list" | "enum_body" + | "struct_body" + | "contract_body" | "object_type" | "interface_body" | "statement_block" @@ -228,6 +234,20 @@ fn extract_symbol_name(node: &Node, source: &[u8]) -> String { } } + if node.kind() == "constructor_definition" { + return "constructor".to_string(); + } + + if node.kind() == "fallback_receive_definition" { + if let Ok(text) = node.utf8_text(source) { + let trimmed = text.trim_start(); + if trimmed.starts_with("receive") { + return "receive".to_string(); + } + } + return "fallback".to_string(); + } + // For variable declarations/const, try to find the identifier let mut cursor = node.walk(); for child in node.children(&mut cursor) { @@ -262,16 +282,23 @@ fn normalize_kind(kind: &str) -> String { | "function_expression" | "arrow_function" => "function", "method_declaration" | "method_definition" => "method", - "struct_item" | "struct_type" => "struct", + "struct_item" | "struct_type" | "struct_declaration" => "struct", "impl_item" => "impl", "trait_item" => "trait", "enum_item" | "enum_declaration" => "enum", "mod_item" | "module_declaration" | "namespace_declaration" => "module", + "contract_declaration" => "contract", + "library_declaration" => "library", "class_declaration" | "class_definition" => "class", "interface_declaration" => "interface", "const_item" | "const_declaration" => "const", + "state_variable_declaration" => "variable", "static_item" => "static", - "type_item" | "type_alias_declaration" | "type_declaration" | "type_spec" => "type", + "type_item" + | "type_alias_declaration" + | "type_declaration" + | "type_spec" + | "user_defined_type_definition" => "type", "macro_definition" => "macro", "use_declaration" => "use", "variable_declarator" @@ -284,6 +311,11 @@ fn normalize_kind(kind: &str) -> String { "export_statement" => "export", "declare_statement" => "declare", "constructor_declaration" => "constructor", + "constructor_definition" => "constructor", + "modifier_definition" => "modifier", + "fallback_receive_definition" => "function", + "event_definition" => "event", + "error_declaration" => "error", "field_declaration" => "field", other => other, } diff --git a/src/language/factory.rs b/src/language/factory.rs index 57002fbf..58b60eaf 100644 --- a/src/language/factory.rs +++ b/src/language/factory.rs @@ -11,6 +11,7 @@ use probe_code::language::php::PhpLanguage; use probe_code::language::python::PythonLanguage; use probe_code::language::ruby::RubyLanguage; use probe_code::language::rust::RustLanguage; +use probe_code::language::solidity::SolidityLanguage; use probe_code::language::swift::SwiftLanguage; use probe_code::language::typescript::TypeScriptLanguage; use probe_code::language::yaml::YamlLanguage; @@ -31,6 +32,7 @@ pub fn get_language_impl(extension: &str) -> Option> { "php" => Some(Box::new(PhpLanguage::new())), "swift" => Some(Box::new(SwiftLanguage::new())), "cs" => Some(Box::new(CSharpLanguage::new())), + "sol" => Some(Box::new(SolidityLanguage::new())), "html" | "htm" => Some(Box::new(HtmlLanguage::new())), "md" | "markdown" => Some(Box::new(MarkdownLanguage::new())), "yaml" | "yml" => Some(Box::new(YamlLanguage::new())), diff --git a/src/language/mod.rs b/src/language/mod.rs index e3e89ae5..168dc973 100644 --- a/src/language/mod.rs +++ b/src/language/mod.rs @@ -24,6 +24,7 @@ pub mod php; pub mod python; pub mod ruby; pub mod rust; +pub mod solidity; pub mod swift; pub mod typescript; pub mod yaml; diff --git a/src/language/parser_pool.rs b/src/language/parser_pool.rs index a5867553..346563a0 100644 --- a/src/language/parser_pool.rs +++ b/src/language/parser_pool.rs @@ -44,7 +44,7 @@ lazy_static::lazy_static! { let critical_languages = ["rs", "js", "ts", "py", "go", "java"]; // Tier 2: Common languages - warm with lower priority - let common_languages = ["cpp", "c", "jsx", "tsx", "rb", "php", "cs"]; + let common_languages = ["cpp", "c", "jsx", "tsx", "rb", "php", "cs", "sol"]; // Tier 3: Specialized languages - warm last let specialized_languages = ["swift", "h", "cc", "cxx", "hpp", "hxx"]; @@ -137,7 +137,7 @@ pub fn smart_warm_parser_pool_for_directory(path: &Path) { let priority_order = [ "rs", "js", "ts", "py", "go", "java", // Tier 1: Critical "cpp", "c", "jsx", "tsx", "rb", "php", "cs", // Tier 2: Common - "swift", "h", "cc", "cxx", "hpp", "hxx", // Tier 3: Specialized + "swift", "sol", "h", "cc", "cxx", "hpp", "hxx", // Tier 3: Specialized ]; // Warm detected languages in priority order diff --git a/src/language/solidity.rs b/src/language/solidity.rs new file mode 100644 index 00000000..98167a4c --- /dev/null +++ b/src/language/solidity.rs @@ -0,0 +1,133 @@ +use super::language_trait::LanguageImpl; +use tree_sitter::{Language as TSLanguage, Node}; + +/// Implementation of LanguageImpl for Solidity. +pub struct SolidityLanguage; + +impl Default for SolidityLanguage { + fn default() -> Self { + Self::new() + } +} + +impl SolidityLanguage { + pub fn new() -> Self { + SolidityLanguage + } + + fn is_contract_like(kind: &str) -> bool { + matches!( + kind, + "contract_declaration" | "interface_declaration" | "library_declaration" + ) + } + + fn body_signature(node: &Node, source: &[u8], container: bool) -> Option { + let end = node + .child_by_field_name("body") + .map(|body| body.start_byte()) + .unwrap_or_else(|| node.end_byte()); + let sig = String::from_utf8_lossy(&source[node.start_byte()..end]) + .trim() + .trim_end_matches('{') + .trim() + .to_string(); + + if sig.is_empty() { + None + } else if container { + Some(format!("{sig} {{ ... }}")) + } else { + Some(sig) + } + } +} + +impl LanguageImpl for SolidityLanguage { + fn get_tree_sitter_language(&self) -> TSLanguage { + tree_sitter_solidity::LANGUAGE.into() + } + + fn get_extension(&self) -> &'static str { + "sol" + } + + fn is_acceptable_parent(&self, node: &Node) -> bool { + matches!( + node.kind(), + "contract_declaration" + | "interface_declaration" + | "library_declaration" + | "function_definition" + | "constructor_definition" + | "modifier_definition" + | "fallback_receive_definition" + | "struct_declaration" + | "enum_declaration" + | "event_definition" + | "error_declaration" + | "state_variable_declaration" + | "user_defined_type_definition" + ) + } + + fn is_symbol_node(&self, node: &Node) -> bool { + self.is_acceptable_parent(node) + } + + fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { + match node.kind() { + "contract_declaration" => { + if let Some(name) = node.child_by_field_name("name") { + let name = name.utf8_text(source).unwrap_or(""); + return name.ends_with("Test") || name.ends_with("Tests"); + } + } + "function_definition" => { + if let Some(name) = node.child_by_field_name("name") { + let name = name.utf8_text(source).unwrap_or(""); + return name == "setUp" + || name.starts_with("test") + || name.starts_with("invariant_"); + } + } + _ => {} + } + + false + } + + fn find_parent_function<'a>(&self, node: Node<'a>) -> Option> { + let mut current = node; + while let Some(parent) = current.parent() { + if matches!( + parent.kind(), + "function_definition" | "constructor_definition" | "modifier_definition" + ) { + return Some(parent); + } + current = parent; + } + None + } + + fn get_symbol_signature(&self, node: &Node, source: &[u8]) -> Option { + match node.kind() { + kind if Self::is_contract_like(kind) => Self::body_signature(node, source, true), + "function_definition" + | "constructor_definition" + | "modifier_definition" + | "fallback_receive_definition" => Self::body_signature(node, source, false), + "struct_declaration" | "enum_declaration" => Self::body_signature(node, source, true), + "event_definition" + | "error_declaration" + | "state_variable_declaration" + | "user_defined_type_definition" => Some( + String::from_utf8_lossy(&source[node.start_byte()..node.end_byte()]) + .trim() + .to_string(), + ), + _ => None, + } + } +} diff --git a/src/language/test_detection.rs b/src/language/test_detection.rs index 1f76e80d..e1cebf23 100644 --- a/src/language/test_detection.rs +++ b/src/language/test_detection.rs @@ -102,6 +102,17 @@ pub fn is_test_file(path: &Path) -> bool { } return true; } + + // Solidity/Foundry: *.t.sol, *Test.sol, Test*.sol + if file_name.ends_with(".t.sol") + || file_name.ends_with("Test.sol") + || file_name.starts_with("Test") && file_name.ends_with(".sol") + { + if _debug_mode { + println!("DEBUG: Test file detected (Solidity pattern): {file_name}"); + } + return true; + } } // Check directory patterns diff --git a/src/language/tests.rs b/src/language/tests.rs index ec830d35..ef995732 100644 --- a/src/language/tests.rs +++ b/src/language/tests.rs @@ -13,6 +13,7 @@ extern crate tree_sitter_php; extern crate tree_sitter_python; extern crate tree_sitter_ruby; extern crate tree_sitter_rust; +extern crate tree_sitter_solidity; extern crate tree_sitter_swift; extern crate tree_sitter_typescript; @@ -31,6 +32,7 @@ fn get_language(extension: &str) -> Option { "rb" => Some(tree_sitter_ruby::LANGUAGE.into()), "swift" => Some(tree_sitter_swift::LANGUAGE.into()), "cs" => Some(tree_sitter_c_sharp::LANGUAGE.into()), + "sol" => Some(tree_sitter_solidity::LANGUAGE.into()), // It seems tree_sitter_php::LANGUAGE doesn't exist, so we'll return None for PHP "php" => None, _ => None, @@ -55,6 +57,7 @@ fn test_get_language() { assert!(get_language("rb").is_some()); // Ruby assert!(get_language("swift").is_some()); // Swift assert!(get_language("cs").is_some()); // C# + assert!(get_language("sol").is_some()); // Solidity assert!(get_language("php").is_none()); // PHP (not supported in current tree-sitter version) // Test unsupported language @@ -62,6 +65,21 @@ fn test_get_language() { assert!(get_language("").is_none()); } +#[test] +fn test_solidity_language_implementation() { + let solidity_impl = get_language_impl("sol"); + assert!( + solidity_impl.is_some(), + "Should be able to get Solidity language implementation" + ); + + let language = get_language("sol"); + assert!( + language.is_some(), + "Should be able to get Solidity tree-sitter language" + ); +} + #[test] fn test_is_acceptable_parent() { // This test directly checks if the Rust language implementation's is_acceptable_parent function diff --git a/src/lsp_integration/client.rs b/src/lsp_integration/client.rs index 2cc3d3a3..267dabd3 100644 --- a/src/lsp_integration/client.rs +++ b/src/lsp_integration/client.rs @@ -912,6 +912,7 @@ impl LspClient { "ruby" | "rb" => Some(Language::Ruby), "php" => Some(Language::Php), "swift" => Some(Language::Swift), + "solidity" | "sol" => Some(Language::Solidity), "kotlin" | "kt" => Some(Language::Kotlin), "scala" => Some(Language::Scala), "haskell" | "hs" => Some(Language::Haskell), diff --git a/src/lsp_integration/management.rs b/src/lsp_integration/management.rs index 51b7d7c4..1b4e11cb 100644 --- a/src/lsp_integration/management.rs +++ b/src/lsp_integration/management.rs @@ -5756,6 +5756,7 @@ impl LspManager { "php" => Ok(Language::Php), "ruby" | "rb" => Ok(Language::Ruby), "swift" => Ok(Language::Swift), + "solidity" | "sol" => Ok(Language::Solidity), "kotlin" | "kt" => Ok(Language::Kotlin), "scala" => Ok(Language::Scala), _ => Err(anyhow::anyhow!("Unsupported language: {}", lang_str)), diff --git a/src/lsp_integration/readiness.rs b/src/lsp_integration/readiness.rs index aff77551..93db9018 100644 --- a/src/lsp_integration/readiness.rs +++ b/src/lsp_integration/readiness.rs @@ -261,6 +261,7 @@ fn determine_language_from_path(file_path: &Path) -> Option { "rb" => "ruby", "php" => "php", "swift" => "swift", + "sol" => "solidity", _ => extension, // fallback to extension }; return Some(language_name.to_string()); diff --git a/src/main.rs b/src/main.rs index 2121a8d3..83c387af 100644 --- a/src/main.rs +++ b/src/main.rs @@ -863,6 +863,7 @@ async fn main() -> Result<()> { "cc" | "cxx" | "hpp" | "hxx" => "cpp", "rb" => "ruby", "cs" => "csharp", + "sol" => "solidity", _ => lang, // Return the original language if no alias is found } }), diff --git a/src/query.rs b/src/query.rs index 09729497..56125068 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,4 +1,5 @@ use anyhow::{Context, Result}; +use ast_grep_core::language::{Language, TSLanguage}; use ast_grep_core::AstGrep; use ast_grep_language::SupportLang; use colored::*; @@ -35,21 +36,37 @@ pub struct QueryOptions<'a> { pub no_gitignore: bool, } +#[derive(Clone, Copy)] +enum ProbeQueryLang { + Builtin(SupportLang), + Solidity, +} + +impl Language for ProbeQueryLang { + fn get_ts_language(&self) -> TSLanguage { + match self { + ProbeQueryLang::Builtin(lang) => lang.get_ts_language(), + ProbeQueryLang::Solidity => tree_sitter_solidity::LANGUAGE.into(), + } + } +} + /// Convert a language string to the corresponding SupportLang -fn get_language(lang: &str) -> Option { +fn get_language(lang: &str) -> Option { match lang.to_lowercase().as_str() { - "rust" => Some(SupportLang::Rust), - "javascript" => Some(SupportLang::JavaScript), - "typescript" => Some(SupportLang::TypeScript), - "python" => Some(SupportLang::Python), - "go" => Some(SupportLang::Go), - "c" => Some(SupportLang::C), - "cpp" => Some(SupportLang::Cpp), - "java" => Some(SupportLang::Java), - "ruby" => Some(SupportLang::Ruby), - "php" => Some(SupportLang::Php), - "swift" => Some(SupportLang::Swift), - "csharp" => Some(SupportLang::CSharp), + "rust" => Some(ProbeQueryLang::Builtin(SupportLang::Rust)), + "javascript" => Some(ProbeQueryLang::Builtin(SupportLang::JavaScript)), + "typescript" => Some(ProbeQueryLang::Builtin(SupportLang::TypeScript)), + "python" => Some(ProbeQueryLang::Builtin(SupportLang::Python)), + "go" => Some(ProbeQueryLang::Builtin(SupportLang::Go)), + "c" => Some(ProbeQueryLang::Builtin(SupportLang::C)), + "cpp" => Some(ProbeQueryLang::Builtin(SupportLang::Cpp)), + "java" => Some(ProbeQueryLang::Builtin(SupportLang::Java)), + "ruby" => Some(ProbeQueryLang::Builtin(SupportLang::Ruby)), + "php" => Some(ProbeQueryLang::Builtin(SupportLang::Php)), + "swift" => Some(ProbeQueryLang::Builtin(SupportLang::Swift)), + "solidity" | "sol" => Some(ProbeQueryLang::Solidity), + "csharp" => Some(ProbeQueryLang::Builtin(SupportLang::CSharp)), _ => None, } } @@ -68,6 +85,7 @@ fn get_file_extension(lang: &str) -> Vec<&str> { "ruby" => vec![".rb"], "php" => vec![".php"], "swift" => vec![".swift"], + "solidity" | "sol" => vec![".sol"], "csharp" => vec![".cs"], _ => vec![], } @@ -130,18 +148,21 @@ fn query_file(file_path: &Path, options: &QueryOptions) -> Result> } else { // If language is not specified, try to infer from file extension let inferred_lang = match file_ext { - "rs" => Some(SupportLang::Rust), - "js" | "jsx" | "mjs" => Some(SupportLang::JavaScript), - "ts" | "tsx" => Some(SupportLang::TypeScript), - "py" => Some(SupportLang::Python), - "go" => Some(SupportLang::Go), - "c" | "h" => Some(SupportLang::C), - "cpp" | "hpp" | "cc" | "hh" | "cxx" | "hxx" => Some(SupportLang::Cpp), - "java" => Some(SupportLang::Java), - "rb" => Some(SupportLang::Ruby), - "php" => Some(SupportLang::Php), - "swift" => Some(SupportLang::Swift), - "cs" => Some(SupportLang::CSharp), + "rs" => Some(ProbeQueryLang::Builtin(SupportLang::Rust)), + "js" | "jsx" | "mjs" => Some(ProbeQueryLang::Builtin(SupportLang::JavaScript)), + "ts" | "tsx" => Some(ProbeQueryLang::Builtin(SupportLang::TypeScript)), + "py" => Some(ProbeQueryLang::Builtin(SupportLang::Python)), + "go" => Some(ProbeQueryLang::Builtin(SupportLang::Go)), + "c" | "h" => Some(ProbeQueryLang::Builtin(SupportLang::C)), + "cpp" | "hpp" | "cc" | "hh" | "cxx" | "hxx" => { + Some(ProbeQueryLang::Builtin(SupportLang::Cpp)) + } + "java" => Some(ProbeQueryLang::Builtin(SupportLang::Java)), + "rb" => Some(ProbeQueryLang::Builtin(SupportLang::Ruby)), + "php" => Some(ProbeQueryLang::Builtin(SupportLang::Php)), + "swift" => Some(ProbeQueryLang::Builtin(SupportLang::Swift)), + "sol" => Some(ProbeQueryLang::Solidity), + "cs" => Some(ProbeQueryLang::Builtin(SupportLang::CSharp)), _ => None, // Unsupported extension }; @@ -611,3 +632,46 @@ pub fn handle_query( Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + #[test] + fn test_solidity_query_support() { + let temp_dir = TempDir::new().unwrap(); + let file = temp_dir.path().join("Counter.sol"); + fs::write( + &file, + r#" +contract Counter { + uint256 private _value; + + function increment() public { + _value += 1; + } +} +"#, + ) + .unwrap(); + + let options = QueryOptions { + path: temp_dir.path(), + pattern: "function $NAME() public { $$$BODY }", + language: Some("solidity"), + ignore: &[], + allow_tests: true, + max_results: Some(10), + with_context: false, + format: "json", + no_gitignore: true, + }; + + let matches = perform_query(&options).expect("Solidity query should run"); + assert_eq!(matches.len(), 1); + assert_eq!(matches[0].file_path, file); + assert!(matches[0].matched_text.contains("function increment()")); + } +} diff --git a/src/search/file_list_cache.rs b/src/search/file_list_cache.rs index a17f481a..c793c6b9 100644 --- a/src/search/file_list_cache.rs +++ b/src/search/file_list_cache.rs @@ -506,6 +506,7 @@ fn get_language_extensions(language: &str) -> Vec { "ruby" => vec![".rb".to_string(), ".rake".to_string()], "php" => vec![".php".to_string()], "swift" => vec![".swift".to_string()], + "solidity" => vec![".sol".to_string()], "csharp" => vec![".cs".to_string()], "markdown" => vec![".md".to_string(), ".markdown".to_string()], "yaml" => vec![".yaml".to_string(), ".yml".to_string()], diff --git a/src/search/filters.rs b/src/search/filters.rs index 0e3e3bc7..8ac91ac2 100644 --- a/src/search/filters.rs +++ b/src/search/filters.rs @@ -473,6 +473,7 @@ fn normalize_language_name(lang: &str) -> String { "py" => "python".to_string(), "rb" => "ruby".to_string(), "cs" => "csharp".to_string(), + "sol" => "solidity".to_string(), "cpp" | "cc" | "cxx" => "cpp".to_string(), "h" | "hpp" | "hxx" => "c".to_string(), other => other.to_string(), @@ -529,6 +530,9 @@ fn get_extensions_for_type(file_type: &str) -> Option> { "swift" => { extensions.insert("swift".to_string()); } + "solidity" | "sol" => { + extensions.insert("sol".to_string()); + } "kotlin" => { extensions.insert("kt".to_string()); extensions.insert("kts".to_string()); diff --git a/src/search/results_formatter.rs b/src/search/results_formatter.rs index 135d9a30..b011c5a2 100644 --- a/src/search/results_formatter.rs +++ b/src/search/results_formatter.rs @@ -133,6 +133,7 @@ pub fn format_and_print_search_results(results: &[SearchResult], dry_run: bool) "java" => "java", "rb" => "ruby", "php" => "php", + "sol" => "solidity", "sh" => "bash", "md" => "markdown", "json" => "json", diff --git a/src/search/search_output.rs b/src/search/search_output.rs index d868f9df..30211d68 100644 --- a/src/search/search_output.rs +++ b/src/search/search_output.rs @@ -331,6 +331,7 @@ fn format_and_print_color_results( "java" => "java", "rb" => "ruby", "php" => "php", + "sol" => "solidity", "sh" => "bash", "md" => "markdown", "json" => "json", @@ -848,10 +849,10 @@ fn is_test_code_block(code: &str, node_type: &str) -> bool { return true; } - // JS/TS: test(', describe(', it(', expect( - if first_lines.contains("test(") - || first_lines.contains("describe(") - || first_lines.contains("it(") + // JS/TS: test(...), describe(...), it(...) + if contains_call_name(&first_lines, "test") + || contains_call_name(&first_lines, "describe") + || contains_call_name(&first_lines, "it") { return true; } @@ -869,6 +870,30 @@ fn is_test_code_block(code: &str, node_type: &str) -> bool { false } +fn contains_call_name(code: &str, name: &str) -> bool { + let bytes = code.as_bytes(); + let mut offset = 0; + + while let Some(relative) = code[offset..].find(name) { + let start = offset + relative; + let end = start + name.len(); + let before = start.checked_sub(1).map(|index| bytes[index]); + let after = bytes.get(end).copied(); + + let valid_before = before + .map(|byte| !(byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'$')) + .unwrap_or(true); + + if valid_before && after == Some(b'(') { + return true; + } + + offset = end; + } + + false +} + /// Classify the structural scope of a search result block. /// /// Returns one of: "test", "example", "doc", "function", "declaration", "module", "file" @@ -2319,7 +2344,7 @@ fn get_comment_prefix(extension: &str) -> &'static str { match extension { // C-style comments "rs" | "c" | "h" | "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "java" | "js" | "jsx" | "ts" - | "tsx" | "cs" | "swift" | "go" | "php" => "//", + | "tsx" | "cs" | "swift" | "go" | "php" | "sol" => "//", // Python-style comments "py" | "rb" | "sh" | "bash" | "pl" | "r" | "yaml" | "yml" => "#", @@ -3421,6 +3446,10 @@ mod tests { "it('should work', () => {})", "call_expression" )); + assert!(!is_test_code_block( + "function quorumNumerator() public view returns (uint256) {\n return history.latest();\n}", + "function_definition" + )); } #[test] diff --git a/src/search/search_runner.rs b/src/search/search_runner.rs index afef2202..cb095827 100644 --- a/src/search/search_runner.rs +++ b/src/search/search_runner.rs @@ -1994,6 +1994,7 @@ fn normalize_language_alias(lang: &str) -> &str { "cc" | "cxx" | "hpp" | "hxx" => "cpp", "rb" => "ruby", "cs" => "csharp", + "sol" => "solidity", _ => lang, // Return the original language if no alias is found } } diff --git a/src/semantic_context.rs b/src/semantic_context.rs index 250a167b..d139d54c 100644 --- a/src/semantic_context.rs +++ b/src/semantic_context.rs @@ -96,6 +96,7 @@ pub fn language_name_for_path(path: &Path) -> Option<&'static str> { "rb" => Some("ruby"), "php" => Some("php"), "swift" => Some("swift"), + "sol" => Some("solidity"), "cs" => Some("csharp"), "html" | "htm" => Some("html"), "md" | "markdown" => Some("markdown"), diff --git a/tests/fixtures/solidity/project1/contracts/GovernorExample.sol b/tests/fixtures/solidity/project1/contracts/GovernorExample.sol new file mode 100644 index 00000000..d002e690 --- /dev/null +++ b/tests/fixtures/solidity/project1/contracts/GovernorExample.sol @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: MIT +pragma solidity ^0.8.24; + +interface IVotesLike { + function getVotes(address account) external view returns (uint256); +} + +library VoteMath { + function quorum(uint256 supply, uint256 numerator) internal pure returns (uint256) { + return (supply * numerator) / 100; + } +} + +contract GovernorExample { + enum ProposalState { + Pending, + Active, + Succeeded, + Defeated + } + + struct ProposalCore { + uint64 voteStart; + uint64 voteEnd; + bool executed; + } + + event ProposalCreated(uint256 indexed proposalId, address indexed proposer); + error GovernorUnexpectedProposalState(uint256 proposalId, ProposalState current); + + IVotesLike public immutable token; + mapping(uint256 => ProposalCore) private _proposals; + + modifier onlyActive(uint256 proposalId) { + if (state(proposalId) != ProposalState.Active) { + revert GovernorUnexpectedProposalState(proposalId, state(proposalId)); + } + _; + } + + constructor(IVotesLike tokenAddress) { + token = tokenAddress; + } + + function propose(address[] memory targets, bytes[] memory calldatas) public returns (uint256) { + uint256 proposalId = hashProposal(targets, calldatas); + _proposals[proposalId] = ProposalCore({ + voteStart: uint64(block.number + votingDelay()), + voteEnd: uint64(block.number + votingDelay() + votingPeriod()), + executed: false + }); + emit ProposalCreated(proposalId, msg.sender); + return proposalId; + } + + function castVote(uint256 proposalId, uint8 support) public onlyActive(proposalId) returns (uint256) { + return token.getVotes(msg.sender) + support; + } + + function state(uint256 proposalId) public view returns (ProposalState) { + ProposalCore storage proposal = _proposals[proposalId]; + if (proposal.executed) { + return ProposalState.Succeeded; + } + if (block.number < proposal.voteStart) { + return ProposalState.Pending; + } + if (block.number <= proposal.voteEnd) { + return ProposalState.Active; + } + return ProposalState.Defeated; + } + + function hashProposal(address[] memory targets, bytes[] memory calldatas) public pure returns (uint256) { + return uint256(keccak256(abi.encode(targets, calldatas))); + } + + function votingDelay() public pure returns (uint256) { + return 1; + } + + function votingPeriod() public pure returns (uint256) { + return 45818; + } +} diff --git a/tests/fixtures/solidity/project1/foundry.toml b/tests/fixtures/solidity/project1/foundry.toml new file mode 100644 index 00000000..1671c35a --- /dev/null +++ b/tests/fixtures/solidity/project1/foundry.toml @@ -0,0 +1,5 @@ +[profile.default] +src = "contracts" +test = "test" +libs = ["lib"] +solc_version = "0.8.24" diff --git a/tests/fixtures/solidity/project1/test/GovernorExample.t.sol b/tests/fixtures/solidity/project1/test/GovernorExample.t.sol new file mode 100644 index 00000000..51986221 --- /dev/null +++ b/tests/fixtures/solidity/project1/test/GovernorExample.t.sol @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MIT +pragma solidity ^0.8.24; + +import "../contracts/GovernorExample.sol"; + +contract GovernorExampleTest { + GovernorExample private governor; + + function setUp() public { + governor = GovernorExample(address(0)); + } + + function testVotingDelay() public view { + require(governor.votingDelay() == 1); + } +} diff --git a/tests/solidity_language_tests.rs b/tests/solidity_language_tests.rs new file mode 100644 index 00000000..ffeea61f --- /dev/null +++ b/tests/solidity_language_tests.rs @@ -0,0 +1,141 @@ +use probe_code::extract::process_file_for_extraction; +use probe_code::extract::symbols::extract_symbols; +use probe_code::search::{perform_probe, SearchOptions}; +use std::path::PathBuf; + +fn fixture_root() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/solidity/project1") +} + +#[test] +fn test_solidity_symbols_extract_contract_members() { + let file = fixture_root().join("contracts/GovernorExample.sol"); + let symbols = extract_symbols(&file, false).expect("symbols should parse Solidity"); + + let top_names: Vec<_> = symbols + .symbols + .iter() + .map(|symbol| symbol.name.as_str()) + .collect(); + assert!( + top_names.contains(&"IVotesLike"), + "top symbols: {top_names:?}" + ); + assert!( + top_names.contains(&"VoteMath"), + "top symbols: {top_names:?}" + ); + assert!( + top_names.contains(&"GovernorExample"), + "top symbols: {top_names:?}" + ); + + let governor = symbols + .symbols + .iter() + .find(|symbol| symbol.name == "GovernorExample") + .expect("missing GovernorExample contract"); + let child_names: Vec<_> = governor + .children + .iter() + .map(|symbol| symbol.name.as_str()) + .collect(); + + assert!( + child_names.contains(&"ProposalState"), + "children: {child_names:?}" + ); + assert!( + child_names.contains(&"ProposalCore"), + "children: {child_names:?}" + ); + assert!( + child_names.contains(&"ProposalCreated"), + "children: {child_names:?}" + ); + assert!( + child_names.contains(&"GovernorUnexpectedProposalState"), + "children: {child_names:?}" + ); + assert!( + child_names.contains(&"onlyActive"), + "children: {child_names:?}" + ); + assert!( + child_names.contains(&"constructor"), + "children: {child_names:?}" + ); + assert!( + child_names.contains(&"propose"), + "children: {child_names:?}" + ); + assert!( + child_names.contains(&"castVote"), + "children: {child_names:?}" + ); +} + +#[test] +fn test_solidity_symbol_extraction_by_name() { + let file = fixture_root().join("contracts/GovernorExample.sol"); + let results = process_file_for_extraction( + &file, + None, + None, + Some("castVote"), + true, + 0, + None, + false, + false, + ) + .expect("extract should find Solidity function"); + + let code = &results.code; + assert!(code.contains("function castVote")); + assert!(code.contains("onlyActive(proposalId)")); + assert!( + !code.contains("function state("), + "should extract only castVote block" + ); +} + +#[test] +fn test_solidity_search_language_filter_and_test_exclusion() { + let root = fixture_root(); + let query = "votingDelay".to_string(); + let options = SearchOptions { + path: &root, + queries: &[query], + files_only: false, + custom_ignores: &[], + exclude_filenames: false, + reranker: "bm25", + frequency_search: true, + exact: false, + language: Some("solidity"), + max_results: Some(20), + max_bytes: None, + max_tokens: None, + allow_tests: false, + no_merge: false, + merge_threshold: None, + lsp: false, + dry_run: false, + session: None, + timeout: 30, + question: None, + no_gitignore: true, + }; + + let results = perform_probe(&options).expect("search should support Solidity language filter"); + assert!(!results.results.is_empty()); + assert!(results + .results + .iter() + .all(|result| result.file.ends_with("GovernorExample.sol"))); + assert!(results + .results + .iter() + .all(|result| !result.file.ends_with(".t.sol"))); +}