Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ tree-sitter-md = "0.3.2"
tree-sitter-yaml = "0.6.1"
tree-sitter-solidity = "=1.2.10"
tree-sitter-crystal = { git = "https://github.com/crystal-lang-tools/tree-sitter-crystal", rev = "f71f4ca62ac0" }
tree-sitter-haskell = "0.23.1"
ast-grep-core = "0.36.1"
ast-grep-language = "0.36.1"
clap = { version = "4.3", features = ["derive"] }
Expand Down
8 changes: 8 additions & 0 deletions docs/reference/supported-languages.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Probe provides language-aware code search and extraction for a wide range of pro
| Swift | `.swift` | ✅ | ✅ |
| Solidity | `.sol` | ✅ | ✅ |
| Crystal | `.cr` | ✅ | ✅ |
| Haskell | `.hs`, `.lhs` | ✅ | ✅ |
| C# | `.cs` | ✅ | ✅ |
| Markdown | `.md`, `.markdown` | ✅ | ✅ |
| YAML | `.yaml`, `.yml` | ✅ | ✅ |
Expand Down Expand Up @@ -146,6 +147,13 @@ Go also implements special handling for nested struct types.
- **Comment Handling**: Uses Crystal's `#` comments when formatting contextual output
- **Test Detection**: Identifies Crystal spec files such as `*_spec.cr` and common spec DSL blocks

### Haskell

- **Type/Class Extraction**: Extracts data types, newtypes, type synonyms, type families, type classes, and instances
- **Function Extraction**: Extracts top-level functions, bindings, signatures, foreign imports/exports, and pattern synonyms
- **Comment Handling**: Uses Haskell's `--` comments when formatting contextual output
- **Test Detection**: Identifies common Hspec/Tasty/QuickCheck files such as `*Spec.hs`, `*Test.hs`, and literate `.lhs` variants

### Markdown

- **Section Extraction**: Extracts complete sections based on headings
Expand Down
1 change: 1 addition & 0 deletions lsp-daemon/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ tree-sitter-swift = "0.7.0"
tree-sitter-php = "0.23.11"
tree-sitter-solidity = "=1.2.10"
tree-sitter-crystal = { git = "https://github.com/crystal-lang-tools/tree-sitter-crystal", rev = "f71f4ca62ac0" }
tree-sitter-haskell = "0.23.1"

[target.'cfg(unix)'.dependencies]
libc = "0.2"
Expand Down
2 changes: 1 addition & 1 deletion lsp-daemon/src/analyzer/language_analyzers/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ impl GenericAnalyzer {
"jl" => {
detected.insert("Julia");
}
"hs" => {
"hs" | "lhs" => {
detected.insert("Haskell");
}
"ml" => {
Expand Down
154 changes: 150 additions & 4 deletions lsp-daemon/src/analyzer/tree_sitter_analyzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"cs" => Some("csharp"),
"sol" => Some("solidity"),
"cr" => Some("crystal"),
"hs" | "lhs" => Some("haskell"),
_ => None,
}
}
Expand Down Expand Up @@ -102,6 +103,7 @@
"cpp" | "c++" | "cxx" => Some(tree_sitter_cpp::LANGUAGE),
"solidity" | "sol" => Some(tree_sitter_solidity::LANGUAGE),
"crystal" | "cr" => Some(tree_sitter_crystal::LANGUAGE),
"haskell" | "hs" | "lhs" => Some(tree_sitter_haskell::LANGUAGE),

Check warning on line 106 in lsp-daemon/src/analyzer/tree_sitter_analyzer.rs

View check run for this annotation

probelabs / Visor: architecture

architecture Issue

The extension_to_language_name() function duplicates the mapping logic that exists in the core language factory. This creates two sources of truth for language mappings, which can lead to inconsistencies.
Raw output
Consider centralizing this mapping or having the LSP daemon use the core language factory's mapping logic to avoid duplication and maintenance burden.
_ => None,
};

Expand Down Expand Up @@ -450,6 +452,7 @@
"java" => self.map_java_node_to_symbol(node_kind),
"c" | "cpp" | "c++" => self.map_c_node_to_symbol(node_kind),
"crystal" | "cr" => self.map_crystal_node_to_symbol(node_kind),
"haskell" | "hs" | "lhs" => self.map_haskell_node_to_symbol(node_kind),
_ => self.map_generic_node_to_symbol(node_kind),
};

Expand Down Expand Up @@ -592,6 +595,21 @@
}
}

/// Translate a tree-sitter Haskell node kind into the corresponding [`SymbolKind`].
///
/// Returns `None` when the node kind is not one of the declaration kinds
/// listed below.
fn map_haskell_node_to_symbol(&self, node_kind: &str) -> Option<SymbolKind> {
    let symbol_kind = match node_kind {
        // Value-level declarations, their signatures, and FFI declarations.
        "function" | "bind" | "foreign_import" | "foreign_export" | "signature"
        | "default_signature" => SymbolKind::Function,
        // Type-level declarations.
        // NOTE(review): `type_synomym` looks misspelled but appears to be the
        // node name the grammar itself emits -- confirm against the grammar's
        // node-types.json before "correcting" it.
        "data_type" | "newtype" | "type_synomym" | "type_family" | "type_instance"
        | "data_family" | "data_instance" | "kind_signature" => SymbolKind::Type,
        "class" => SymbolKind::Class,
        "instance" => SymbolKind::TraitImpl,
        "pattern_synonym" => SymbolKind::Constant,
        _ => return None,
    };

    Some(symbol_kind)
}

/// Generic node mapping for unknown languages
fn map_generic_node_to_symbol(&self, node_kind: &str) -> Option<SymbolKind> {
if node_kind.contains("function") {
Expand Down Expand Up @@ -635,6 +653,12 @@
| "module_name"
| "parameter_name"
| "constant"
| "name"
| "variable"
| "constructor"
| "module_id"
| "field_name"
| "prefix_id"
) {
let start_byte = child.start_byte();
let end_byte = child.end_byte();
Expand All @@ -656,9 +680,9 @@
if let Ok(nested_name) = self.extract_symbol_name(child, content) {
if !nested_name.is_empty()
&& !self.is_keyword_or_invalid(&nested_name)
&& nested_name
.chars()
.all(|c| c.is_alphanumeric() || c == '_' || c == '?' || c == '!')
&& nested_name.chars().all(|c| {
c.is_alphanumeric() || c == '_' || c == '?' || c == '!' || c == '\''
})
{
return Ok(nested_name);
}
Expand Down Expand Up @@ -706,7 +730,11 @@
.next()
.map_or(false, |c| c.is_alphabetic() || c == '_')
&& word.chars().all(|c| {
c.is_alphanumeric() || c == '_' || c == '?' || c == '!'
c.is_alphanumeric()
|| c == '_'
|| c == '?'
|| c == '!'
|| c == '\''
})
})
.unwrap_or("")
Expand Down Expand Up @@ -765,6 +793,11 @@
| "override"
| "virtual"
| "abstract"
| "data"
| "newtype"
| "type"
| "where"
| "instance"
) || text.is_empty()
}

Expand Down Expand Up @@ -884,6 +917,23 @@
| "macro_def"
| "fun_def"
),
"haskell" | "hs" | "lhs" => matches!(
node_kind,
"module"
| "class"
| "instance"
| "class_declarations"
| "instance_declarations"
| "function"
| "bind"
| "signature"
| "data_type"
| "newtype"
| "type_synomym"
| "type_family"
| "data_family"
| "pattern_synonym"
),
_ => false,
}
}
Expand Down Expand Up @@ -1029,6 +1079,7 @@
"c".to_string(),
"cpp".to_string(),
"crystal".to_string(),
"haskell".to_string(),
]
}

Expand Down Expand Up @@ -1268,6 +1319,101 @@
);
}

/// Checks that the parser pool hands out a Haskell parser for every accepted
/// key, and that the Haskell node-kind mapping and scope detection return the
/// expected results for a handful of representative node kinds.
#[test]
fn test_haskell_parser_pool_and_node_mapping() {
    let analyzer = create_test_analyzer();
    let mut pool = ParserPool::new();

    // Each lookup key is paired with the message reported if no parser is returned.
    let parser_keys = [
        (
            "haskell",
            "Haskell parser should be available by language name",
        ),
        (
            "hs",
            "Haskell parser should be available by extension alias",
        ),
        (
            "lhs",
            "Literate Haskell parser should be available by extension alias",
        ),
    ];
    for (key, failure_message) in parser_keys {
        assert!(pool.get_parser(key).is_some(), "{failure_message}");
    }

    // Representative node kinds and the symbol kind each must map to.
    let expected_kinds = [
        ("function", SymbolKind::Function),
        ("data_type", SymbolKind::Type),
        ("class", SymbolKind::Class),
        ("instance", SymbolKind::TraitImpl),
    ];
    for (node_kind, expected) in expected_kinds {
        assert_eq!(
            analyzer.map_haskell_node_to_symbol(node_kind),
            Some(expected)
        );
    }

    assert!(analyzer.creates_scope("class", "haskell"));
    assert!(analyzer.creates_scope("function", "hs"));
}

/// Runs `analyze_file` over a small Haskell module and asserts that the
/// `User` type, the `Serializable` class and the `active` function are all
/// present in the returned symbols with the expected kinds.
#[tokio::test]
async fn test_haskell_symbol_extraction_uses_parser_pool() {
let analyzer = create_test_analyzer();
let uid_generator = Arc::new(SymbolUIDGenerator::new());
// NOTE(review): the meaning of the two leading numeric arguments (1, 2) is
// not visible from this test -- confirm against `AnalysisContext::new`.
let context = AnalysisContext::new(
1,
2,
"haskell".to_string(),
PathBuf::from("."),
PathBuf::from("Sample.hs"),
uid_generator,
);
// Fixture: one data type, one type class, and one function with its signature.
let haskell_code = r#"
module Demo.Sample where

data User = User { userName :: String }

class Serializable a where
serialize :: a -> String

active :: User -> Bool
active user = True
"#;

// A failed analysis panics here with the `expect` message.
let result = analyzer
.analyze_file(haskell_code, Path::new("Sample.hs"), "haskell", &context)
.await
.expect("Haskell analysis should use the parser pool");

// Pre-render every symbol as `name:kind` so a failing assertion below shows
// exactly what was extracted.
let symbols = result
.symbols
.iter()
.map(|symbol| format!("{}:{:?}", symbol.name, symbol.kind))
.collect::<Vec<_>>();

// The data declaration must be reported as a type.
assert!(
result
.symbols
.iter()
.any(|symbol| symbol.name == "User" && symbol.kind == SymbolKind::Type),
"expected User type in symbols: {symbols:?}"
);
// The type class must be reported as a class.
assert!(
result
.symbols
.iter()
.any(|symbol| symbol.name == "Serializable" && symbol.kind == SymbolKind::Class),
"expected Serializable class in symbols: {symbols:?}"
);
// The top-level definition must be reported as a function.
assert!(
result
.symbols
.iter()
.any(|symbol| symbol.name == "active" && symbol.kind == SymbolKind::Function),
"expected active function in symbols: {symbols:?}"
);
}

#[test]
fn test_function_signature_cleaning() {
let analyzer = create_test_analyzer();
Expand Down
17 changes: 16 additions & 1 deletion lsp-daemon/src/daemon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7229,6 +7229,12 @@ impl LspDaemon {
.ok()?;
Some(())
}
"hs" | "lhs" => {
parser
.set_language(&tree_sitter_haskell::LANGUAGE.into())
.ok()?;
Some(())
}
_ => None,
}?;

Expand Down Expand Up @@ -7281,6 +7287,10 @@ impl LspDaemon {
| "interface_type" => true,
// Java
"constructor_declaration" | "enum_declaration" => true,
// Haskell
"function" | "bind" | "signature" | "default_signature" | "data_type" | "newtype"
| "class" | "instance" | "type_synomym" | "type_family" | "data_family"
| "foreign_import" | "foreign_export" | "pattern_synonym" => true,
_ => false,
};

Expand Down Expand Up @@ -7327,7 +7337,12 @@ impl LspDaemon {
| "field_identifier"
| "type_identifier"
| "property_identifier"
| "function_declarator" => {
| "function_declarator"
| "name"
| "variable"
| "constructor"
| "module_id"
| "field_name" => {
let name = child.utf8_text(content).unwrap_or("");
if !name.is_empty() {
return Some(name.to_string());
Expand Down
23 changes: 22 additions & 1 deletion lsp-daemon/src/fqn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub fn get_fqn_from_ast_with_content(
"cpp" | "cc" | "cxx" => Some(tree_sitter_cpp::LANGUAGE),
"sol" => Some(tree_sitter_solidity::LANGUAGE),
"cr" => Some(tree_sitter_crystal::LANGUAGE),
"hs" | "lhs" => Some(tree_sitter_haskell::LANGUAGE),
_ => None,
};

Expand Down Expand Up @@ -105,6 +106,7 @@ fn language_to_extension(language: &str) -> Option<&'static str> {
"cpp" | "c++" | "cxx" => Some("cpp"),
"solidity" | "sol" => Some("sol"),
"crystal" | "cr" => Some("cr"),
"haskell" | "hs" | "lhs" => Some("hs"),
_ => None,
}
}
Expand Down Expand Up @@ -364,7 +366,7 @@ fn find_declaration_in_descendants<'a>(
fn get_language_separator(extension: &str) -> &str {
match extension {
"rs" | "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "rb" => "::",
"py" | "js" | "ts" | "jsx" | "tsx" | "java" | "go" | "cs" | "sol" => ".",
"py" | "js" | "ts" | "jsx" | "tsx" | "java" | "go" | "cs" | "sol" | "hs" | "lhs" => ".",
"cr" => "::",
"php" => "\\",
_ => "::", // Default to Rust-style for unknown languages
Expand Down Expand Up @@ -397,6 +399,15 @@ fn is_method_node(node: &tree_sitter::Node, extension: &str) -> bool {
kind,
"method_def" | "abstract_method_def" | "macro_def" | "fun_def"
),
"hs" | "lhs" => matches!(
kind,
"function"
| "bind"
| "signature"
| "default_signature"
| "foreign_import"
| "foreign_export"
),
_ => kind.contains("function") || kind.contains("method"),
}
}
Expand Down Expand Up @@ -431,6 +442,16 @@ fn is_namespace_node(node: &tree_sitter::Node, extension: &str) -> bool {
kind,
"class_def" | "module_def" | "struct_def" | "enum_def" | "lib_def" | "union_def"
),
"hs" | "lhs" => matches!(
kind,
"class"
| "instance"
| "data_type"
| "newtype"
| "type_synomym"
| "type_family"
| "data_family"
),
_ => {
// Fallback for unknown languages: try to detect common node types
kind.contains("class") || kind.contains("struct") || kind.contains("namespace")
Expand Down
11 changes: 11 additions & 0 deletions lsp-daemon/src/indexing/ast_extractor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,7 @@ impl AstSymbolExtractor {
Ok(tree_sitter_solidity::LANGUAGE.into())
}
crate::language_detector::Language::Crystal => Ok(tree_sitter_crystal::LANGUAGE.into()),
crate::language_detector::Language::Haskell => Ok(tree_sitter_haskell::LANGUAGE.into()),
_ => Err(anyhow::anyhow!("Unsupported language: {:?}", language)),
}
}
Expand Down Expand Up @@ -864,6 +865,16 @@ impl AstSymbolExtractor {
"alias" | "annotation_def" | "type_def" | "union_def" => (SymbolKind::Type, true),
_ => (SymbolKind::Function, false),
},
crate::language_detector::Language::Haskell => match node_kind {
"function" | "bind" | "signature" | "default_signature" | "foreign_import"
| "foreign_export" => (SymbolKind::Function, true),
"data_type" | "newtype" | "type_synomym" | "type_family" | "type_instance"
| "data_family" | "data_instance" | "kind_signature" => (SymbolKind::Type, true),
"class" => (SymbolKind::Class, true),
"instance" => (SymbolKind::TraitImpl, true),
"pattern_synonym" => (SymbolKind::Constant, true),
_ => (SymbolKind::Function, false),
},
_ => {
// For other languages, try some common patterns
match node_kind {
Expand Down
Loading
Loading