Add supported languages to MCP tool parameter description

sebthom · sebthom · commit 33694893210d · 2025-08-15T13:18:42.000+02:00
diff --git a/main.py b/main.py
@@ -5,6 +5,7 @@
 import sys
 from typing import Any, List, Literal, Optional
 
+import yaml
 from mcp.server.fastmcp import FastMCP
 from pydantic import Field
 
@@ -59,168 +60,172 @@ def parse_args_and_get_config():
 
 DumpFormat = Literal["pattern", "cst", "ast"]
 
-@mcp.tool()
-def dump_syntax_tree(
-    code: str = Field(description = "The code you need"),
-    language: str = Field(description = "The language of the code"),
-    format: DumpFormat = Field(description = "Code dump format. Available values: pattern, ast, cst", default = "cst"),
-) -> str:
-    """
-    Dump code's syntax structure or dump a query's pattern structure.
-    This is useful to discover correct syntax kind and syntax tree structure. Call it when debugging a rule.
-    The tool requires three arguments: code, language and format. The first two are self-explanatory.
-    `format` is the output format of the syntax tree.
-    use `format=cst` to inspect the code's concrete syntax tree structure, useful to debug target code.
-    use `format=pattern` to inspect how ast-grep interprets a pattern, useful to debug pattern rule.
-
-    Internally calls: ast-grep run --pattern <code> --lang <language> --debug-query=<format>
-    """
-    result = run_ast_grep("run", ["--pattern", code, "--lang", language, f"--debug-query={format}"])
-    return result.stderr.strip()  # type: ignore[no-any-return]
-
-@mcp.tool()
-def test_match_code_rule(
-    code: str = Field(description="The code to test against the rule"),
-    yaml: str = Field(description="The ast-grep YAML rule to search. It must have id, language, rule fields."),
-) -> List[dict[str, Any]]:
-    """
-    Test a code against an ast-grep YAML rule.
-    This is useful to test a rule before using it in a project.
-
-    Internally calls: ast-grep scan --inline-rules <yaml> --json --stdin
-    """
-    result = run_ast_grep("scan", ["--inline-rules", yaml, "--json", "--stdin"], input_text = code)
-    matches = json.loads(result.stdout.strip())
-    if not matches:
-        raise ValueError("No matches found for the given code and rule. Try adding `stopBy: end` to your inside/has rule.")
-    return matches  # type: ignore[no-any-return]
-
-@mcp.tool()
-def find_code(
-    project_folder: str = Field(description="The absolute path to the project folder. It must be absolute path."),
-    pattern: str = Field(description="The ast-grep pattern to search for. Note, the pattern must have valid AST structure."),
-    language: str = Field(description="The language of the query", default=""),
-    max_results: Optional[int] = Field(default=None, description="Maximum results to return"),
-    output_format: str = Field(default="text", description="'text' or 'json'"),
-) -> str | List[dict[str, Any]]:
-    """
-    Find code in a project folder that matches the given ast-grep pattern.
-    Pattern is good for simple and single-AST node result.
-    For more complex usage, please use YAML by `find_code_by_rule`.
-
-    Internally calls: ast-grep run --pattern <pattern> [--json] <project_folder>
-
-    Output formats:
-    - text (default): Compact text format with file:line-range headers and complete match text
-      Example:
-        Found 2 matches:
-
-        path/to/file.py:10-15
-        def example_function():
-            # function body
-            return result
-
-        path/to/file.py:20-22
-        def another_function():
-            pass
-
-    - json: Full match objects with metadata including ranges, meta-variables, etc.
-
-    The max_results parameter limits the number of complete matches returned (not individual lines).
-    When limited, the header shows "Found X matches (showing first Y of Z)".
-
-    Example usage:
-      find_code(pattern="class $NAME", max_results=20)  # Returns text format
-      find_code(pattern="class $NAME", output_format="json")  # Returns JSON with metadata
-    """
-    if output_format not in ["text", "json"]:
-        raise ValueError(f"Invalid output_format: {output_format}. Must be 'text' or 'json'.")
-
-    args = ["--pattern", pattern]
-    if language:
-        args.extend(["--lang", language])
-
-    # Always get JSON internally for accurate match limiting
-    result = run_ast_grep("run", args + ["--json", project_folder])
-    matches = json.loads(result.stdout.strip() or "[]")
-
-    # Apply max_results limit to complete matches
-    total_matches = len(matches)
-    if max_results is not None and total_matches > max_results:
-        matches = matches[:max_results]
-
-    if output_format == "text":
+def register_mcp_tools() -> None:
+
+    @mcp.tool()
+    def dump_syntax_tree(
+        code: str = Field(description = "The code you need"),
+        language: str = Field(description = f"The language of the code. Supported: {', '.join(get_supported_languages())}"),
+        format: DumpFormat = Field(description = "Code dump format. Available values: pattern, ast, cst", default = "cst"),
+    ) -> str:
+        """
+        Dump code's syntax structure or dump a query's pattern structure.
+        This is useful to discover correct syntax kind and syntax tree structure. Call it when debugging a rule.
+        The tool requires three arguments: code, language and format. The first two are self-explanatory.
+        `format` is the output format of the syntax tree.
+        use `format=cst` to inspect the code's concrete syntax tree structure, useful to debug target code.
+        use `format=pattern` to inspect how ast-grep interprets a pattern, useful to debug pattern rule.
+
+        Internally calls: ast-grep run --pattern <code> --lang <language> --debug-query=<format>
+        """
+        result = run_ast_grep("run", ["--pattern", code, "--lang", language, f"--debug-query={format}"])
+        return result.stderr.strip()  # type: ignore[no-any-return]
+
+    @mcp.tool()
+    def test_match_code_rule(
+        code: str = Field(description = "The code to test against the rule"),
+        yaml: str = Field(description = "The ast-grep YAML rule to search. It must have id, language, rule fields."),
+    ) -> List[dict[str, Any]]:
+        """
+        Test a code against an ast-grep YAML rule.
+        This is useful to test a rule before using it in a project.
+
+        Internally calls: ast-grep scan --inline-rules <yaml> --json --stdin
+        """
+        result = run_ast_grep("scan", ["--inline-rules", yaml, "--json", "--stdin"], input_text = code)
+        matches = json.loads(result.stdout.strip())
         if not matches:
-            return "No matches found"
-        text_output = format_matches_as_text(matches)
-        header = f"Found {len(matches)} matches"
-        if max_results is not None and total_matches > max_results:
-            header += f" (showing first {max_results} of {total_matches})"
-        return header + ":\n\n" + text_output
-    return matches  # type: ignore[no-any-return]
-
-@mcp.tool()
-def find_code_by_rule(
-    project_folder: str = Field(description="The absolute path to the project folder. It must be absolute path."),
-    yaml: str = Field(description="The ast-grep YAML rule to search. It must have id, language, rule fields."),
-    max_results: Optional[int] = Field(default=None, description="Maximum results to return"),
-    output_format: str = Field(default="text", description="'text' or 'json'"),
+            raise ValueError("No matches found for the given code and rule. Try adding `stopBy: end` to your inside/has rule.")
+        return matches  # type: ignore[no-any-return]
+
+    @mcp.tool()
+    def find_code(
+        project_folder: str = Field(description = "The absolute path to the project folder. It must be absolute path."),
+        pattern: str = Field(description = "The ast-grep pattern to search for. Note, the pattern must have valid AST structure."),
+        language: str = Field(description = f"The language of the code. Supported: {', '.join(get_supported_languages())}. "
+                                           "If not specified, will be auto-detected based on file extensions.", default = ""),
+        max_results: Optional[int] = Field(default = None, description = "Maximum results to return"),
+        output_format: str = Field(default = "text", description = "'text' or 'json'"),
     ) -> str | List[dict[str, Any]]:
-    """
-    Find code using ast-grep's YAML rule in a project folder.
-    YAML rule is more powerful than simple pattern and can perform complex search like find AST inside/having another AST.
-    It is a more advanced search tool than the simple `find_code`.
+        """
+        Find code in a project folder that matches the given ast-grep pattern.
+        Pattern is good for simple and single-AST node result.
+        For more complex usage, please use YAML by `find_code_by_rule`.
 
-    Tip: When using relational rules (inside/has), add `stopBy: end` to ensure complete traversal.
+        Internally calls: ast-grep run --pattern <pattern> [--json] <project_folder>
 
-    Internally calls: ast-grep scan --inline-rules <yaml> [--json] <project_folder>
+        Output formats:
+        - text (default): Compact text format with file:line-range headers and complete match text
+          Example:
+            Found 2 matches:
 
-    Output formats:
-    - text (default): Compact text format with file:line-range headers and complete match text
-      Example:
-        Found 2 matches:
+            path/to/file.py:10-15
+            def example_function():
+                # function body
+                return result
 
-        src/models.py:45-52
-        class UserModel:
-            def __init__(self):
-                self.id = None
-                self.name = None
+            path/to/file.py:20-22
+            def another_function():
+                pass
 
-        src/views.py:12
-        class SimpleView: pass
+        - json: Full match objects with metadata including ranges, meta-variables, etc.
 
-    - json: Full match objects with metadata including ranges, meta-variables, etc.
+        The max_results parameter limits the number of complete matches returned (not individual lines).
+        When limited, the header shows "Found X matches (showing first Y of Z)".
 
-    The max_results parameter limits the number of complete matches returned (not individual lines).
-    When limited, the header shows "Found X matches (showing first Y of Z)".
+        Example usage:
+          find_code(pattern="class $NAME", max_results=20)  # Returns text format
+          find_code(pattern="class $NAME", output_format="json")  # Returns JSON with metadata
+        """
+        if output_format not in ["text", "json"]:
+            raise ValueError(f"Invalid output_format: {output_format}. Must be 'text' or 'json'.")
 
-    Example usage:
-      find_code_by_rule(yaml="id: x\\nlanguage: python\\nrule: {pattern: 'class $NAME'}", max_results=20)
-      find_code_by_rule(yaml="...", output_format="json")  # For full metadata
-    """
-    if output_format not in ["text", "json"]:
-        raise ValueError(f"Invalid output_format: {output_format}. Must be 'text' or 'json'.")
+        args = ["--pattern", pattern]
+        if language:
+            args.extend(["--lang", language])
 
-    args = ["--inline-rules", yaml]
+        # Always get JSON internally for accurate match limiting
+        result = run_ast_grep("run", args + ["--json", project_folder])
+        matches = json.loads(result.stdout.strip() or "[]")
 
-    # Always get JSON internally for accurate match limiting
-    result = run_ast_grep("scan", args + ["--json", project_folder])
-    matches = json.loads(result.stdout.strip() or "[]")
+        # Apply max_results limit to complete matches
+        total_matches = len(matches)
+        if max_results is not None and total_matches > max_results:
+            matches = matches[:max_results]
+
+        if output_format == "text":
+            if not matches:
+                return "No matches found"
+            text_output = format_matches_as_text(matches)
+            header = f"Found {len(matches)} matches"
+            if max_results is not None and total_matches > max_results:
+                header += f" (showing first {max_results} of {total_matches})"
+            return header + ":\n\n" + text_output
+        return matches  # type: ignore[no-any-return]
+
+    @mcp.tool()
+    def find_code_by_rule(
+        project_folder: str = Field(description = "The absolute path to the project folder. It must be absolute path."),
+        yaml: str = Field(description = "The ast-grep YAML rule to search. It must have id, language, rule fields."),
+        max_results: Optional[int] = Field(default = None, description = "Maximum results to return"),
+        output_format: str = Field(default = "text", description = "'text' or 'json'"),
+        ) -> str | List[dict[str, Any]]:
+        """
+        Find code using ast-grep's YAML rule in a project folder.
+        YAML rule is more powerful than simple pattern and can perform complex search like find AST inside/having another AST.
+        It is a more advanced search tool than the simple `find_code`.
+
+        Tip: When using relational rules (inside/has), add `stopBy: end` to ensure complete traversal.
+
+        Internally calls: ast-grep scan --inline-rules <yaml> [--json] <project_folder>
+
+        Output formats:
+        - text (default): Compact text format with file:line-range headers and complete match text
+          Example:
+            Found 2 matches:
+
+            src/models.py:45-52
+            class UserModel:
+                def __init__(self):
+                    self.id = None
+                    self.name = None
+
+            src/views.py:12
+            class SimpleView: pass
+
+        - json: Full match objects with metadata including ranges, meta-variables, etc.
+
+        The max_results parameter limits the number of complete matches returned (not individual lines).
+        When limited, the header shows "Found X matches (showing first Y of Z)".
+
+        Example usage:
+          find_code_by_rule(yaml="id: x\\nlanguage: python\\nrule: {pattern: 'class $NAME'}", max_results=20)
+          find_code_by_rule(yaml="...", output_format="json")  # For full metadata
+        """
+        if output_format not in ["text", "json"]:
+            raise ValueError(f"Invalid output_format: {output_format}. Must be 'text' or 'json'.")
+
+        args = ["--inline-rules", yaml]
+
+        # Always get JSON internally for accurate match limiting
+        result = run_ast_grep("scan", args + ["--json", project_folder])
+        matches = json.loads(result.stdout.strip() or "[]")
+
+        # Apply max_results limit to complete matches
+        total_matches = len(matches)
+        if max_results is not None and total_matches > max_results:
+            matches = matches[:max_results]
 
-    # Apply max_results limit to complete matches
-    total_matches = len(matches)
-    if max_results is not None and total_matches > max_results:
-        matches = matches[:max_results]
+        if output_format == "text":
+            if not matches:
+                return "No matches found"
+            text_output = format_matches_as_text(matches)
+            header = f"Found {len(matches)} matches"
+            if max_results is not None and total_matches > max_results:
+                header += f" (showing first {max_results} of {total_matches})"
+            return header + ":\n\n" + text_output
+        return matches  # type: ignore[no-any-return]
 
-    if output_format == "text":
-        if not matches:
-            return "No matches found"
-        text_output = format_matches_as_text(matches)
-        header = f"Found {len(matches)} matches"
-        if max_results is not None and total_matches > max_results:
-            header += f" (showing first {max_results} of {total_matches})"
-        return header + ":\n\n" + text_output
-    return matches  # type: ignore[no-any-return]
 
 def format_matches_as_text(matches: List[dict]) -> str:
     """Convert JSON matches to LLM-friendly text format.
@@ -248,6 +253,29 @@ def format_matches_as_text(matches: List[dict]) -> str:
 
     return '\n\n'.join(output_blocks)
 
+def get_supported_languages() -> List[str]:
+    """Return the sorted, de-duplicated list of language names for tool descriptions.
+
+    Built-in names are extended with the `customLanguages` keys of CONFIG_PATH, if readable.
+    """
+    languages = [  # https://ast-grep.github.io/reference/languages.html
+        "bash", "c", "cpp", "csharp", "css", "elixir", "go", "haskell", "html", "java",
+        "javascript", "json", "jsx", "kotlin", "lua", "nix", "php", "python", "ruby",
+        "rust", "scala", "solidity", "swift", "tsx", "typescript", "yaml",
+    ]
+    # Custom languages: https://ast-grep.github.io/advanced/custom-language.html#register-language-in-sgconfig-yml
+    if CONFIG_PATH and os.path.exists(CONFIG_PATH):
+        try:
+            with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
+                config = yaml.safe_load(f)
+            custom = config.get('customLanguages') if isinstance(config, dict) else None
+            if isinstance(custom, dict):
+                languages += [str(name) for name in custom]  # YAML keys may be non-strings
+        except (OSError, UnicodeError, yaml.YAMLError):
+            pass  # best effort: an unreadable config must not block server start-up
+
+    return sorted(set(languages))
+
 def run_command(args: List[str], input_text: Optional[str] = None) -> subprocess.CompletedProcess:
     try:
         # On Windows, if ast-grep is installed via npm, it's a batch file
@@ -281,7 +309,8 @@ def run_mcp_server() -> None:
     Run the MCP server.
     This function is used to start the MCP server when this script is run directly.
     """
-    parse_args_and_get_config()
+    parse_args_and_get_config()  # sets CONFIG_PATH
+    register_mcp_tools()  # tools defined *after* CONFIG_PATH is known
     mcp.run(transport="stdio")
 
 if __name__ == "__main__":
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,6 +7,7 @@ requires-python = ">=3.13"
 dependencies = [
     "pydantic>=2.11.0",
     "mcp[cli]>=1.6.0",
+    "pyyaml>=6.0.2",
 ]
 
 [project.optional-dependencies]
@@ -16,6 +17,7 @@ dev = [
     "pytest-mock>=3.14.0",
     "ruff>=0.7.0",
     "mypy>=1.13.0",
+    "types-pyyaml>=6.0.12.20250809",
 ]
 
 [project.scripts]
@@ -55,3 +57,4 @@ warn_return_any = true
 warn_unused_configs = true
 disallow_untyped_defs = false
 ignore_missing_imports = true
+
diff --git a/uv.lock b/uv.lock