From cad6790fe5a22ed6e9d8c919c5da42230002c44f Mon Sep 17 00:00:00 2001 From: James Davy Date: Thu, 20 Nov 2025 10:07:14 +0000 Subject: [PATCH 1/2] feat: add a file discovery package --- pkg/ecosystems/discovery/README.md | 171 ++++++++++++ pkg/ecosystems/discovery/files.go | 373 +++++++++++++++++++++++++ pkg/ecosystems/discovery/files_test.go | 302 ++++++++++++++++++++ 3 files changed, 846 insertions(+) create mode 100644 pkg/ecosystems/discovery/README.md create mode 100644 pkg/ecosystems/discovery/files.go create mode 100644 pkg/ecosystems/discovery/files_test.go diff --git a/pkg/ecosystems/discovery/README.md b/pkg/ecosystems/discovery/README.md new file mode 100644 index 0000000..6d9fc26 --- /dev/null +++ b/pkg/ecosystems/discovery/README.md @@ -0,0 +1,171 @@ +# File Discovery Package + +Efficient file discovery utilities for finding manifest and configuration files in directory trees. + +## Features + +- **Multiple Target Files**: Find specific files by path +- **Multiple Glob Patterns**: Find files matching any of multiple patterns (e.g., `*.py`, `*.toml`) +- **Flexible Combination**: Use both target files and glob patterns together +- **Automatic Deduplication**: Returns unique results when multiple criteria match the same file +- **Exclude Patterns**: Skip directories and files using glob patterns +- **Context Support**: Cancellable operations for long-running searches +- **Efficient Traversal**: Uses `filepath.WalkDir` for optimal performance +- **Symlink Handling**: Optional symlink following +- **Structured Logging**: `slog` integration for debugging + +## Usage + +The package uses the functional options pattern for clean and idiomatic configuration. 
+ +### Find a Specific File + +```go +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithTargetFile("requirements.txt")) +``` + +### Find Multiple Specific Files + +```go +// Multiple individual options +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithTargetFile("requirements.txt"), + discovery.WithTargetFile("setup.py"), + discovery.WithTargetFile("pyproject.toml")) + +// Or use variadic form +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithTargetFiles("requirements.txt", "setup.py", "pyproject.toml")) +``` + +### Find Files Matching Pattern + +```go +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithInclude("requirements*.txt")) +``` + +### Find Files Matching Multiple Patterns + +```go +// Multiple individual patterns +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithInclude("*.py"), + discovery.WithInclude("*.toml"), + discovery.WithInclude("*.yml")) + +// Or use variadic form +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithIncludes("*.py", "*.toml", "*.yml")) +``` + +### Combine Target Files and Globs + +```go +// Find specific files AND all files matching patterns +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithTargetFile("requirements.txt"), + discovery.WithInclude("*.py"), + discovery.WithInclude("*.toml")) +// Returns: requirements.txt + all .py files + all .toml files (deduplicated) +``` + +### Exclude Patterns + +```go +// Single exclude pattern +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithInclude("requirements.txt"), + discovery.WithExclude("node_modules")) // Excludes node_modules directory +``` + +### Multiple Exclude Patterns + +```go +// Multiple individual exclude patterns +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithInclude("*.py"), + discovery.WithExclude("node_modules"), + 
discovery.WithExclude(".*"), // Exclude hidden directories + discovery.WithExclude("__pycache__")) + +// Or use variadic form +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithInclude("*.py"), + discovery.WithExcludes("node_modules", ".*", "__pycache__")) +``` + +### Follow Symlinks + +```go +results, err := discovery.FindFiles(ctx, "/path/to/project", + discovery.WithInclude("*.txt"), + discovery.WithFollowSymlinks(true)) +``` + +### Common Exclude Patterns + +```go +// Exclude hidden directories +WithExclude(".*") + +// Exclude specific directory +WithExclude("node_modules") + +// Exclude file type +WithExclude("*.tmp") + +// Exclude multiple patterns at once +WithExcludes("node_modules", ".*", "__pycache__", "*.pyc") +``` + +## Performance + +- Uses `filepath.WalkDir` instead of `filepath.Walk` for better performance +- Skips entire directory trees when excluded +- Minimal allocations for large directory structures +- Context cancellation for early termination + +## Return Value + +Returns `[]FindResult` where each result contains: +- `Path`: Absolute path to the file +- `RelPath`: Relative path from the root directory + +## Error Handling + +- Returns error for invalid patterns or inaccessible root directory +- Logs warnings for inaccessible files/directories but continues walking +- Returns `context.Canceled` if operation is cancelled + +## Examples + +### Find Python Manifest Files + +```go +ctx := context.Background() + +// Find all Python manifest files +results, err := discovery.FindFiles(ctx, projectDir, + discovery.WithTargetFile("requirements.txt"), // Exact file + discovery.WithIncludes("requirements*.txt", "*.toml", "setup.py"), // Patterns + discovery.WithExcludes(".venv", "__pycache__", "*.pyc")) // Exclude virtual env and build artifacts + +if err != nil { + return err +} + +for _, result := range results { + fmt.Printf("Found: %s (at %s)\n", result.RelPath, result.Path) +} +``` + +### Find Configuration Files Across 
Ecosystem

```go
// Find manifest files for multiple package managers
results, err := discovery.FindFiles(ctx, projectDir,
    discovery.WithTargetFiles("package.json", "go.mod", "Gemfile", "pom.xml"),
    discovery.WithIncludes("*.csproj", "*.gradle", "*.toml"))
```
diff --git a/pkg/ecosystems/discovery/files.go b/pkg/ecosystems/discovery/files.go
new file mode 100644
index 0000000..b7d4ef4
--- /dev/null
+++ b/pkg/ecosystems/discovery/files.go
@@ -0,0 +1,373 @@
// Package discovery locates files below a root directory, either by explicit
// path relative to that root or by glob pattern on the file name.
package discovery

import (
	"context"
	"fmt"
	"io/fs"
	"log/slog"
	"os"
	"path/filepath"
)

// Attribute keys shared by the structured (slog) log statements in this
// package, so the same key is always used for the same kind of value.
const (
	logKeyFile    = "file"
	logKeyPath    = "path"
	logKeyDir     = "dir"
	logKeyError   = "error"
	logKeyPattern = "pattern"
)

// findOptions configures file discovery behavior.
// It is populated exclusively through FindOption values.
type findOptions struct {
	// targetFiles are paths joined onto the root directory and looked up
	// directly; a missing target makes the search fail.
	targetFiles []string
	// includeGlobs are filepath.Match patterns compared against the base
	// name of every file met while walking the tree.
	includeGlobs []string
	// excludeGlobs are filepath.Match patterns compared against both the
	// base name and the root-relative path of files and directories.
	excludeGlobs []string
	// followSymlinks controls whether symlink entries met during the walk
	// are considered (true) or skipped (false, the zero value).
	followSymlinks bool
}

// FindOption is a functional option for configuring file discovery.
// Options are applied in the order they are passed to FindFiles.
type FindOption func(*findOptions)

// WithTargetFile adds a specific file to find.
// The path is interpreted relative to the root directory given to FindFiles.
func WithTargetFile(file string) FindOption {
	return func(o *findOptions) {
		o.targetFiles = append(o.targetFiles, file)
	}
}

// WithTargetFiles adds multiple specific files to find.
// Calling it with no arguments leaves the options unchanged.
func WithTargetFiles(files ...string) FindOption {
	return func(o *findOptions) {
		o.targetFiles = append(o.targetFiles, files...)
	}
}

// WithInclude adds a glob pattern for files to include (e.g., "requirements*.txt").
// The pattern uses filepath.Match syntax and is matched against file names,
// not against paths.
func WithInclude(pattern string) FindOption {
	return func(o *findOptions) {
		o.includeGlobs = append(o.includeGlobs, pattern)
	}
}

// WithIncludes adds multiple glob patterns for files to include.
// A file is selected when it matches any one of the accumulated patterns.
func WithIncludes(patterns ...string) FindOption {
	return func(o *findOptions) {
		o.includeGlobs = append(o.includeGlobs, patterns...)
	}
}

// WithExclude adds a glob pattern for files/directories to exclude (e.g., "node_modules").
+func WithExclude(pattern string) FindOption { + return func(o *findOptions) { + o.excludeGlobs = append(o.excludeGlobs, pattern) + } +} + +// WithExcludes adds multiple glob patterns for files/directories to exclude. +func WithExcludes(patterns ...string) FindOption { + return func(o *findOptions) { + o.excludeGlobs = append(o.excludeGlobs, patterns...) + } +} + +// WithFollowSymlinks enables or disables following symbolic links. +func WithFollowSymlinks(follow bool) FindOption { + return func(o *findOptions) { + o.followSymlinks = follow + } +} + +// FindResult represents a discovered file. +type FindResult struct { + Path string // Absolute path to the file + RelPath string // Path relative to the root directory +} + +// FindFiles discovers files in a directory based on the provided options. +// It efficiently traverses the directory tree and returns matching files. +// +// Finds all files specified in TargetFiles and all files matching any IncludeGlobs pattern. +// Exclude pattern filters out directories and files from both modes. +// Returns a deduplicated list of matching files. +// +// The search can be canceled via the context. 
+func FindFiles(ctx context.Context, rootDir string, options ...FindOption) ([]FindResult, error) { + // Apply options + opts := &findOptions{ + targetFiles: []string{}, + includeGlobs: []string{}, + excludeGlobs: []string{}, + } + for _, opt := range options { + opt(opts) + } + + if err := validateInputs(rootDir, opts); err != nil { + return nil, err + } + + absRoot, err := filepath.Abs(rootDir) + if err != nil { + return nil, fmt.Errorf("failed to resolve absolute path for %s: %w", rootDir, err) + } + + slog.Debug("Starting file discovery", + slog.String("root_dir", absRoot), + slog.Any("target_files", opts.targetFiles), + slog.Any("include_globs", opts.includeGlobs), + slog.Any("exclude_globs", opts.excludeGlobs)) + + // Use a map to deduplicate results by absolute path + resultMap := make(map[string]FindResult) + + // Find all target files + for _, targetFile := range opts.targetFiles { + result, err := findTargetFile(absRoot, targetFile, opts.excludeGlobs) + if err != nil { + return nil, err + } + // Only add if not excluded (empty result means excluded) + if result.Path != "" { + resultMap[result.Path] = result + } + } + + // Walk directory for pattern matching if any globs specified + if len(opts.includeGlobs) > 0 { + globResults, err := walkDirectory(ctx, absRoot, opts) + if err != nil { + return nil, err + } + for _, result := range globResults { + resultMap[result.Path] = result + } + } + + // Convert map to slice + results := make([]FindResult, 0, len(resultMap)) + for _, result := range resultMap { + results = append(results, result) + } + + slog.Info("File discovery completed", + slog.String("root_dir", absRoot), + slog.Int("files_found", len(results))) + + return results, nil +} + +// validateInputs checks that required parameters are provided. 
+func validateInputs(rootDir string, opts *findOptions) error { + if rootDir == "" { + return fmt.Errorf("rootDir cannot be empty") + } + if opts == nil { + return fmt.Errorf("opts cannot be nil") + } + if len(opts.targetFiles) == 0 && len(opts.includeGlobs) == 0 { + return fmt.Errorf("at least one target file or include pattern must be specified") + } + + // Validate include patterns + for _, pattern := range opts.includeGlobs { + if _, err := filepath.Match(pattern, "test"); err != nil { + return fmt.Errorf("invalid include pattern %s: %w", pattern, err) + } + } + + // Validate exclude patterns + for _, pattern := range opts.excludeGlobs { + if _, err := filepath.Match(pattern, "test"); err != nil { + return fmt.Errorf("invalid exclude pattern %s: %w", pattern, err) + } + } + + return nil +} + +// findTargetFile attempts to find a specific file by path. +// Returns an error if the file is not found or is a directory. +// Returns nil error with empty result if the file is excluded. +func findTargetFile(absRoot, targetFile string, excludePatterns []string) (FindResult, error) { + targetPath := filepath.Join(absRoot, targetFile) + + info, err := os.Stat(targetPath) + if err != nil { + return FindResult{}, fmt.Errorf("target file %s not found: %w", targetFile, err) + } + if info.IsDir() { + return FindResult{}, fmt.Errorf("target file %s is a directory", targetFile) + } + + // Check if excluded - return empty result but no error + if isExcluded(targetFile, targetFile, excludePatterns) { + slog.Debug("Target file excluded by pattern", slog.String(logKeyFile, targetFile)) + return FindResult{}, nil + } + + slog.Debug("Found target file", slog.String(logKeyFile, targetPath)) + return FindResult{ + Path: targetPath, + RelPath: targetFile, + }, nil +} + +// walkDirectory traverses the directory tree and finds files matching the include pattern. 
+func walkDirectory(ctx context.Context, absRoot string, opts *findOptions) ([]FindResult, error) { + // Pre-allocate with reasonable capacity to reduce allocations + results := make([]FindResult, 0, 16) + + err := filepath.WalkDir(absRoot, func(path string, d fs.DirEntry, err error) error { + // Check for cancellation + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + if err != nil { + slog.Warn("Error accessing path", slog.String(logKeyPath, path), slog.Any(logKeyError, err)) + return nil // Continue walking despite errors + } + + relPath, err := filepath.Rel(absRoot, path) + if err != nil { + slog.Warn("Failed to compute relative path", slog.String(logKeyPath, path), slog.Any(logKeyError, err)) + return nil + } + + // Handle directories + if d.IsDir() { + return handleDirectory(d, relPath, opts.excludeGlobs) + } + + // Handle symlinks + if d.Type()&fs.ModeSymlink != 0 && !shouldFollowSymlink(path, relPath, opts.followSymlinks) { + return nil + } + + // Check exclusions and pattern match for files + if shouldIncludeFile(d, relPath, opts) { + results = append(results, FindResult{ + Path: path, + RelPath: relPath, + }) + slog.Debug("Matched file", slog.String(logKeyFile, relPath)) + } + + return nil + }) + if err != nil { + return nil, fmt.Errorf("error walking directory %s: %w", absRoot, err) + } + + return results, nil +} + +// handleDirectory checks if a directory should be excluded and returns fs.SkipDir if so. 
+func handleDirectory(d fs.DirEntry, relPath string, excludePatterns []string) error { + if len(excludePatterns) == 0 { + return nil + } + + name := d.Name() + for _, pattern := range excludePatterns { + // Check relative path first (more specific) + matched, err := filepath.Match(pattern, relPath) + if err != nil { + slog.Warn("Invalid exclude pattern for directory", slog.String(logKeyPattern, pattern), slog.Any(logKeyError, err)) + continue + } + if matched { + slog.Debug("Excluding directory by path", slog.String(logKeyDir, relPath), slog.String(logKeyPattern, pattern)) + return fs.SkipDir + } + + // Check directory name (matches anywhere in tree) + matched, err = filepath.Match(pattern, name) + if err != nil { + slog.Warn("Invalid exclude pattern for directory", slog.String(logKeyPattern, pattern), slog.Any(logKeyError, err)) + continue + } + if matched { + slog.Debug("Excluding directory by name", slog.String(logKeyDir, name), slog.String(logKeyPattern, pattern)) + return fs.SkipDir + } + } + + return nil +} + +// shouldFollowSymlink determines if a symlink should be followed. +func shouldFollowSymlink(path, relPath string, followSymlinks bool) bool { + if !followSymlinks { + slog.Debug("Skipping symlink", slog.String(logKeyPath, relPath)) + return false + } + + // Resolve symlink and check if it's a file + targetInfo, err := os.Stat(path) + if err != nil || targetInfo.IsDir() { + return false + } + + return true +} + +// shouldIncludeFile determines if a file should be included in results. +// Returns true if the file matches any of the include globs and is not excluded. 
+func shouldIncludeFile(d fs.DirEntry, relPath string, opts *findOptions) bool { + name := d.Name() + + // Check exclusions first (most likely to filter out files) + if isExcluded(name, relPath, opts.excludeGlobs) { + slog.Debug("Excluding file", slog.String(logKeyFile, relPath)) + return false + } + + // Match against any include pattern (already validated in validateInputs) + for _, pattern := range opts.includeGlobs { + matched, err := filepath.Match(pattern, name) + if err != nil { + slog.Warn("Invalid include pattern for file", slog.String(logKeyPattern, pattern), slog.Any(logKeyError, err)) + continue + } + if matched { + return true + } + } + + return false +} + +// isExcluded checks if a file/directory matches any of the exclude patterns. +// Checks both the name (for matching anywhere in tree) and relPath (for specific paths). +func isExcluded(name, relPath string, excludePatterns []string) bool { + if len(excludePatterns) == 0 { + return false + } + + for _, pattern := range excludePatterns { + // Check by name + matched, err := filepath.Match(pattern, name) + if err != nil { + slog.Warn("Invalid exclude pattern", slog.String(logKeyPattern, pattern), slog.Any(logKeyError, err)) + continue + } + if matched { + return true + } + + // Check by relative path + matched, err = filepath.Match(pattern, relPath) + if err != nil { + slog.Warn("Invalid exclude pattern", slog.String(logKeyPattern, pattern), slog.Any(logKeyError, err)) + continue + } + if matched { + return true + } + } + + return false +} diff --git a/pkg/ecosystems/discovery/files_test.go b/pkg/ecosystems/discovery/files_test.go new file mode 100644 index 0000000..3825cc5 --- /dev/null +++ b/pkg/ecosystems/discovery/files_test.go @@ -0,0 +1,302 @@ +//go:build !integration +// +build !integration + +package discovery + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// setupFiles creates test files in 
the given directory +func setupFiles(t *testing.T, root string, files map[string]string) { + t.Helper() + for path, content := range files { + fullPath := filepath.Join(root, path) + require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0755)) + require.NoError(t, os.WriteFile(fullPath, []byte(content), 0644)) + } +} + +func TestFindFiles_TargetFile(t *testing.T) { + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "requirements.txt"), []byte("test"), 0644)) + + // Create subdirectory structure + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "subdir"), 0755)) + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "subdir", "sub-requirements.txt"), []byte("test"), 0644)) + + tests := []struct { + name string + targetFile string + exclude string + wantCount int + wantRelPath string + wantErr bool + }{ + {"finds existing target file", "requirements.txt", "", 1, "requirements.txt", false}, + {"errors when not found", "missing.txt", "", 0, "", true}, + {"excludes when pattern matches", "requirements.txt", "*.txt", 0, "", false}, + {"finds in subdirectory", "subdir/sub-requirements.txt", "", 1, "subdir/sub-requirements.txt", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + opts := []FindOption{WithTargetFile(tt.targetFile)} + if tt.exclude != "" { + opts = append(opts, WithExclude(tt.exclude)) + } + + results, err := FindFiles(context.Background(), tmpDir, opts...) 
+ if tt.wantErr { + assert.Error(t, err) + assert.Contains(t, err.Error(), "not found") + return + } + require.NoError(t, err) + assert.Len(t, results, tt.wantCount) + + if tt.wantRelPath != "" && len(results) > 0 { + assert.Equal(t, tt.wantRelPath, results[0].RelPath) + } + }) + } +} + +func TestFindFiles_IncludeGlob(t *testing.T) { + tmpDir := t.TempDir() + setupFiles(t, tmpDir, map[string]string{ + "requirements.txt": "test", + "requirements-dev.txt": "test", + "setup.py": "test", + "subdir/requirements.txt": "test", + "subdir/other.txt": "test", + "subdir2/requirements.txt": "test", + }) + + tests := []struct { + name string + pattern string + wantCount int + }{ + {"finds all matching pattern", "requirements*.txt", 4}, + {"finds simple pattern", "*.py", 1}, + {"returns empty when no match", "*.yml", 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + results, err := FindFiles(context.Background(), tmpDir, WithInclude(tt.pattern)) + require.NoError(t, err) + assert.Len(t, results, tt.wantCount) + }) + } +} + +func TestFindFiles_ExcludePattern(t *testing.T) { + tmpDir := t.TempDir() + setupFiles(t, tmpDir, map[string]string{ + "requirements.txt": "test", + "node_modules/package.json": "test", + "node_modules/dep/requirements.txt": "test", + ".venv/requirements.txt": "test", + "subdir/requirements.txt": "test", + }) + + t.Run("excludes files by name", func(t *testing.T) { + results, err := FindFiles(context.Background(), tmpDir, + WithInclude("*.json"), + WithExclude("package.json")) + require.NoError(t, err) + assert.Empty(t, results) + }) + + t.Run("excludes directories by name", func(t *testing.T) { + results, err := FindFiles(context.Background(), tmpDir, + WithInclude("requirements.txt"), + WithExclude("node_modules")) + require.NoError(t, err) + assert.Len(t, results, 3) + for _, r := range results { + assert.NotContains(t, r.RelPath, "node_modules") + } + }) + + t.Run("excludes hidden directories", func(t *testing.T) { + results, 
err := FindFiles(context.Background(), tmpDir, + WithInclude("requirements.txt"), + WithExclude(".*")) + require.NoError(t, err) + for _, r := range results { + assert.NotContains(t, r.RelPath, ".venv") + } + }) + + t.Run("multiple exclude patterns", func(t *testing.T) { + results, err := FindFiles(context.Background(), tmpDir, + WithInclude("requirements.txt"), + WithExclude("node_modules"), + WithExclude(".*")) + require.NoError(t, err) + for _, r := range results { + assert.NotContains(t, r.RelPath, "node_modules") + assert.NotContains(t, r.RelPath, ".venv") + } + }) + + t.Run("WithExcludes variadic", func(t *testing.T) { + results, err := FindFiles(context.Background(), tmpDir, + WithInclude("requirements.txt"), + WithExcludes("node_modules", ".*")) + require.NoError(t, err) + for _, r := range results { + assert.NotContains(t, r.RelPath, "node_modules") + assert.NotContains(t, r.RelPath, ".venv") + } + }) +} + +func TestFindFiles_ContextCancellation(t *testing.T) { + tmpDir := t.TempDir() + // Create nested structure + for i := 0; i < 100; i++ { + subDir := filepath.Join(tmpDir, "dir", "nested", "path", "very", "deep") + require.NoError(t, os.MkdirAll(subDir, 0755)) + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // Cancel immediately + + _, err := FindFiles(ctx, tmpDir, WithInclude("*.txt")) + assert.Error(t, err) + assert.ErrorIs(t, err, context.Canceled) +} + +func TestFindFiles_ValidationErrors(t *testing.T) { + tmpDir := t.TempDir() + + tests := []struct { + name string + rootDir string + opts []FindOption + wantErrMsg string + }{ + {"empty root directory", "", []FindOption{WithInclude("*.txt")}, "rootDir cannot be empty"}, + {"no search criteria", tmpDir, []FindOption{}, "at least one target file or include pattern must be specified"}, + {"invalid include pattern", tmpDir, []FindOption{WithInclude("[invalid")}, "invalid include pattern"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := 
FindFiles(context.Background(), tt.rootDir, tt.opts...) + assert.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErrMsg) + }) + } +} + +func TestFindFiles_MultipleTargetsAndGlobs(t *testing.T) { + tmpDir := t.TempDir() + setupFiles(t, tmpDir, map[string]string{ + "requirements.txt": "test", + "requirements-dev.txt": "test", + "setup.py": "test", + "pyproject.toml": "test", + "subdir/requirements.txt": "test", + "subdir/setup.py": "test", + }) + + tests := []struct { + name string + opts []FindOption + wantCount int + }{ + { + "multiple target files chained", + []FindOption{WithTargetFile("requirements.txt"), WithTargetFile("setup.py")}, + 2, + }, + { + "multiple target files variadic", + []FindOption{WithTargetFiles("requirements.txt", "setup.py", "pyproject.toml")}, + 3, + }, + { + "multiple include globs chained", + []FindOption{WithInclude("*.py"), WithInclude("*.toml")}, + 3, + }, + { + "multiple include globs variadic", + []FindOption{WithIncludes("*.py", "*.toml")}, + 3, + }, + { + "combine targets and globs", + []FindOption{WithTargetFile("requirements.txt"), WithInclude("*.py")}, + 3, + }, + { + "deduplicates overlapping results", + []FindOption{WithTargetFile("requirements.txt"), WithInclude("requirements*.txt")}, + 3, // requirements.txt (deduplicated), requirements-dev.txt, subdir/requirements.txt + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + results, err := FindFiles(context.Background(), tmpDir, tt.opts...) 
+ require.NoError(t, err) + assert.Len(t, results, tt.wantCount) + }) + } + + // Test error case separately + t.Run("errors on missing target file", func(t *testing.T) { + _, err := FindFiles(context.Background(), tmpDir, + WithTargetFile("missing.txt"), + WithTargetFile("requirements.txt")) + assert.Error(t, err) + assert.Contains(t, err.Error(), "missing.txt") + assert.Contains(t, err.Error(), "not found") + }) +} + +func TestFindFiles_EdgeCases(t *testing.T) { + t.Run("empty directory", func(t *testing.T) { + tmpDir := t.TempDir() + results, err := FindFiles(context.Background(), tmpDir, WithInclude("*.txt")) + require.NoError(t, err) + assert.Empty(t, results) + }) + + t.Run("only directories no files", func(t *testing.T) { + tmpDir := t.TempDir() + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "subdir1"), 0755)) + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "subdir2"), 0755)) + + results, err := FindFiles(context.Background(), tmpDir, WithInclude("*.txt")) + require.NoError(t, err) + assert.Empty(t, results) + }) + + t.Run("handles relative paths", func(t *testing.T) { + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "test.txt"), []byte("test"), 0644)) + + wd, err := os.Getwd() + require.NoError(t, err) + relDir, err := filepath.Rel(wd, tmpDir) + require.NoError(t, err) + + results, err := FindFiles(context.Background(), relDir, WithInclude("*.txt")) + require.NoError(t, err) + assert.Len(t, results, 1) + assert.True(t, filepath.IsAbs(results[0].Path), "path should be absolute") + }) +} From 469fbba38d5f47ddd4d954bb52e1826b2dd071cc Mon Sep 17 00:00:00 2001 From: James Davy Date: Mon, 24 Nov 2025 12:12:27 +0000 Subject: [PATCH 2/2] remove symlink --- pkg/ecosystems/discovery/README.md | 9 -------- pkg/ecosystems/discovery/files.go | 35 +++--------------------------- 2 files changed, 3 insertions(+), 41 deletions(-) diff --git a/pkg/ecosystems/discovery/README.md b/pkg/ecosystems/discovery/README.md index 6d9fc26..1243860 
100644 --- a/pkg/ecosystems/discovery/README.md +++ b/pkg/ecosystems/discovery/README.md @@ -11,7 +11,6 @@ Efficient file discovery utilities for finding manifest and configuration files - **Exclude Patterns**: Skip directories and files using glob patterns - **Context Support**: Cancellable operations for long-running searches - **Efficient Traversal**: Uses `filepath.WalkDir` for optimal performance -- **Symlink Handling**: Optional symlink following - **Structured Logging**: `slog` integration for debugging ## Usage @@ -96,14 +95,6 @@ results, err := discovery.FindFiles(ctx, "/path/to/project", discovery.WithExcludes("node_modules", ".*", "__pycache__")) ``` -### Follow Symlinks - -```go -results, err := discovery.FindFiles(ctx, "/path/to/project", - discovery.WithInclude("*.txt"), - discovery.WithFollowSymlinks(true)) -``` - ### Common Exclude Patterns ```go diff --git a/pkg/ecosystems/discovery/files.go b/pkg/ecosystems/discovery/files.go index b7d4ef4..517030d 100644 --- a/pkg/ecosystems/discovery/files.go +++ b/pkg/ecosystems/discovery/files.go @@ -19,10 +19,9 @@ const ( // findOptions configures file discovery behavior. type findOptions struct { - targetFiles []string - includeGlobs []string - excludeGlobs []string - followSymlinks bool + targetFiles []string + includeGlobs []string + excludeGlobs []string } // FindOption is a functional option for configuring file discovery. @@ -70,13 +69,6 @@ func WithExcludes(patterns ...string) FindOption { } } -// WithFollowSymlinks enables or disables following symbolic links. -func WithFollowSymlinks(follow bool) FindOption { - return func(o *findOptions) { - o.followSymlinks = follow - } -} - // FindResult represents a discovered file. 
type FindResult struct { Path string // Absolute path to the file @@ -241,11 +233,6 @@ func walkDirectory(ctx context.Context, absRoot string, opts *findOptions) ([]Fi return handleDirectory(d, relPath, opts.excludeGlobs) } - // Handle symlinks - if d.Type()&fs.ModeSymlink != 0 && !shouldFollowSymlink(path, relPath, opts.followSymlinks) { - return nil - } - // Check exclusions and pattern match for files if shouldIncludeFile(d, relPath, opts) { results = append(results, FindResult{ @@ -298,22 +285,6 @@ func handleDirectory(d fs.DirEntry, relPath string, excludePatterns []string) er return nil } -// shouldFollowSymlink determines if a symlink should be followed. -func shouldFollowSymlink(path, relPath string, followSymlinks bool) bool { - if !followSymlinks { - slog.Debug("Skipping symlink", slog.String(logKeyPath, relPath)) - return false - } - - // Resolve symlink and check if it's a file - targetInfo, err := os.Stat(path) - if err != nil || targetInfo.IsDir() { - return false - } - - return true -} - // shouldIncludeFile determines if a file should be included in results. // Returns true if the file matches any of the include globs and is not excluded. func shouldIncludeFile(d fs.DirEntry, relPath string, opts *findOptions) bool {