Skip to content

Commit c6dc19b

Browse files
authored
Switch to a higher file-limit approach. (#1194)
Apparently CompareCommits "only" returns up to 300 files and 250 commits, so this is an unreliable way of enumerating changes. The new approach uses `GetTree`, which returns all of the files and their SHAs with a limit of 100k files, so we can use this to determine file changes with only a couple of API calls. Signed-off-by: Matt Moore <[email protected]>
1 parent 831fa0b commit c6dc19b

File tree

1 file changed

+50
-7
lines changed
  • modules/github-path-reconciler/cmd/push

1 file changed

+50
-7
lines changed

modules/github-path-reconciler/cmd/push/main.go

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,17 +149,60 @@ func (h *pushHandler) handlePushEvent(ctx context.Context, event cloudevents.Eve
149149
return fmt.Errorf("failed to get GitHub client: %w", err)
150150
}
151151

152-
// Use the GitHub API to compare commits to get all changed files
153-
// This handles all merge strategies correctly (merge commits, squash, rebase)
154-
comparison, _, err := ghClient.Repositories.CompareCommits(ctx, owner, repo, before, after, &github.ListOptions{})
152+
// Use Git Tree comparison to get all changed files
153+
// CompareCommits is limited to 300 files, whereas a recursive GetTree returns up to 100k files
154+
// Get the commit objects to access their tree SHAs
155+
beforeCommit, _, err := ghClient.Git.GetCommit(ctx, owner, repo, before)
155156
if err != nil {
156-
return fmt.Errorf("failed to compare commits: %w", err)
157+
return fmt.Errorf("failed to get before commit: %w", err)
157158
}
158159

159-
// Collect all changed files from the comparison
160+
afterCommit, _, err := ghClient.Git.GetCommit(ctx, owner, repo, after)
161+
if err != nil {
162+
return fmt.Errorf("failed to get after commit: %w", err)
163+
}
164+
165+
// Get recursive trees for both commits
166+
beforeTree, _, err := ghClient.Git.GetTree(ctx, owner, repo, beforeCommit.Tree.GetSHA(), true)
167+
if err != nil {
168+
return fmt.Errorf("failed to get before tree: %w", err)
169+
}
170+
171+
afterTree, _, err := ghClient.Git.GetTree(ctx, owner, repo, afterCommit.Tree.GetSHA(), true)
172+
if err != nil {
173+
return fmt.Errorf("failed to get after tree: %w", err)
174+
}
175+
176+
// Build maps of file paths to their blob SHAs
177+
beforeFiles := make(map[string]string) // path -> SHA
178+
for _, entry := range beforeTree.Entries {
179+
if entry.GetType() == "blob" {
180+
beforeFiles[entry.GetPath()] = entry.GetSHA()
181+
}
182+
}
183+
184+
afterFiles := make(map[string]string) // path -> SHA
185+
for _, entry := range afterTree.Entries {
186+
if entry.GetType() == "blob" {
187+
afterFiles[entry.GetPath()] = entry.GetSHA()
188+
}
189+
}
190+
191+
// Find all changed files (added, modified, or deleted)
160192
changedFiles := make(map[string]struct{})
161-
for _, file := range comparison.Files {
162-
changedFiles[file.GetFilename()] = struct{}{}
193+
194+
// Find added or modified files
195+
for path, afterSHA := range afterFiles {
196+
if beforeSHA, exists := beforeFiles[path]; !exists || beforeSHA != afterSHA {
197+
changedFiles[path] = struct{}{}
198+
}
199+
}
200+
201+
// Find deleted files
202+
for path := range beforeFiles {
203+
if _, exists := afterFiles[path]; !exists {
204+
changedFiles[path] = struct{}{}
205+
}
163206
}
164207

165208
log.Infof("Processing %d changed files", len(changedFiles))

0 commit comments

Comments
 (0)