Skip to content

Commit 4ad7544

Browse files
authored
Implement unused import warning, and lowering of source code info extensions (#566)
This PR adds two related features: 1. An unused imports warning, which requires additional book-keeping to track which direct import caused a transitive import to get imported. 2. Codegen for the Buf source code info extension, which includes unused imports and whether a `syntax` production exists in the file. In the process of implementing this, I found a bug in how imports are built, which this PR fixes. It showed up because import causes weren't being listed correctly, leading to incorrect diagnosis of unused imports. I also fixed a bug in `yaml.go` where extensions we're being printed in the output.
1 parent cb175e9 commit 4ad7544

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1009
-79
lines changed

experimental/ir/fdp.go

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package ir
1717
import (
1818
"slices"
1919

20+
descriptorv1 "buf.build/gen/go/bufbuild/protodescriptor/protocolbuffers/go/buf/descriptor/v1"
2021
"google.golang.org/protobuf/proto"
2122
"google.golang.org/protobuf/types/descriptorpb"
2223

@@ -25,6 +26,7 @@ import (
2526
"github.com/bufbuild/protocompile/experimental/ir/presence"
2627
"github.com/bufbuild/protocompile/experimental/seq"
2728
"github.com/bufbuild/protocompile/internal/ext/cmpx"
29+
"github.com/bufbuild/protocompile/internal/ext/iterx"
2830
"github.com/bufbuild/protocompile/internal/ext/slicesx"
2931
)
3032

@@ -66,8 +68,20 @@ func DescriptorProtoBytes(file File, options ...DescriptorOption) ([]byte, error
6668
// DescriptorOption is an option to pass to [DescriptorSetBytes] or [DescriptorProtoBytes].
6769
type DescriptorOption func(*descGenerator)
6870

71+
// IncludeDebugInfo sets whether or not to include google.protobuf.SourceCodeInfo in
72+
// the output.
73+
func IncludeSourceCodeInfo(flag bool) DescriptorOption {
74+
return func(dg *descGenerator) {
75+
dg.includeDebugInfo = flag
76+
}
77+
}
78+
6979
type descGenerator struct {
70-
currentFile File
80+
currentFile File
81+
includeDebugInfo bool
82+
83+
sourceCodeInfo *descriptorpb.SourceCodeInfo
84+
sourceCodeInfoExtn *descriptorv1.SourceCodeInfoExtension
7185
}
7286

7387
func (dg *descGenerator) files(files []File, fds *descriptorpb.FileDescriptorSet) {
@@ -77,12 +91,20 @@ func (dg *descGenerator) files(files []File, fds *descriptorpb.FileDescriptorSet
7791
for file := range topoSort(files) {
7892
fdp := new(descriptorpb.FileDescriptorProto)
7993
fds.File = append(fds.File, fdp)
94+
8095
dg.file(file, fdp)
8196
}
8297
}
8398

8499
func (dg *descGenerator) file(file File, fdp *descriptorpb.FileDescriptorProto) {
85100
dg.currentFile = file
101+
if dg.includeDebugInfo {
102+
dg.sourceCodeInfo = new(descriptorpb.SourceCodeInfo)
103+
fdp.SourceCodeInfo = dg.sourceCodeInfo
104+
105+
dg.sourceCodeInfoExtn = new(descriptorv1.SourceCodeInfoExtension)
106+
proto.SetExtension(dg.sourceCodeInfo, descriptorv1.E_BufSourceCodeInfoExtension, dg.sourceCodeInfoExtn)
107+
}
86108

87109
fdp.Name = addr(file.Path())
88110
fdp.Package = addr(string(file.Package()))
@@ -94,12 +116,16 @@ func (dg *descGenerator) file(file File, fdp *descriptorpb.FileDescriptorProto)
94116
fdp.Syntax = addr(file.Syntax().String())
95117
}
96118

119+
if dg.sourceCodeInfoExtn != nil {
120+
dg.sourceCodeInfoExtn.IsSyntaxUnspecified = file.AST().Syntax().IsZero()
121+
}
122+
97123
// Canonicalize import order so that it does not change whenever we refactor
98124
// internal structures.
99-
// TODO: sort in declaration order to match protoc? Not done currently
100-
// since that requires additional book-keeping in [imports].
101125
imports := seq.ToSlice(file.Imports())
102-
slices.SortFunc(imports, cmpx.Key(Import.Path))
126+
slices.SortFunc(imports, cmpx.Key(func(imp Import) int {
127+
return imp.Decl.KeywordToken().Span().Start
128+
}))
103129
for i, imp := range imports {
104130
fdp.Dependency = append(fdp.Dependency, imp.Path())
105131
if imp.Public {
@@ -108,6 +134,10 @@ func (dg *descGenerator) file(file File, fdp *descriptorpb.FileDescriptorProto)
108134
if imp.Weak {
109135
fdp.WeakDependency = append(fdp.WeakDependency, int32(i))
110136
}
137+
138+
if dg.sourceCodeInfoExtn != nil && !imp.Used {
139+
dg.sourceCodeInfoExtn.UnusedDependency = append(dg.sourceCodeInfoExtn.UnusedDependency, int32(i))
140+
}
111141
}
112142

113143
for ty := range seq.Values(file.Types()) {
@@ -139,6 +169,10 @@ func (dg *descGenerator) file(file File, fdp *descriptorpb.FileDescriptorProto)
139169
fdp.Options = new(descriptorpb.FileOptions)
140170
dg.options(options, fdp.Options)
141171
}
172+
173+
if dg.sourceCodeInfoExtn != nil && iterx.Empty2(dg.sourceCodeInfoExtn.ProtoReflect().Range) {
174+
proto.ClearExtension(dg.sourceCodeInfo, descriptorv1.E_BufSourceCodeInfoExtension)
175+
}
142176
}
143177

144178
func (dg *descGenerator) message(ty Type, mdp *descriptorpb.DescriptorProto) {

experimental/ir/ir_imports.go

Lines changed: 44 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@ package ir
1717
import (
1818
"slices"
1919

20-
"github.com/bits-and-blooms/bitset"
21-
2220
"github.com/bufbuild/protocompile/experimental/ast"
2321
"github.com/bufbuild/protocompile/experimental/seq"
2422
"github.com/bufbuild/protocompile/internal/ext/mapsx"
@@ -32,6 +30,7 @@ type Import struct {
3230
Public, Weak bool // The kind of import this is.
3331
Direct bool // Whether this is a direct or transitive import.
3432
Visible bool // Whether this import's symbols are visible in the current file.
33+
Used bool // Whether this import has been marked as used.
3534
Decl ast.DeclImport // The import declaration.
3635
}
3736

@@ -57,21 +56,22 @@ type imports struct {
5756
// 2. Weak imports.
5857
// 3. Regular imports.
5958
// 4. Transitive public imports.
60-
// 4. Transitive imports.
59+
// 5. Transitive imports.
6160
//
6261
// The fields after this one specify where each of these segments ends.
6362
//
6463
// The last element of this slice is always descriptor.proto, even if it
6564
// exists elsewhere as an ordinary import.
6665
files []imported
6766

68-
// Which of the above files we are permitted to import from.
69-
visible bitset.BitSet
70-
7167
// Maps the path of each file to its index in files. This is used for
7268
// mapping from one [Context]'s file IDs to another's.
7369
byPath intern.Map[uint32]
7470

71+
// Map of path of each imported file to a direct import which causes it to
72+
// be imported. This is used for marking which imports are used.
73+
causes intern.Map[uint32]
74+
7575
// NOTE: public imports always come first. This ensures that when
7676
// recursively determining public imports, we consider public imports'
7777
// recursive imports first. Consider the following sequence of files:
@@ -105,8 +105,9 @@ type imports struct {
105105

106106
// imported wraps an imported [File] and the import statement declaration [ast.DeclImport].
107107
type imported struct {
108-
file File
109-
decl ast.DeclImport
108+
file File
109+
decl ast.DeclImport
110+
visible, used bool
110111
}
111112

112113
// Append appends a direct import to this imports table.
@@ -132,7 +133,7 @@ func (i *imports) AddDirect(imp Import) {
132133
//
133134
// Must only be called once, after all direct imports are added.
134135
func (i *imports) Recurse(dedup intern.Map[ast.DeclImport]) {
135-
for file := range seq.Values(i.Directs()) {
136+
for _, file := range seq.All(i.Directs()) {
136137
for imp := range seq.Values(file.TransitiveImports()) {
137138
if !mapsx.AddZero(dedup, imp.InternedPath()) {
138139
continue
@@ -150,6 +151,20 @@ func (i *imports) Recurse(dedup intern.Map[ast.DeclImport]) {
150151
i.Insert(imp, -1, imp.Public)
151152
}
152153
}
154+
155+
// Now, build the path and causes maps.
156+
i.byPath = make(intern.Map[uint32])
157+
i.causes = make(intern.Map[uint32])
158+
159+
for n, imp := range i.files {
160+
i.byPath[imp.file.InternedPath()] = uint32(n)
161+
}
162+
for k, file := range seq.All(i.Directs()) {
163+
mapsx.Add(i.causes, file.InternedPath(), uint32(k))
164+
for imp := range seq.Values(file.TransitiveImports()) {
165+
mapsx.Add(i.causes, imp.InternedPath(), uint32(k))
166+
}
167+
}
153168
}
154169

155170
// Insert inserts a new import at the given position.
@@ -160,13 +175,19 @@ func (i *imports) Insert(imp Import, pos int, visible bool) {
160175
pos = len(i.files)
161176
}
162177

163-
if i.byPath == nil {
164-
i.byPath = make(intern.Map[uint32])
165-
}
178+
i.files = slices.Insert(i.files, pos, imported{
179+
file: imp.File,
180+
decl: imp.Decl,
181+
visible: visible,
182+
})
183+
}
166184

167-
i.files = slices.Insert(i.files, pos, imported{file: imp.File, decl: imp.Decl})
168-
i.byPath[imp.File.InternedPath()] = uint32(pos)
169-
i.visible.SetTo(uint(pos), visible)
185+
// MarkUsed records a file as used, which affects the values of [Import].Used.
186+
func (i *imports) MarkUsed(file File) {
187+
idx, ok := i.causes[file.InternedPath()]
188+
if ok {
189+
i.files[idx].used = true
190+
}
170191
}
171192

172193
// DescriptorProto returns the file for descriptor.proto.
@@ -181,21 +202,25 @@ func (i *imports) Directs() seq.Indexer[Import] {
181202
i.files[:i.importEnd],
182203
func(j int, imported imported) Import {
183204
n := uint32(j)
205+
public := n < i.publicEnd
184206
return Import{
185207
File: imported.file,
186-
Public: n < i.publicEnd,
187-
Weak: n >= i.publicEnd && n < i.weakEnd,
208+
Public: public,
209+
Weak: !public && n < i.weakEnd,
188210
Direct: true,
189211
Visible: true,
190212
Decl: imported.decl,
213+
214+
// Public imports are implicitly always used.
215+
Used: imported.used || public,
191216
}
192217
},
193218
)
194219
}
195220

196221
// Transitive returns an indexer over the Transitive imports.
197222
//
198-
// This function does not report whether those imports are weak or not.
223+
// This function does not report whether those imports are weak or used.
199224
func (i *imports) Transitive() seq.Indexer[Import] {
200225
return seq.NewFixedSlice(
201226
i.files[:max(0, len(i.files)-1)], // Exclude the implicit descriptor.proto.
@@ -206,7 +231,7 @@ func (i *imports) Transitive() seq.Indexer[Import] {
206231
Public: n < i.publicEnd ||
207232
(n >= i.importEnd && n < i.transPublicEnd),
208233
Direct: n < i.importEnd,
209-
Visible: i.visible.Test(uint(j)),
234+
Visible: imported.visible,
210235
Decl: imported.decl,
211236
}
212237
},

experimental/ir/ir_symbol.go

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ func (s Symbol) AsMethod() Method {
145145
func (s Symbol) Visible() bool {
146146
return s.ref.file <= 0 ||
147147
s.Kind() == SymbolKindPackage || // Packages don't get visibility checks.
148-
s.Context().imports.visible.Test(uint(s.ref.file)-1)
148+
s.Context().imports.files[uint(s.ref.file)-1].visible
149149
}
150150

151151
// Definition returns a span for the definition site of this symbol;
@@ -482,24 +482,26 @@ again:
482482
candidate = slices.Delete(candidate, len(scope), oldLen)
483483
}
484484

485-
if !scopeSearch {
486-
// This was a single identifier name so we're done.
487-
return found, expected
485+
if scopeSearch {
486+
// Now search for the full name inside of the scope we found.
487+
candidate = append(candidate, name[len(first):]...)
488+
found = s.lookupBytes(c, candidate)
489+
if found.ptr.Nil() {
490+
// Try again, this time using the full candidate name. This happens
491+
// expressly for the purpose of diagnostics.
492+
scopeSearch = false
493+
// Need to clear the found scope, since otherwise we might get a weird
494+
// error message where we say that we found the parent scope.
495+
found = ref[rawSymbol]{}
496+
expected = FullName(candidate)
497+
goto again
498+
}
488499
}
489500

490-
// Now search for the full name inside of the scope we found.
491-
candidate = append(candidate, name[len(first):]...)
492-
r := s.lookupBytes(c, candidate)
493-
if r.ptr.Nil() {
494-
// Try again, this time using the full candidate name. This happens
495-
// expressly for the purpose of diagnostics.
496-
scopeSearch = false
497-
// Need to clear the found scope, since otherwise we might get a weird
498-
// error message where we say that we found the parent scope.
499-
found = ref[rawSymbol]{}
500-
expected = FullName(candidate)
501-
goto again
501+
file := found.context(c).File()
502+
if file != c.File() {
503+
c.imports.MarkUsed(file)
502504
}
503505

504-
return r, expected
506+
return found, expected
505507
}

experimental/ir/ir_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ type Test struct {
5555
Files []File `yaml:"files"`
5656
ExcludeWKTSources bool `yaml:"exclude_wkt_sources"`
5757
OutputWKTs bool `yaml:"output_wkts"`
58+
SourceCodeInfo bool `yaml:"source_code_info"`
5859
}
5960

6061
type File struct {
@@ -130,7 +131,7 @@ func TestIR(t *testing.T) {
130131

131132
irs := slicesx.Transform(results, func(r incremental.Result[ir.File]) ir.File { return r.Value })
132133
irs = slices.DeleteFunc(irs, ir.File.IsZero)
133-
bytes, err := ir.DescriptorSetBytes(irs)
134+
bytes, err := ir.DescriptorSetBytes(irs, ir.IncludeSourceCodeInfo(test.SourceCodeInfo))
134135
require.NoError(t, err)
135136

136137
fds := new(descriptorpb.FileDescriptorSet)
@@ -176,6 +177,7 @@ func symtabProto(files []ir.File, t *Test) *compilerpb.SymbolSet {
176177
Weak: imp.Weak,
177178
Transitive: !imp.Direct,
178179
Visible: imp.Visible,
180+
Used: imp.Used,
179181
})
180182
}
181183
slices.SortFunc(symtab.Imports, cmpx.Key(func(x *compilerpb.Import) string { return x.Path }))

experimental/ir/lower.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,4 +102,6 @@ func lower(c *Context, r *report.Report, importer Importer) {
102102
// done in two separate steps.
103103
populateOptionTargets(c.File(), r)
104104
validateOptionTargets(c.File(), r)
105+
106+
diagnoseUnusedImports(c.File(), r)
105107
}

experimental/ir/lower_imports.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ func buildImports(f File, r *report.Report, importer Importer) {
109109
c.imports.Recurse(dedup)
110110

111111
// Check if descriptor.proto was transitively imported. If not, import it.
112-
if idx, ok := c.imports.byPath.Get(&f.Context().session.intern, DescriptorProtoPath); ok {
112+
if idx, ok := c.imports.byPath[f.Context().session.builtins.DescriptorFile]; ok {
113113
// Copy it to the end so that it's easy to find.
114114
c.imports.files = append(c.imports.files, c.imports.files[idx])
115115
return
@@ -119,6 +119,8 @@ func buildImports(f File, r *report.Report, importer Importer) {
119119
// avoid cycles.
120120
if f.IsDescriptorProto() {
121121
c.imports.Insert(Import{File: f}, -1, false)
122+
c.imports.byPath[f.Context().session.builtins.DescriptorFile] = uint32(len(c.imports.files) - 1)
123+
c.imports.causes[f.Context().session.builtins.DescriptorFile] = uint32(len(c.imports.files) - 1)
122124
return
123125
}
124126

@@ -134,6 +136,8 @@ func buildImports(f File, r *report.Report, importer Importer) {
134136
}
135137

136138
c.imports.Insert(Import{File: dproto, Decl: ast.DeclImport{}}, -1, false)
139+
c.imports.byPath[f.Context().session.builtins.DescriptorFile] = uint32(len(c.imports.files) - 1)
140+
c.imports.causes[f.Context().session.builtins.DescriptorFile] = uint32(len(c.imports.files) - 1)
137141
}
138142

139143
// diagnoseCycle generates a diagnostic for an import cycle, showing each

experimental/ir/lower_validate.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright 2020-2025 Buf Technologies, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package ir
16+
17+
import (
18+
"github.com/bufbuild/protocompile/experimental/report"
19+
"github.com/bufbuild/protocompile/experimental/seq"
20+
)
21+
22+
// diagnoseUnusedImports generates diagnostics for each unused import.
23+
func diagnoseUnusedImports(f File, r *report.Report) {
24+
for imp := range seq.Values(f.Imports()) {
25+
if imp.Used {
26+
continue
27+
}
28+
29+
r.Warnf("unused import \"%s\"", f.Path()).Apply(
30+
report.Snippet(imp.Decl.ImportPath()),
31+
report.SuggestEdits(imp.Decl, "delete it", report.Edit{
32+
Start: 0, End: imp.Decl.Span().Len(),
33+
}),
34+
report.Helpf("no symbols from this file are referenced"),
35+
)
36+
}
37+
}

0 commit comments

Comments
 (0)