Skip to content

Commit b073588

Browse files
authored
Merge pull request #53 from snyk/feat/pip-report-to-graph
feat: convert pip report into a graph
2 parents 628cdf3 + c92f0d9 commit b073588

File tree

2 files changed

+281
-0
lines changed

2 files changed

+281
-0
lines changed
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package pip
2+
3+
import (
4+
"fmt"
5+
"log/slog"
6+
"regexp"
7+
"strings"
8+
9+
"github.com/snyk/cli-extension-dep-graph/pkg/ecosystems"
10+
)
11+
12+
// depStringPattern captures the leading run of name characters (ASCII
// letters, digits, ".", "_" and "-") at the very start of a requirement
// string; whatever follows (version specifier, extras, markers) is ignored.
//
//	"urllib3 (<3,>=1.21.1)" -> "urllib3"
//	"certifi (>=2017.4.17)" -> "certifi"
//	"idna (<4,>=2.5)"       -> "idna"
var depStringPattern = regexp.MustCompile(
	`^([a-zA-Z0-9._-]+)`,
)
17+
18+
// ToDepgraph converts a pip install Report into a Depgraph.
19+
// The root package ID is "root" and points to all direct dependencies.
20+
func (r *Report) ToDepgraph() (*ecosystems.Depgraph, error) {
21+
if r == nil {
22+
return nil, fmt.Errorf("report cannot be nil")
23+
}
24+
25+
slog.Debug("Converting pip report to depgraph", slog.Int("total_packages", len(r.Install)))
26+
27+
// First pass: index packages by name for dependency resolution
28+
// Pip's dependency resolver ensures only one version of each package is installed
29+
slog.Debug("Building package name index for dependency resolution")
30+
packageByName := make(map[string]InstallItem)
31+
for _, item := range r.Install {
32+
packageByName[strings.ToLower(item.Metadata.Name)] = item
33+
}
34+
35+
// Second pass: build packages, graph, and collect direct dependencies
36+
slog.Debug("Building dependency graph")
37+
packages := make(map[ecosystems.PackageID]ecosystems.Package)
38+
graph := make(map[ecosystems.PackageID][]ecosystems.PackageID)
39+
var directDeps []ecosystems.PackageID
40+
41+
for _, item := range r.Install {
42+
version := item.Metadata.Version
43+
if version == "" {
44+
slog.Debug("Package has empty version, using fallback",
45+
slog.String("package", item.Metadata.Name))
46+
version = "?"
47+
}
48+
pkgID := toPackageID(item.Metadata.Name, version)
49+
50+
// Add to packages map
51+
packages[pkgID] = ecosystems.Package{
52+
PackageID: pkgID,
53+
PackageName: item.Metadata.Name,
54+
Version: version,
55+
}
56+
57+
// Track direct dependencies
58+
if item.IsDirectDependency() {
59+
directDeps = append(directDeps, pkgID)
60+
}
61+
62+
// Build dependency list for this package
63+
var deps []ecosystems.PackageID
64+
for _, depString := range item.Metadata.RequiresDist {
65+
if depName := extractPackageName(depString); depName != "" {
66+
if depItem, found := packageByName[strings.ToLower(depName)]; found {
67+
depVersion := depItem.Metadata.Version
68+
if depVersion == "" {
69+
depVersion = "?"
70+
}
71+
deps = append(deps, toPackageID(depItem.Metadata.Name, depVersion))
72+
}
73+
}
74+
}
75+
graph[pkgID] = deps
76+
}
77+
78+
// Add root pointing to direct dependencies
79+
graph["root"] = directDeps
80+
81+
slog.Debug("Successfully converted pip report to depgraph",
82+
slog.Int("total_packages", len(packages)),
83+
slog.Int("direct_dependencies", len(directDeps)),
84+
slog.Int("graph_nodes", len(graph)))
85+
86+
return &ecosystems.Depgraph{
87+
Packages: packages,
88+
Graph: graph,
89+
RootPackageID: "root",
90+
}, nil
91+
}
92+
93+
// toPackageID creates a PackageID in the format "name@version".
94+
func toPackageID(name, version string) ecosystems.PackageID {
95+
return ecosystems.PackageID(fmt.Sprintf("%s@%s", name, version))
96+
}
97+
98+
// extractPackageName extracts the package name from a dependency string.
99+
// Example: "urllib3 (<3,>=1.21.1)" -> "urllib3".
100+
func extractPackageName(depString string) string {
101+
matches := depStringPattern.FindStringSubmatch(depString)
102+
if len(matches) > 1 {
103+
return matches[1]
104+
}
105+
return ""
106+
}
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
//go:build !integration
2+
// +build !integration
3+
4+
package pip
5+
6+
import (
7+
"testing"
8+
9+
"github.com/stretchr/testify/assert"
10+
"github.com/stretchr/testify/require"
11+
12+
"github.com/snyk/cli-extension-dep-graph/pkg/ecosystems"
13+
)
14+
15+
func TestReport_ToDepgraph(t *testing.T) {
16+
t.Run("simple report with direct and transitive deps", func(t *testing.T) {
17+
report := &Report{
18+
Install: []InstallItem{
19+
{
20+
Metadata: PackageMetadata{
21+
Name: "requests",
22+
Version: "2.31.0",
23+
RequiresDist: []string{
24+
"urllib3 (<3,>=1.21.1)",
25+
"certifi (>=2017.4.17)",
26+
},
27+
},
28+
Requested: true, // Direct dependency
29+
},
30+
{
31+
Metadata: PackageMetadata{
32+
Name: "urllib3",
33+
Version: "2.0.4",
34+
RequiresDist: []string{
35+
"certifi",
36+
},
37+
},
38+
Requested: false, // Transitive dependency
39+
},
40+
{
41+
Metadata: PackageMetadata{
42+
Name: "certifi",
43+
Version: "2023.7.22",
44+
RequiresDist: []string{}, // Leaf package
45+
},
46+
Requested: false, // Transitive dependency
47+
},
48+
},
49+
}
50+
51+
depgraph, err := report.ToDepgraph()
52+
require.NoError(t, err)
53+
54+
// Verify structure
55+
assert.Equal(t, ecosystems.PackageID("root"), depgraph.RootPackageID)
56+
assert.Len(t, depgraph.Packages, 3)
57+
assert.Len(t, depgraph.Graph, 4) // 3 packages + root
58+
59+
// Verify root points to direct dependency
60+
assert.Equal(t, []ecosystems.PackageID{"[email protected]"}, depgraph.Graph["root"])
61+
62+
// Verify dependency chain
63+
assert.ElementsMatch(t, []ecosystems.PackageID{"[email protected]", "[email protected]"},
64+
depgraph.Graph["[email protected]"])
65+
assert.Equal(t, []ecosystems.PackageID{"[email protected]"},
66+
depgraph.Graph["[email protected]"])
67+
assert.Empty(t, depgraph.Graph["[email protected]"])
68+
})
69+
70+
t.Run("multiple direct dependencies", func(t *testing.T) {
71+
report := &Report{
72+
Install: []InstallItem{
73+
{
74+
Metadata: PackageMetadata{
75+
Name: "requests",
76+
Version: "2.31.0",
77+
RequiresDist: []string{},
78+
},
79+
Requested: true,
80+
},
81+
{
82+
Metadata: PackageMetadata{
83+
Name: "flask",
84+
Version: "2.3.0",
85+
RequiresDist: []string{},
86+
},
87+
Requested: true,
88+
},
89+
},
90+
}
91+
92+
depgraph, err := report.ToDepgraph()
93+
require.NoError(t, err)
94+
95+
assert.Len(t, depgraph.Packages, 2)
96+
assert.ElementsMatch(t, []ecosystems.PackageID{"[email protected]", "[email protected]"},
97+
depgraph.Graph["root"])
98+
})
99+
100+
t.Run("empty report", func(t *testing.T) {
101+
report := &Report{
102+
Install: []InstallItem{},
103+
}
104+
105+
depgraph, err := report.ToDepgraph()
106+
require.NoError(t, err)
107+
108+
assert.Empty(t, depgraph.Packages)
109+
assert.Empty(t, depgraph.Graph["root"])
110+
})
111+
112+
t.Run("nil report", func(t *testing.T) {
113+
var report *Report
114+
_, err := report.ToDepgraph()
115+
assert.Error(t, err)
116+
assert.Contains(t, err.Error(), "cannot be nil")
117+
})
118+
}
119+
120+
func TestExtractPackageName(t *testing.T) {
121+
tests := map[string]struct {
122+
depString string
123+
want string
124+
}{
125+
"with_constraints": {"urllib3 (<3,>=1.21.1)", "urllib3"},
126+
"with_extras": {"requests[security] (>=2.20.0)", "requests"},
127+
"special_chars": {"some-package_name.py (>=1.0)", "some-package_name.py"},
128+
"no_constraints": {"certifi", "certifi"},
129+
"empty": {"", ""},
130+
"no_space_version": {"idna>=3.3", "idna"},
131+
"with_extra_marker": {"mypy; extra == \"dev\"", "mypy"},
132+
"hyphenated_with_extra": {"pre-commit; extra == \"dev\"", "pre-commit"},
133+
"hyphenated_with_marker": {"pytest-cov; extra == \"dev\"", "pytest-cov"},
134+
"multiple_hyphens": {"pytest-socket; extra == \"dev\"", "pytest-socket"},
135+
"simple_with_extra": {"pytest; extra == \"dev\"", "pytest"},
136+
"single_char_with_marker": {"ruff; extra == \"dev\"", "ruff"},
137+
}
138+
139+
for name, tt := range tests {
140+
t.Run(name, func(t *testing.T) {
141+
assert.Equal(t, tt.want, extractPackageName(tt.depString))
142+
})
143+
}
144+
}
145+
146+
func TestToPackageID(t *testing.T) {
147+
tests := map[string]struct {
148+
name string
149+
version string
150+
want ecosystems.PackageID
151+
}{
152+
"standard package": {
153+
name: "requests",
154+
version: "2.31.0",
155+
156+
},
157+
"package with dash": {
158+
name: "some-package",
159+
version: "1.0.0",
160+
161+
},
162+
"empty version": {
163+
name: "package",
164+
version: "",
165+
want: "package@", // toPackageID doesn't handle fallback
166+
},
167+
}
168+
169+
for name, tt := range tests {
170+
t.Run(name, func(t *testing.T) {
171+
got := toPackageID(tt.name, tt.version)
172+
assert.Equal(t, tt.want, got)
173+
})
174+
}
175+
}

0 commit comments

Comments
 (0)