Skip to content

Commit ad775b1

Browse files
committed
Update vfio-manage to choose best VFIO driver when binding devices
Rather than always binding GPUs to the vfio-pci driver, this commit introduces logic to check whether the running kernel has a VFIO variant driver available that is a better match for the device. This is required on Grace-based systems, where the nvgrace_gpu_vfio_pci module must be used instead of the vfio-pci module. We read the modalias file for a given device, then we look through /lib/modules/${kernel_version}/modules.alias for the vfio_pci alias that matches with the fewest wildcard ('*') fields. The code introduced in this commit is inspired by: https://gitlab.com/libvirt/libvirt/-/commit/82e2fac297105f554f57fb589002933231b4f711 Signed-off-by: Christopher Desiniotis <[email protected]>
1 parent ab47270 commit ad775b1

31 files changed

+11256
-28
lines changed

cmd/vfio-manage/bind.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,10 @@ type bindOptions struct {
4040
// newBindCommand constructs a bind command with the specified logger
4141
func newBindCommand(logger *logrus.Logger) *cli.Command {
4242
c := bindCommand{
43-
logger: logger,
44-
nvpciLib: nvpci.New(),
43+
logger: logger,
44+
nvpciLib: nvpci.New(
45+
nvpci.WithLogger(logger),
46+
),
4547
}
4648
return c.build()
4749
}

cmd/vfio-manage/unbind.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,10 @@ type unbindOptions struct {
4040
// newUnbindCommand constructs an unbind command with the specified logger
4141
func newUnbindCommand(logger *logrus.Logger) *cli.Command {
4242
c := unbindCommand{
43-
logger: logger,
44-
nvpciLib: nvpci.New(),
43+
logger: logger,
44+
nvpciLib: nvpci.New(
45+
nvpci.WithLogger(logger),
46+
),
4547
}
4648
return c.build()
4749
}

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ require (
66
github.com/NVIDIA/go-nvlib v0.8.1
77
github.com/moby/sys/mount v0.3.4
88
github.com/sirupsen/logrus v1.9.3
9+
github.com/stretchr/testify v1.11.1
910
github.com/urfave/cli/v2 v2.27.7
1011
golang.org/x/sys v0.36.0
1112
k8s.io/api v0.33.2
@@ -53,6 +54,7 @@ require (
5354
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
5455
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
5556
github.com/pkg/errors v0.9.1 // indirect
57+
github.com/pmezard/go-difflib v1.0.0 // indirect
5658
github.com/russross/blackfriday/v2 v2.1.0 // indirect
5759
github.com/spf13/cobra v1.8.1 // indirect
5860
github.com/spf13/pflag v1.0.5 // indirect

internal/nvpci/modalias.go

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package nvpci
18+
19+
import (
20+
"fmt"
21+
"math"
22+
"reflect"
23+
"strings"
24+
25+
"golang.org/x/sys/unix"
26+
)
27+
28+
// modAlias holds the individual fields of a modalias string of the form
//
//	vNNNNNNNNdNNNNNNNNsvNNNNNNNNsdNNNNNNNNbcNNscNNiNN
//
// Each field stores the text that follows its tag ("v", "d", "sv", ...).
// In the example every run of "N" has the width shown, unless the field is
// replaced with a wildcard ("*").
type modAlias struct {
	vendor     string // follows the "v" tag
	device     string // follows the "d" tag
	subvendor  string // follows the "sv" tag
	subdevice  string // follows the "sd" tag
	baseClass  string // follows the "bc" tag
	subClass   string // follows the "sc" tag
	interface_ string // follows the "i" tag
}
43+
44+
// vfioAlias represents an entry from the modules.alias file for a vfio driver
45+
type vfioAlias struct {
46+
modAlias *modAlias // The modalias pattern
47+
driver string // The vfio driver name
48+
}
49+
50+
func parseModAliasString(input string) (*modAlias, error) {
51+
if input == "" {
52+
return nil, fmt.Errorf("modalias string is empty")
53+
}
54+
55+
input = strings.TrimSpace(input)
56+
57+
// Trim the leading "pci:" prefix in the modalias file
58+
split := strings.SplitN(input, ":", 2)
59+
if len(split) != 2 {
60+
return nil, fmt.Errorf("unexpected number of parts in modalias after trimming 'pci:' prefix: %s", input)
61+
}
62+
input = split[1]
63+
64+
if !strings.HasPrefix(input, "v") {
65+
return nil, fmt.Errorf("modalias must start with 'v', got: %s", input)
66+
}
67+
68+
ma := &modAlias{}
69+
remaining := input[1:] // skip 'v'
70+
71+
vendor, remaining, err := extractField(remaining, "d")
72+
if err != nil {
73+
return nil, fmt.Errorf("failed to parse vendor: %w", err)
74+
}
75+
ma.vendor = vendor
76+
77+
device, remaining, err := extractField(remaining, "sv")
78+
if err != nil {
79+
return nil, fmt.Errorf("failed to parse device: %w", err)
80+
}
81+
ma.device = device
82+
83+
subvendor, remaining, err := extractField(remaining, "sd")
84+
if err != nil {
85+
return nil, fmt.Errorf("failed to parse subvendor: %w", err)
86+
}
87+
ma.subvendor = subvendor
88+
89+
subdevice, remaining, err := extractField(remaining, "bc")
90+
if err != nil {
91+
return nil, fmt.Errorf("failed to parse subdevice: %w", err)
92+
}
93+
ma.subdevice = subdevice
94+
95+
baseClass, remaining, err := extractField(remaining, "sc")
96+
if err != nil {
97+
return nil, fmt.Errorf("failed to parse base class: %w", err)
98+
}
99+
ma.baseClass = baseClass
100+
101+
subClass, remaining, err := extractField(remaining, "i")
102+
if err != nil {
103+
return nil, fmt.Errorf("failed to parse subclass: %w", err)
104+
}
105+
ma.subClass = subClass
106+
107+
ma.interface_ = remaining
108+
109+
return ma, nil
110+
}
111+
112+
// extractField splits input at the first occurrence of delimiter.
//
// It returns the text before the delimiter and the text after it; the
// delimiter itself is part of neither. If input does not contain delimiter,
// both strings are empty and a non-nil error is returned.
func extractField(input, delimiter string) (string, string, error) {
	value, remaining, found := strings.Cut(input, delimiter)
	if !found {
		return "", "", fmt.Errorf("failed to find index of the first instance of %q in string %q", delimiter, input)
	}

	return value, remaining, nil
}
125+
126+
func getKernelVersion() (string, error) {
127+
var uname unix.Utsname
128+
if err := unix.Uname(&uname); err != nil {
129+
return "", err
130+
}
131+
132+
// Convert C-style byte array to Go string
133+
release := make([]byte, 0, len(uname.Release))
134+
for _, c := range uname.Release {
135+
if c == 0 {
136+
break
137+
}
138+
release = append(release, c)
139+
}
140+
141+
return string(release), nil
142+
}
143+
144+
// getVFIOAliases returns the vfio driver aliases from the input string.
145+
// The input string is expected to be the content of a modules.alias file.
146+
// Only lines that begin with 'alias vfio_pci:' are parsed, with the
147+
// format being:
148+
//
149+
// alias vfio_pci:<modalias string> <driver_name>
150+
func getVFIOAliases(input string) []vfioAlias {
151+
var aliases []vfioAlias
152+
153+
lines := strings.Split(input, "\n")
154+
for _, line := range lines {
155+
line = strings.TrimSpace(line)
156+
157+
if !strings.HasPrefix(line, "alias vfio_pci:") {
158+
continue
159+
}
160+
161+
split := strings.SplitN(line, " ", 3)
162+
if len(split) != 3 {
163+
continue
164+
}
165+
modAliasStr := split[1]
166+
modAlias, err := parseModAliasString(modAliasStr)
167+
if err != nil {
168+
continue
169+
}
170+
171+
driver := split[2]
172+
aliases = append(aliases, vfioAlias{
173+
modAlias: modAlias,
174+
driver: driver,
175+
})
176+
}
177+
178+
return aliases
179+
}
180+
181+
// findBestMatch finds the best matching VFIO driver for the given modalias
182+
// by comparing against all available vfio alias patterns. The best match
183+
// is the one with the fewest wildcard characters.
184+
func findBestMatch(deviceModAlias *modAlias, aliases []vfioAlias) string {
185+
var bestDriver string
186+
bestWildcardCount := math.MaxInt
187+
188+
for _, alias := range aliases {
189+
if matches, wildcardCount := matchModalias(deviceModAlias, alias.modAlias); matches {
190+
if wildcardCount < bestWildcardCount {
191+
bestDriver = alias.driver
192+
bestWildcardCount = wildcardCount
193+
}
194+
}
195+
}
196+
197+
return bestDriver
198+
}
199+
200+
// matchModalias checks if a device modalias matches a pattern from modules.alias
201+
// Returns true if it matches and the number of wildcards
202+
func matchModalias(deviceModAlias, patternModAlias *modAlias) (bool, int) {
203+
wildcardCount := 0
204+
205+
modAliasType := reflect.TypeOf(*deviceModAlias)
206+
deviceModAliasValue := reflect.ValueOf(*deviceModAlias)
207+
patternModAliasValue := reflect.ValueOf(*patternModAlias)
208+
209+
// iterate over both modAlias structs, comparing each field
210+
for i := 0; i < modAliasType.NumField(); i++ {
211+
deviceValue := deviceModAliasValue.Field(i).String()
212+
patternValue := patternModAliasValue.Field(i).String()
213+
214+
if patternValue == "*" {
215+
wildcardCount++
216+
continue
217+
}
218+
219+
if deviceValue != patternValue {
220+
return false, wildcardCount
221+
}
222+
}
223+
return true, wildcardCount
224+
}

0 commit comments

Comments
 (0)