Skip to content

Commit 355997d

Browse files
authored
Merge pull request #314 from elezar/CNT-4032/mulitple-naming-strategies
Allow multiple naming strategies when generating CDI specification
2 parents a8d4880 + b6efd30 commit 355997d

File tree

14 files changed

+128
-64
lines changed

14 files changed

+128
-64
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
44
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
5+
* Allow multiple device naming strategies for the `nvidia-ctk cdi generate` command. This allows a single
6+
CDI spec to be generated that includes GPUs by index and UUID.
7+
* Set the default `--device-name-strategy` for the `nvidia-ctk cdi generate` command to `[index, uuid]`.
58

69
## v1.15.0-rc.3
710
* Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied.

cmd/nvidia-ctk/cdi/generate/generate.go

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,16 @@ type command struct {
4242
}
4343

4444
type options struct {
45-
output string
46-
format string
47-
deviceNameStrategy string
48-
driverRoot string
49-
devRoot string
50-
nvidiaCTKPath string
51-
ldconfigPath string
52-
mode string
53-
vendor string
54-
class string
45+
output string
46+
format string
47+
deviceNameStrategies cli.StringSlice
48+
driverRoot string
49+
devRoot string
50+
nvidiaCTKPath string
51+
ldconfigPath string
52+
mode string
53+
vendor string
54+
class string
5555

5656
librarySearchPaths cli.StringSlice
5757

@@ -109,11 +109,11 @@ func (m command) build() *cli.Command {
109109
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
110110
Destination: &opts.devRoot,
111111
},
112-
&cli.StringFlag{
112+
&cli.StringSliceFlag{
113113
Name: "device-name-strategy",
114-
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
115-
Value: nvcdi.DeviceNameStrategyIndex,
116-
Destination: &opts.deviceNameStrategy,
114+
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
115+
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID),
116+
Destination: &opts.deviceNameStrategies,
117117
},
118118
&cli.StringFlag{
119119
Name: "driver-root",
@@ -185,9 +185,11 @@ func (m command) validateFlags(c *cli.Context, opts *options) error {
185185
return fmt.Errorf("invalid discovery mode: %v", opts.mode)
186186
}
187187

188-
_, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
189-
if err != nil {
190-
return err
188+
for _, strategy := range opts.deviceNameStrategies.Value() {
189+
_, err := nvcdi.NewDeviceNamer(strategy)
190+
if err != nil {
191+
return err
192+
}
191193
}
192194

193195
opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath)
@@ -241,9 +243,13 @@ func formatFromFilename(filename string) string {
241243
}
242244

243245
func (m command) generateSpec(opts *options) (spec.Interface, error) {
244-
deviceNamer, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
245-
if err != nil {
246-
return nil, fmt.Errorf("failed to create device namer: %v", err)
246+
var deviceNamers []nvcdi.DeviceNamer
247+
for _, strategy := range opts.deviceNameStrategies.Value() {
248+
deviceNamer, err := nvcdi.NewDeviceNamer(strategy)
249+
if err != nil {
250+
return nil, fmt.Errorf("failed to create device namer: %v", err)
251+
}
252+
deviceNamers = append(deviceNamers, deviceNamer)
247253
}
248254

249255
cdilib, err := nvcdi.New(
@@ -252,7 +258,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
252258
nvcdi.WithDevRoot(opts.devRoot),
253259
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
254260
nvcdi.WithLdconfigPath(opts.ldconfigPath),
255-
nvcdi.WithDeviceNamer(deviceNamer),
261+
nvcdi.WithDeviceNamers(deviceNamers...),
256262
nvcdi.WithMode(opts.mode),
257263
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
258264
nvcdi.WithCSVFiles(opts.csv.files.Value()),

pkg/nvcdi/api.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ type Interface interface {
4848
GetCommonEdits() (*cdi.ContainerEdits, error)
4949
GetAllDeviceSpecs() ([]specs.Device, error)
5050
GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
51-
GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error)
51+
GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error)
5252
GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error)
53-
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error)
53+
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error)
5454
GetDeviceSpecsByID(...string) ([]specs.Device, error)
5555
}

pkg/nvcdi/full-gpu-nvml.go

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,23 +34,26 @@ import (
3434
)
3535

3636
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
37-
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
37+
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
3838
edits, err := l.GetGPUDeviceEdits(d)
3939
if err != nil {
4040
return nil, fmt.Errorf("failed to get edits for device: %v", err)
4141
}
4242

43-
name, err := l.deviceNamer.GetDeviceName(i, convert{d})
43+
var deviceSpecs []specs.Device
44+
names, err := l.deviceNamers.GetDeviceNames(i, convert{d})
4445
if err != nil {
4546
return nil, fmt.Errorf("failed to get device name: %v", err)
4647
}
47-
48-
spec := specs.Device{
49-
Name: name,
50-
ContainerEdits: *edits.ContainerEdits,
48+
for _, name := range names {
49+
spec := specs.Device{
50+
Name: name,
51+
ContainerEdits: *edits.ContainerEdits,
52+
}
53+
deviceSpecs = append(deviceSpecs, spec)
5154
}
5255

53-
return &spec, nil
56+
return deviceSpecs, nil
5457
}
5558

5659
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.

pkg/nvcdi/gds.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func (l *gdslib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
6868
}
6969

7070
// GetGPUDeviceSpecs is unsupported for the gdslib specs
71-
func (l *gdslib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
71+
func (l *gdslib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
7272
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
7373
}
7474

@@ -78,7 +78,7 @@ func (l *gdslib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
7878
}
7979

8080
// GetMIGDeviceSpecs is unsupported for the gdslib specs
81-
func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
81+
func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
8282
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
8383
}
8484

pkg/nvcdi/lib-csv.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,20 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
5858
return nil, fmt.Errorf("failed to create container edits for CSV files: %v", err)
5959
}
6060

61-
name, err := l.deviceNamer.GetDeviceName(0, uuidUnsupported{})
61+
names, err := l.deviceNamers.GetDeviceNames(0, uuidIgnored{})
6262
if err != nil {
6363
return nil, fmt.Errorf("failed to get device name: %v", err)
6464
}
65-
66-
deviceSpec := specs.Device{
67-
Name: name,
68-
ContainerEdits: *e.ContainerEdits,
65+
var deviceSpecs []specs.Device
66+
for _, name := range names {
67+
deviceSpec := specs.Device{
68+
Name: name,
69+
ContainerEdits: *e.ContainerEdits,
70+
}
71+
deviceSpecs = append(deviceSpecs, deviceSpec)
6972
}
70-
return []specs.Device{deviceSpec}, nil
73+
74+
return deviceSpecs, nil
7175
}
7276

7377
// GetCommonEdits generates a CDI specification that can be used for ANY devices
@@ -82,7 +86,7 @@ func (l *csvlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
8286
}
8387

8488
// GetGPUDeviceSpecs is not supported for CSV files; it always returns an error.
85-
func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
89+
func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
8690
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported for CSV files")
8791
}
8892

@@ -92,7 +96,7 @@ func (l *csvlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
9296
}
9397

9498
// GetMIGDeviceSpecs is not supported for CSV files; it always returns an error.
95-
func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
99+
func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
96100
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported for CSV files")
97101
}
98102

pkg/nvcdi/lib-nvml.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,11 +208,11 @@ func (l *nvmllib) getEditsForMIGDevice(nvmlDevice nvml.Device) (*cdi.ContainerEd
208208
func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
209209
var deviceSpecs []specs.Device
210210
err := l.devicelib.VisitDevices(func(i int, d device.Device) error {
211-
deviceSpec, err := l.GetGPUDeviceSpecs(i, d)
211+
specsForDevice, err := l.GetGPUDeviceSpecs(i, d)
212212
if err != nil {
213213
return err
214214
}
215-
deviceSpecs = append(deviceSpecs, *deviceSpec)
215+
deviceSpecs = append(deviceSpecs, specsForDevice...)
216216

217217
return nil
218218
})
@@ -225,11 +225,11 @@ func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
225225
func (l *nvmllib) getMigDeviceSpecs() ([]specs.Device, error) {
226226
var deviceSpecs []specs.Device
227227
err := l.devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
228-
deviceSpec, err := l.GetMIGDeviceSpecs(i, d, j, mig)
228+
specsForDevice, err := l.GetMIGDeviceSpecs(i, d, j, mig)
229229
if err != nil {
230230
return err
231231
}
232-
deviceSpecs = append(deviceSpecs, *deviceSpec)
232+
deviceSpecs = append(deviceSpecs, specsForDevice...)
233233

234234
return nil
235235
})

pkg/nvcdi/lib-wsl.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func (l *wsllib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
6868
}
6969

7070
// GetGPUDeviceSpecs is not supported on WSL; it always returns an error.
71-
func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
71+
func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
7272
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported on WSL")
7373
}
7474

@@ -78,7 +78,7 @@ func (l *wsllib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
7878
}
7979

8080
// GetMIGDeviceSpecs is not supported on WSL; it always returns an error.
81-
func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
81+
func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
8282
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported on WSL")
8383
}
8484

pkg/nvcdi/lib.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ type nvcdilib struct {
4444
nvmllib nvml.Interface
4545
mode string
4646
devicelib device.Interface
47-
deviceNamer DeviceNamer
47+
deviceNamers DeviceNamers
4848
driverRoot string
4949
devRoot string
5050
nvidiaCTKPath string
@@ -75,8 +75,9 @@ func New(opts ...Option) (Interface, error) {
7575
if l.logger == nil {
7676
l.logger = logger.New()
7777
}
78-
if l.deviceNamer == nil {
79-
l.deviceNamer, _ = NewDeviceNamer(DeviceNameStrategyIndex)
78+
if len(l.deviceNamers) == 0 {
79+
indexNamer, _ := NewDeviceNamer(DeviceNameStrategyIndex)
80+
l.deviceNamers = []DeviceNamer{indexNamer}
8081
}
8182
if l.driverRoot == "" {
8283
l.driverRoot = "/"

pkg/nvcdi/management.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ func (m *managementlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, e
175175
}
176176

177177
// GetGPUDeviceSpecs is unsupported for the managementlib specs
178-
func (m *managementlib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
178+
func (m *managementlib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
179179
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
180180
}
181181

@@ -185,7 +185,7 @@ func (m *managementlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi
185185
}
186186

187187
// GetMIGDeviceSpecs is unsupported for the managementlib specs
188-
func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
188+
func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
189189
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
190190
}
191191

0 commit comments

Comments
 (0)