Skip to content

Commit 1bc2a9f

Browse files
committed
Return annotation devices from VisibleDevices
This change includes annotation devices in CUDA.VisibleDevices with the highest priority. This allows for the CDI device request extraction to be consistent across all request mechanisms. Note that this does change behaviour in the following ways: 1. Annotations are considered when resolving the runtime mode. 2. Incorrectly formed device names in annotations are no longer treated as an error. Signed-off-by: Evan Lezar <[email protected]>
1 parent dc87dcf commit 1bc2a9f

File tree

4 files changed

+82
-122
lines changed

4 files changed

+82
-122
lines changed

internal/config/image/cuda_image.go

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,12 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
238238
// In cases where environment variable requests required privileged containers,
239239
// such devices requests are ignored.
240240
func (i CUDA) VisibleDevices() []string {
241+
// If annotation device requests are present, these are preferred.
242+
annotationDeviceRequests := i.cdiDeviceRequestsFromAnnotations()
243+
if len(annotationDeviceRequests) > 0 {
244+
return annotationDeviceRequests
245+
}
246+
241247
// If enabled, try and get the device list from volume mounts first
242248
if i.acceptDeviceListAsVolumeMounts {
243249
volumeMountDeviceRequests := i.visibleDevicesFromMounts()
@@ -264,29 +270,28 @@ func (i CUDA) VisibleDevices() []string {
264270
return nil
265271
}
266272

267-
// CDIDeviceRequestsFromAnnotations returns a list of devices specified in the annotations.
273+
// cdiDeviceRequestsFromAnnotations returns a list of devices specified in the
274+
// annotations.
268275
// Keys starting with the specified prefixes are considered and expected to
269276
// contain a comma-separated list of fully-qualified CDI devices names.
270277
// The format of the requested devices is not checked and the list is not
271278
// deduplicated.
272-
func (i CUDA) CDIDeviceRequestsFromAnnotations() []string {
279+
func (i CUDA) cdiDeviceRequestsFromAnnotations() []string {
273280
if len(i.annotationsPrefixes) == 0 || len(i.annotations) == 0 {
274281
return nil
275282
}
276283

277-
var deviceKeys []string
278-
for key := range i.annotations {
284+
var devices []string
285+
for key, value := range i.annotations {
279286
for _, prefix := range i.annotationsPrefixes {
280287
if strings.HasPrefix(key, prefix) {
281-
deviceKeys = append(deviceKeys, key)
288+
devices = append(devices, strings.Split(value, ",")...)
289+
// There is no need to check additional prefixes since we
290+
// typically deduplicate devices in any case.
291+
break
282292
}
283293
}
284294
}
285-
286-
var devices []string
287-
for _, key := range deviceKeys {
288-
devices = append(devices, strings.Split(i.annotations[key], ",")...)
289-
}
290295
return devices
291296
}
292297

internal/config/image/cuda_image_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ func TestCDIDeviceRequestsFromAnnotations(t *testing.T) {
710710
)
711711
require.NoError(t, err)
712712

713-
devices := image.CDIDeviceRequestsFromAnnotations()
713+
devices := image.cdiDeviceRequestsFromAnnotations()
714714
require.ElementsMatch(t, tc.expectedDevices, devices)
715715
})
716716
}

internal/modifier/cdi.go

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -89,15 +89,6 @@ func (c *cdiDeviceRequestor) DeviceRequests() []string {
8989
if c == nil {
9090
return nil
9191
}
92-
annotationDevices, err := getAnnotationDevices(c.image)
93-
if err != nil {
94-
c.logger.Warningf("failed to get device requests from container annotations: %v; ignoring.", err)
95-
annotationDevices = nil
96-
}
97-
if len(annotationDevices) > 0 {
98-
return annotationDevices
99-
}
100-
10192
var devices []string
10293
for _, name := range c.image.VisibleDevices() {
10394
if !parser.IsQualifiedName(name) {
@@ -109,21 +100,6 @@ func (c *cdiDeviceRequestor) DeviceRequests() []string {
109100
return devices
110101
}
111102

112-
// getAnnotationDevices returns a list of devices specified in the annotations.
113-
// Keys starting with the specified prefixes are considered and expected to contain a comma-separated list of
114-
// fully-qualified CDI devices names. If any device name is not fully-quality an error is returned.
115-
// The list of returned devices is deduplicated.
116-
func getAnnotationDevices(image image.CUDA) ([]string, error) {
117-
var annotationDevices []string
118-
for _, device := range image.CDIDeviceRequestsFromAnnotations() {
119-
if !parser.IsQualifiedName(device) {
120-
return nil, fmt.Errorf("invalid device name %q in annotations", device)
121-
}
122-
annotationDevices = append(annotationDevices, device)
123-
}
124-
return annotationDevices, nil
125-
}
126-
127103
// filterAutomaticDevices searches for "automatic" device names in the input slice.
128104
// "Automatic" devices are a well-defined list of CDI device names which, when requested,
129105
// trigger the generation of a CDI spec at runtime. This removes the need to generate a
@@ -192,21 +168,6 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devic
192168
)
193169
}
194170

195-
func deviceRequestorFromImage(image image.CUDA) deviceRequestor {
196-
return &fromImage{image}
197-
}
198-
199-
type fromImage struct {
200-
image.CUDA
201-
}
202-
203-
func (f *fromImage) DeviceRequests() []string {
204-
if f == nil {
205-
return nil
206-
}
207-
return f.CUDA.VisibleDevices()
208-
}
209-
210171
type deduplicatedDeviceRequestor struct {
211172
deviceRequestor
212173
}

internal/modifier/cdi_test.go

Lines changed: 66 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
package modifier
1818

1919
import (
20-
"fmt"
2120
"testing"
2221

2322
"github.com/opencontainers/runtime-spec/specs-go"
@@ -27,122 +26,116 @@ import (
2726
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
2827
)
2928

30-
func TestGetAnnotationDevices(t *testing.T) {
29+
func TestDeviceRequests(t *testing.T) {
30+
logger, _ := testlog.NewNullLogger()
31+
3132
testCases := []struct {
3233
description string
34+
input cdiDeviceRequestor
35+
spec *specs.Spec
3336
prefixes []string
34-
annotations map[string]string
3537
expectedDevices []string
36-
expectedError error
3738
}{
3839
{
39-
description: "no annotations",
40+
description: "empty spec yields no devices",
41+
},
42+
{
43+
description: "cdi devices from mounts",
44+
input: cdiDeviceRequestor{
45+
defaultKind: "nvidia.com/gpu",
46+
},
47+
spec: &specs.Spec{
48+
Mounts: []specs.Mount{
49+
{
50+
Destination: "/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
51+
Source: "/dev/null",
52+
},
53+
{
54+
Destination: "/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/1",
55+
Source: "/dev/null",
56+
},
57+
},
58+
},
59+
expectedDevices: []string{"nvidia.com/gpu=0", "nvidia.com/gpu=1"},
60+
},
61+
{
62+
description: "cdi devices from envvar",
63+
input: cdiDeviceRequestor{
64+
defaultKind: "nvidia.com/gpu",
65+
},
66+
spec: &specs.Spec{
67+
Process: &specs.Process{
68+
Env: []string{"NVIDIA_VISIBLE_DEVICES=0,example.com/class=device"},
69+
},
70+
},
71+
expectedDevices: []string{"nvidia.com/gpu=0", "example.com/class=device"},
4072
},
4173
{
4274
description: "no matching annotations",
4375
prefixes: []string{"not-prefix/"},
44-
annotations: map[string]string{
45-
"prefix/foo": "example.com/device=bar",
76+
spec: &specs.Spec{
77+
Annotations: map[string]string{
78+
"prefix/foo": "example.com/device=bar",
79+
},
4680
},
4781
},
4882
{
4983
description: "single matching annotation",
5084
prefixes: []string{"prefix/"},
51-
annotations: map[string]string{
52-
"prefix/foo": "example.com/device=bar",
85+
spec: &specs.Spec{
86+
Annotations: map[string]string{
87+
"prefix/foo": "example.com/device=bar",
88+
},
5389
},
5490
expectedDevices: []string{"example.com/device=bar"},
5591
},
5692
{
5793
description: "multiple matching annotations",
5894
prefixes: []string{"prefix/", "another-prefix/"},
59-
annotations: map[string]string{
60-
"prefix/foo": "example.com/device=bar",
61-
"another-prefix/bar": "example.com/device=baz",
95+
spec: &specs.Spec{
96+
Annotations: map[string]string{
97+
"prefix/foo": "example.com/device=bar",
98+
"another-prefix/bar": "example.com/device=baz",
99+
},
62100
},
63101
expectedDevices: []string{"example.com/device=bar", "example.com/device=baz"},
64102
},
65103
{
66104
description: "multiple matching annotations with duplicate devices",
67105
prefixes: []string{"prefix/", "another-prefix/"},
68-
annotations: map[string]string{
69-
"prefix/foo": "example.com/device=bar",
70-
"another-prefix/bar": "example.com/device=bar",
106+
spec: &specs.Spec{
107+
Annotations: map[string]string{
108+
"prefix/foo": "example.com/device=bar",
109+
"another-prefix/bar": "example.com/device=bar",
110+
},
71111
},
72112
expectedDevices: []string{"example.com/device=bar", "example.com/device=bar"},
73113
},
74114
{
75-
description: "invalid devices",
76-
prefixes: []string{"prefix/"},
77-
annotations: map[string]string{
78-
"prefix/foo": "example.com/device",
79-
},
80-
expectedError: fmt.Errorf("invalid device %q", "example.com/device"),
81-
},
82-
}
83-
84-
for _, tc := range testCases {
85-
t.Run(tc.description, func(t *testing.T) {
86-
image, err := image.New(
87-
image.WithAnnotations(tc.annotations),
88-
image.WithAnnotationsPrefixes(tc.prefixes),
89-
)
90-
require.NoError(t, err)
91-
92-
devices, err := getAnnotationDevices(image)
93-
if tc.expectedError != nil {
94-
require.Error(t, err)
95-
return
96-
}
97-
98-
require.NoError(t, err)
99-
require.ElementsMatch(t, tc.expectedDevices, devices)
100-
})
101-
}
102-
}
103-
104-
func TestDeviceRequests(t *testing.T) {
105-
logger, _ := testlog.NewNullLogger()
106-
107-
testCases := []struct {
108-
description string
109-
input cdiDeviceRequestor
110-
spec *specs.Spec
111-
expectedDevices []string
112-
}{
113-
{
114-
description: "empty spec yields no devices",
115-
},
116-
{
117-
description: "cdi devices from mounts",
115+
description: "devices in annotations are expanded",
118116
input: cdiDeviceRequestor{
119117
defaultKind: "nvidia.com/gpu",
120118
},
119+
prefixes: []string{"prefix/"},
121120
spec: &specs.Spec{
122-
Mounts: []specs.Mount{
123-
{
124-
Destination: "/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
125-
Source: "/dev/null",
126-
},
127-
{
128-
Destination: "/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/1",
129-
Source: "/dev/null",
130-
},
121+
Annotations: map[string]string{
122+
"prefix/foo": "device",
131123
},
132124
},
133-
expectedDevices: []string{"nvidia.com/gpu=0", "nvidia.com/gpu=1"},
125+
expectedDevices: []string{"nvidia.com/gpu=device"},
134126
},
135127
{
136-
description: "cdi devices from envvar",
128+
description: "invalid devices in annotations are treated as strings",
137129
input: cdiDeviceRequestor{
138130
defaultKind: "nvidia.com/gpu",
139131
},
132+
prefixes: []string{"prefix/"},
140133
spec: &specs.Spec{
141-
Process: &specs.Process{
142-
Env: []string{"NVIDIA_VISIBLE_DEVICES=0,example.com/class=device"},
134+
Annotations: map[string]string{
135+
"prefix/foo": "example.com/device",
143136
},
144137
},
145-
expectedDevices: []string{"nvidia.com/gpu=0", "example.com/class=device"},
138+
expectedDevices: []string{"nvidia.com/gpu=example.com/device"},
146139
},
147140
}
148141

@@ -153,6 +146,7 @@ func TestDeviceRequests(t *testing.T) {
153146
tc.spec,
154147
image.WithAcceptDeviceListAsVolumeMounts(true),
155148
image.WithAcceptEnvvarUnprivileged(true),
149+
image.WithAnnotationsPrefixes(tc.prefixes),
156150
)
157151
require.NoError(t, err)
158152
tc.input.image = image

0 commit comments

Comments
 (0)