Skip to content

Commit 47ebbe5

Browse files
committed
Allow IMEX channels to be requested as volume mounts
This change allows IMEX channels to be requested using the volume mount mechanism. A mount from /dev/null to /var/run/nvidia-container-devices/imex/{{ .ChannelID }} is equivalent to including {{ .ChannelID }} in the NVIDIA_IMEX_CHANNELS environment variable. Signed-off-by: Evan Lezar <[email protected]>
1 parent 7bffae9 commit 47ebbe5

File tree

4 files changed

+50
-13
lines changed

4 files changed

+50
-13
lines changed

cmd/nvidia-container-runtime-hook/container_config.go

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ type nvidiaConfig struct {
3333
Devices []string
3434
MigConfigDevices string
3535
MigMonitorDevices string
36-
ImexChannels string
36+
ImexChannels []string
3737
DriverCapabilities string
3838
// Requirements defines the requirements DSL for the container to run.
3939
// This is empty if no specific requirements are needed, or if requirements are
@@ -209,12 +209,24 @@ func getMigDevices(image image.CUDA, envvar string) *string {
209209
return &devices
210210
}
211211

212-
func getImexChannels(image image.CUDA) *string {
213-
if !image.HasEnvvar(envNVImexChannels) {
212+
func getImexChannels(hookConfig *HookConfig, image image.CUDA, privileged bool) []string {
213+
// If enabled, try and get the device list from volume mounts first
214+
if hookConfig.AcceptDeviceListAsVolumeMounts {
215+
devices := image.ImexChannelsFromMounts()
216+
if len(devices) > 0 {
217+
return devices
218+
}
219+
}
220+
devices := image.ImexChannelsFromEnvVar()
221+
if len(devices) == 0 {
214222
return nil
215223
}
216-
chans := image.Getenv(envNVImexChannels)
217-
return &chans
224+
225+
if privileged || hookConfig.AcceptEnvvarUnprivileged {
226+
return devices
227+
}
228+
229+
return nil
218230
}
219231

220232
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
@@ -269,10 +281,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool)
269281
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
270282
}
271283

272-
var imexChannels string
273-
if c := getImexChannels(image); c != nil {
274-
imexChannels = *c
275-
}
284+
imexChannels := getImexChannels(hookConfig, image, privileged)
276285

277286
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
278287

cmd/nvidia-container-runtime-hook/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ func doPrestart() {
126126
if len(nvidia.MigMonitorDevices) > 0 {
127127
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
128128
}
129-
if len(nvidia.ImexChannels) > 0 {
130-
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
129+
if imexString := strings.Join(nvidia.ImexChannels, ","); len(imexString) > 0 {
130+
args = append(args, fmt.Sprintf("--imex-channel=%s", imexString))
131131
}
132132

133133
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {

internal/config/image/cuda_image.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
)
2929

3030
const (
31+
NVIDIAImexChannelsEnvVar = "NVIDIA_IMEX_CHANNELS"
3132
NVIDIAVisibleDevicesEnvVar = "NVIDIA_VISIBLE_DEVICES"
3233
DeviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
3334

@@ -38,7 +39,8 @@ const (
3839
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
3940
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
4041

41-
volumeMountDevicePrefixCDI = "cdi/"
42+
volumeMountDevicePrefixCDI = "cdi/"
43+
volumeMountDevicePrefixImex = "imex/"
4244
)
4345

4446
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
@@ -227,7 +229,10 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
227229
func (i CUDA) VisibleDevicesFromMounts() []string {
228230
var devices []string
229231
for _, device := range i.DevicesFromMounts() {
230-
if strings.HasPrefix(device, volumeMountDevicePrefixCDI) {
232+
switch {
233+
case strings.HasPrefix(device, volumeMountDevicePrefixCDI):
234+
continue
235+
case strings.HasPrefix(device, volumeMountDevicePrefixImex):
231236
continue
232237
}
233238
devices = append(devices, device)
@@ -287,3 +292,21 @@ func (i CUDA) CDIDevicesFromMounts() []string {
287292
}
288293
return devices
289294
}
295+
296+
// ImexChannelsFromEnvVar returns the list of IMEX channels requested for the image.
297+
func (i CUDA) ImexChannelsFromEnvVar() []string {
298+
return i.DevicesFromEnvvars(NVIDIAImexChannelsEnvVar).List()
299+
300+
}
301+
302+
// ImexChannelsFromMounts returns the list of IMEX channels requested for the image.
303+
func (i CUDA) ImexChannelsFromMounts() []string {
304+
var channels []string
305+
for _, mountDevice := range i.DevicesFromMounts() {
306+
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixImex) {
307+
continue
308+
}
309+
channels = append(channels, strings.TrimPrefix(mountDevice, volumeMountDevicePrefixImex))
310+
}
311+
return channels
312+
}

internal/config/image/cuda_image_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,11 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
189189
mounts: makeTestMounts("GPU0", "cdi/nvidia.com/gpu=all", "GPU1"),
190190
expectedDevices: []string{"GPU0", "GPU1"},
191191
},
192+
{
193+
description: "imex devices are ignored",
194+
mounts: makeTestMounts("GPU0", "imex/0", "GPU1"),
195+
expectedDevices: []string{"GPU0", "GPU1"},
196+
},
192197
}
193198
for _, tc := range tests {
194199
t.Run(tc.description, func(t *testing.T) {

0 commit comments

Comments
 (0)