Skip to content

Commit 2e6712d

Browse files
committed
Allow IMEX channels to be requested as volume mounts
This change allows IMEX channels to be requested using the volume mount mechanism. A mount from /dev/null to /var/run/nvidia-container-devices/imex/{{ .ChannelID }} is equivalent to including {{ .ChannelID }} in the NVIDIA_IMEX_CHANNELS environment variable. Signed-off-by: Evan Lezar <[email protected]>
1 parent 92df542 commit 2e6712d

File tree

4 files changed

+46
-15
lines changed

4 files changed

+46
-15
lines changed

cmd/nvidia-container-runtime-hook/container_config.go

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ type nvidiaConfig struct {
2121
Devices []string
2222
MigConfigDevices string
2323
MigMonitorDevices string
24-
ImexChannels string
24+
ImexChannels []string
2525
DriverCapabilities string
2626
// Requirements defines the requirements DSL for the container to run.
2727
// This is empty if no specific requirements are needed, or if requirements are
@@ -197,12 +197,24 @@ func getMigDevices(image image.CUDA, envvar string) *string {
197197
return &devices
198198
}
199199

200-
func getImexChannels(i image.CUDA) *string {
201-
if !i.HasEnvvar(image.EnvVarNvidiaImexChannels) {
200+
func getImexChannels(hookConfig *HookConfig, image image.CUDA, privileged bool) []string {
201+
// If enabled, try and get the device list from volume mounts first
202+
if hookConfig.AcceptDeviceListAsVolumeMounts {
203+
devices := image.ImexChannelsFromMounts()
204+
if len(devices) > 0 {
205+
return devices
206+
}
207+
}
208+
devices := image.ImexChannelsFromEnvVar()
209+
if len(devices) == 0 {
202210
return nil
203211
}
204-
chans := i.Getenv(image.EnvVarNvidiaImexChannels)
205-
return &chans
212+
213+
if privileged || hookConfig.AcceptEnvvarUnprivileged {
214+
return devices
215+
}
216+
217+
return nil
206218
}
207219

208220
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
@@ -257,10 +269,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool)
257269
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
258270
}
259271

260-
var imexChannels string
261-
if c := getImexChannels(image); c != nil {
262-
imexChannels = *c
263-
}
272+
imexChannels := getImexChannels(hookConfig, image, privileged)
264273

265274
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
266275

cmd/nvidia-container-runtime-hook/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,8 @@ func doPrestart() {
129129
if len(nvidia.MigMonitorDevices) > 0 {
130130
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
131131
}
132-
if len(nvidia.ImexChannels) > 0 {
133-
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
132+
if imexString := strings.Join(nvidia.ImexChannels, ","); len(imexString) > 0 {
133+
args = append(args, fmt.Sprintf("--imex-channel=%s", imexString))
134134
}
135135

136136
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {

internal/config/image/cuda_image.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ import (
3030
const (
3131
DeviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
3232

33-
volumeMountDevicePrefixCDI = "cdi/"
33+
volumeMountDevicePrefixCDI = "cdi/"
34+
volumeMountDevicePrefixImex = "imex/"
3435
)
3536

3637
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
@@ -225,7 +226,10 @@ func (i CUDA) VisibleDevicesFromEnvVar() []string {
225226
func (i CUDA) VisibleDevicesFromMounts() []string {
226227
var devices []string
227228
for _, device := range i.DevicesFromMounts() {
228-
if strings.HasPrefix(device, volumeMountDevicePrefixCDI) {
229+
switch {
230+
case strings.HasPrefix(device, volumeMountDevicePrefixCDI):
231+
continue
232+
case strings.HasPrefix(device, volumeMountDevicePrefixImex):
229233
continue
230234
}
231235
devices = append(devices, device)
@@ -286,6 +290,19 @@ func (i CUDA) CDIDevicesFromMounts() []string {
286290
return devices
287291
}
288292

289-
func (i CUDA) IsEnabled(envvar string) bool {
290-
return i.Getenv(envvar) == "enabled"
293+
// ImexChannelsFromEnvVar returns the list of IMEX channels requested for the image through the IMEX channels environment variable (EnvVarNvidiaImexChannels).
294+
func (i CUDA) ImexChannelsFromEnvVar() []string {
295+
return i.DevicesFromEnvvars(EnvVarNvidiaImexChannels).List()
296+
}
297+
298+
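// ImexChannelsFromMounts returns the list of IMEX channels requested for the image through volume mounts, i.e. mount-derived device names that start with the "imex/" prefix, with that prefix removed.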
299+
func (i CUDA) ImexChannelsFromMounts() []string {
300+
var channels []string
301+
for _, mountDevice := range i.DevicesFromMounts() {
302+
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixImex) {
303+
continue
304+
}
305+
channels = append(channels, strings.TrimPrefix(mountDevice, volumeMountDevicePrefixImex))
306+
}
307+
return channels
291308
}

internal/config/image/cuda_image_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,11 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
189189
mounts: makeTestMounts("GPU0", "cdi/nvidia.com/gpu=all", "GPU1"),
190190
expectedDevices: []string{"GPU0", "GPU1"},
191191
},
192+
{
193+
description: "imex devices are ignored",
194+
mounts: makeTestMounts("GPU0", "imex/0", "GPU1"),
195+
expectedDevices: []string{"GPU0", "GPU1"},
196+
},
192197
}
193198
for _, tc := range tests {
194199
t.Run(tc.description, func(t *testing.T) {

0 commit comments

Comments
 (0)