Skip to content

Commit 92df542

Browse files
committed
[no-relnote] Use image.CUDA to extract visible devices
Signed-off-by: Evan Lezar <[email protected]>
1 parent 1991b3e commit 92df542

File tree

11 files changed

+313
-337
lines changed

11 files changed

+313
-337
lines changed

cmd/nvidia-container-runtime-hook/container_config.go

Lines changed: 23 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -6,34 +6,17 @@ import (
66
"log"
77
"os"
88
"path"
9-
"path/filepath"
109

1110
"github.com/opencontainers/runtime-spec/specs-go"
1211
"golang.org/x/mod/semver"
1312

1413
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
1514
)
1615

17-
const (
18-
envCUDAVersion = "CUDA_VERSION"
19-
envNVRequirePrefix = "NVIDIA_REQUIRE_"
20-
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
21-
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
22-
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
23-
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
24-
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
25-
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
26-
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
27-
)
28-
2916
const (
3017
capSysAdmin = "CAP_SYS_ADMIN"
3118
)
3219

33-
const (
34-
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
35-
)
36-
3720
type nvidiaConfig struct {
3821
Devices []string
3922
MigConfigDevices string
@@ -76,23 +59,14 @@ type LinuxCapabilities struct {
7659
Ambient []string `json:"ambient,omitempty" platform:"linux"`
7760
}
7861

79-
// Mount from OCI runtime spec
80-
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
81-
type Mount struct {
82-
Destination string `json:"destination"`
83-
Type string `json:"type,omitempty" platform:"linux,solaris"`
84-
Source string `json:"source,omitempty"`
85-
Options []string `json:"options,omitempty"`
86-
}
87-
8862
// Spec from OCI runtime spec
8963
// We use pointers to structs, similarly to the latest version of runtime-spec:
9064
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
9165
type Spec struct {
92-
Version *string `json:"ociVersion"`
93-
Process *Process `json:"process,omitempty"`
94-
Root *Root `json:"root,omitempty"`
95-
Mounts []Mount `json:"mounts,omitempty"`
66+
Version *string `json:"ociVersion"`
67+
Process *Process `json:"process,omitempty"`
68+
Root *Root `json:"root,omitempty"`
69+
Mounts []specs.Mount `json:"mounts,omitempty"`
9670
}
9771

9872
// HookState holds state information about the hook
@@ -171,58 +145,22 @@ func isPrivileged(s *Spec) bool {
171145
return image.IsPrivileged(&fullSpec)
172146
}
173147

174-
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) []string {
148+
func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string {
175149
// We check if the image has at least one of the Swarm resource envvars defined and use this
176150
// if specified.
177151
for _, envvar := range swarmResourceEnvvars {
178-
if image.HasEnvvar(envvar) {
179-
return image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
152+
if containerImage.HasEnvvar(envvar) {
153+
return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List()
180154
}
181155
}
182156

183-
return image.DevicesFromEnvvars(envNVVisibleDevices).List()
184-
}
185-
186-
func getDevicesFromMounts(mounts []Mount) []string {
187-
var devices []string
188-
for _, m := range mounts {
189-
root := filepath.Clean(deviceListAsVolumeMountsRoot)
190-
source := filepath.Clean(m.Source)
191-
destination := filepath.Clean(m.Destination)
192-
193-
// Only consider mounts whose host volume is /dev/null
194-
if source != "/dev/null" {
195-
continue
196-
}
197-
// Only consider container mount points that begin with 'root'
198-
if len(destination) < len(root) {
199-
continue
200-
}
201-
if destination[:len(root)] != root {
202-
continue
203-
}
204-
// Grab the full path beyond 'root' and add it to the list of devices
205-
device := destination[len(root):]
206-
if len(device) > 0 && device[0] == '/' {
207-
device = device[1:]
208-
}
209-
if len(device) == 0 {
210-
continue
211-
}
212-
devices = append(devices, device)
213-
}
214-
215-
if devices == nil {
216-
return nil
217-
}
218-
219-
return devices
157+
return containerImage.VisibleDevicesFromEnvVar()
220158
}
221159

222-
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) []string {
160+
func getDevices(hookConfig *HookConfig, image image.CUDA, privileged bool) []string {
223161
// If enabled, try and get the device list from volume mounts first
224162
if hookConfig.AcceptDeviceListAsVolumeMounts {
225-
devices := getDevicesFromMounts(mounts)
163+
devices := image.VisibleDevicesFromMounts()
226164
if len(devices) > 0 {
227165
return devices
228166
}
@@ -243,12 +181,12 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
243181
return nil
244182
}
245183

246-
func getMigConfigDevices(image image.CUDA) *string {
247-
return getMigDevices(image, envNVMigConfigDevices)
184+
func getMigConfigDevices(i image.CUDA) *string {
185+
return getMigDevices(i, image.EnvVarNvidiaMigConfigDevices)
248186
}
249187

250-
func getMigMonitorDevices(image image.CUDA) *string {
251-
return getMigDevices(image, envNVMigMonitorDevices)
188+
func getMigMonitorDevices(i image.CUDA) *string {
189+
return getMigDevices(i, image.EnvVarNvidiaMigMonitorDevices)
252190
}
253191

254192
func getMigDevices(image image.CUDA, envvar string) *string {
@@ -259,11 +197,11 @@ func getMigDevices(image image.CUDA, envvar string) *string {
259197
return &devices
260198
}
261199

262-
func getImexChannels(image image.CUDA) *string {
263-
if !image.HasEnvvar(envNVImexChannels) {
200+
func getImexChannels(i image.CUDA) *string {
201+
if !i.HasEnvvar(image.EnvVarNvidiaImexChannels) {
264202
return nil
265203
}
266-
chans := image.Getenv(envNVImexChannels)
204+
chans := i.Getenv(image.EnvVarNvidiaImexChannels)
267205
return &chans
268206
}
269207

@@ -274,8 +212,8 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo
274212

275213
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
276214

277-
capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities)
278-
capsEnv := cudaImage.Getenv(envNVDriverCapabilities)
215+
capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
216+
capsEnv := cudaImage.Getenv(image.EnvVarNvidiaDriverCapabilities)
279217

280218
if !capsEnvSpecified && legacyImage {
281219
// Environment variable unset with legacy image: set all capabilities.
@@ -294,10 +232,10 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo
294232
return capabilities
295233
}
296234

297-
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
235+
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool) *nvidiaConfig {
298236
legacyImage := image.IsLegacy()
299237

300-
devices := getDevices(hookConfig, image, mounts, privileged)
238+
devices := getDevices(hookConfig, image, privileged)
301239
if len(devices) == 0 {
302240
// empty devices means this is not a GPU container.
303241
return nil
@@ -357,6 +295,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
357295

358296
image, err := image.New(
359297
image.WithEnv(s.Process.Env),
298+
image.WithMounts(s.Mounts),
360299
image.WithDisableRequire(hook.DisableRequire),
361300
)
362301
if err != nil {
@@ -368,6 +307,6 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
368307
Pid: h.Pid,
369308
Rootfs: s.Root.Path,
370309
Image: image,
371-
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
310+
Nvidia: getNvidiaConfig(&hook, image, privileged),
372311
}
373312
}

0 commit comments

Comments
 (0)