Skip to content

Commit d4664fe

Browse files
committed
[no-relnote] Use image.CUDA to extract visible devices
Signed-off-by: Evan Lezar <[email protected]>
1 parent cd63a9d commit d4664fe

File tree

4 files changed

+237
-270
lines changed

4 files changed

+237
-270
lines changed

cmd/nvidia-container-runtime-hook/container_config.go

Lines changed: 19 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"log"
77
"os"
88
"path"
9-
"path/filepath"
109

1110
"github.com/opencontainers/runtime-spec/specs-go"
1211
"golang.org/x/mod/semver"
@@ -15,11 +14,11 @@ import (
1514
)
1615

1716
const (
18-
envCUDAVersion = "CUDA_VERSION"
19-
envNVRequirePrefix = "NVIDIA_REQUIRE_"
20-
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
21-
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
22-
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
17+
envCUDAVersion = "CUDA_VERSION"
18+
envNVRequirePrefix = "NVIDIA_REQUIRE_"
19+
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
20+
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
21+
2322
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
2423
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
2524
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
@@ -30,10 +29,6 @@ const (
3029
capSysAdmin = "CAP_SYS_ADMIN"
3130
)
3231

33-
const (
34-
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
35-
)
36-
3732
type nvidiaConfig struct {
3833
Devices []string
3934
MigConfigDevices string
@@ -76,23 +71,14 @@ type LinuxCapabilities struct {
7671
Ambient []string `json:"ambient,omitempty" platform:"linux"`
7772
}
7873

79-
// Mount from OCI runtime spec
80-
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
81-
type Mount struct {
82-
Destination string `json:"destination"`
83-
Type string `json:"type,omitempty" platform:"linux,solaris"`
84-
Source string `json:"source,omitempty"`
85-
Options []string `json:"options,omitempty"`
86-
}
87-
8874
// Spec from OCI runtime spec
8975
// We use pointers to structs, similarly to the latest version of runtime-spec:
9076
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
9177
type Spec struct {
92-
Version *string `json:"ociVersion"`
93-
Process *Process `json:"process,omitempty"`
94-
Root *Root `json:"root,omitempty"`
95-
Mounts []Mount `json:"mounts,omitempty"`
78+
Version *string `json:"ociVersion"`
79+
Process *Process `json:"process,omitempty"`
80+
Root *Root `json:"root,omitempty"`
81+
Mounts []specs.Mount `json:"mounts,omitempty"`
9682
}
9783

9884
// HookState holds state information about the hook
@@ -171,58 +157,22 @@ func isPrivileged(s *Spec) bool {
171157
return image.IsPrivileged(&fullSpec)
172158
}
173159

174-
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) []string {
160+
func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string {
175161
// We check if the image has at least one of the Swarm resource envvars defined and use this
176162
// if specified.
177163
for _, envvar := range swarmResourceEnvvars {
178-
if image.HasEnvvar(envvar) {
179-
return image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
164+
if containerImage.HasEnvvar(envvar) {
165+
return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List()
180166
}
181167
}
182168

183-
return image.DevicesFromEnvvars(envNVVisibleDevices).List()
184-
}
185-
186-
func getDevicesFromMounts(mounts []Mount) []string {
187-
var devices []string
188-
for _, m := range mounts {
189-
root := filepath.Clean(deviceListAsVolumeMountsRoot)
190-
source := filepath.Clean(m.Source)
191-
destination := filepath.Clean(m.Destination)
192-
193-
// Only consider mounts whose host volume is /dev/null
194-
if source != "/dev/null" {
195-
continue
196-
}
197-
// Only consider container mount points that begin with 'root'
198-
if len(destination) < len(root) {
199-
continue
200-
}
201-
if destination[:len(root)] != root {
202-
continue
203-
}
204-
// Grab the full path beyond 'root' and add it to the list of devices
205-
device := destination[len(root):]
206-
if len(device) > 0 && device[0] == '/' {
207-
device = device[1:]
208-
}
209-
if len(device) == 0 {
210-
continue
211-
}
212-
devices = append(devices, device)
213-
}
214-
215-
if devices == nil {
216-
return nil
217-
}
218-
219-
return devices
169+
return containerImage.DevicesFromEnvvars(image.NVIDIAVisibleDevicesEnvVar).List()
220170
}
221171

222-
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) []string {
172+
func getDevices(hookConfig *HookConfig, image image.CUDA, privileged bool) []string {
223173
// If enabled, try and get the device list from volume mounts first
224174
if hookConfig.AcceptDeviceListAsVolumeMounts {
225-
devices := getDevicesFromMounts(mounts)
175+
devices := image.VisibleDevicesFromMounts()
226176
if len(devices) > 0 {
227177
return devices
228178
}
@@ -294,10 +244,10 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo
294244
return capabilities
295245
}
296246

297-
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
247+
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool) *nvidiaConfig {
298248
legacyImage := image.IsLegacy()
299249

300-
devices := getDevices(hookConfig, image, mounts, privileged)
250+
devices := getDevices(hookConfig, image, privileged)
301251
if len(devices) == 0 {
302252
// empty devices means this is not a GPU container.
303253
return nil
@@ -357,6 +307,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
357307

358308
image, err := image.New(
359309
image.WithEnv(s.Process.Env),
310+
image.WithMounts(s.Mounts),
360311
image.WithDisableRequire(hook.DisableRequire),
361312
)
362313
if err != nil {
@@ -368,6 +319,6 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
368319
Pid: h.Pid,
369320
Rootfs: s.Root.Path,
370321
Image: image,
371-
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
322+
Nvidia: getNvidiaConfig(&hook, image, privileged),
372323
}
373324
}

0 commit comments

Comments
 (0)