Skip to content

Commit a02f7f8

Browse files
author
Evan Lezar
committed
Merge branch 'CNT-1554/docker-swarm' into 'master'
Fix bug where docker swarm device selection is overriden by NVIDIA_VISIBLE_DEVICES See merge request nvidia/container-toolkit/container-toolkit!31
2 parents 825990b + 2a92d6a commit a02f7f8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+25512
-72
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
dist
22
*.swp
33
*.swo
4+
/coverage.out

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,9 @@ misspell:
6363
vet:
6464
go vet $(MODULE)/...
6565

66+
COVERAGE_FILE := coverage.out
6667
test:
67-
go test $(MODULE)/...
68+
go test -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
69+
70+
coverage: test
71+
go tool cover -func=$(COVERAGE_FILE)

go.sum

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
2020
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
2121
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
2222
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
23+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
2324
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
2425
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
2526
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

pkg/container_config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
216216
for _, envVar := range envVars {
217217
if devs, ok := env[envVar]; ok {
218218
devices = &devs
219+
break
219220
}
220221
}
221222

pkg/container_test.go

Lines changed: 201 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ package main
22

33
import (
44
"path/filepath"
5-
"reflect"
65
"testing"
6+
7+
"github.com/stretchr/testify/require"
78
)
89

910
func TestGetNvidiaConfig(t *testing.T) {
@@ -414,39 +415,28 @@ func TestGetNvidiaConfig(t *testing.T) {
414415

415416
// For any tests that are expected to panic, make sure they do.
416417
if tc.expectedPanic {
417-
mustPanic(t, getConfig)
418+
require.Panics(t, getConfig)
418419
return
419420
}
420421

421422
// For all other tests, just grab the config
422423
getConfig()
423424

424425
// And start comparing the test results to the expected results.
425-
if config == nil && tc.expectedConfig == nil {
426-
return
427-
}
428-
if config != nil && tc.expectedConfig != nil {
429-
if !reflect.DeepEqual(config.Devices, tc.expectedConfig.Devices) {
430-
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
431-
}
432-
if !reflect.DeepEqual(config.MigConfigDevices, tc.expectedConfig.MigConfigDevices) {
433-
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
434-
}
435-
if !reflect.DeepEqual(config.MigMonitorDevices, tc.expectedConfig.MigMonitorDevices) {
436-
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
437-
}
438-
if !reflect.DeepEqual(config.DriverCapabilities, tc.expectedConfig.DriverCapabilities) {
439-
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
440-
}
441-
if !elementsMatch(config.Requirements, tc.expectedConfig.Requirements) {
442-
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
443-
}
444-
if !reflect.DeepEqual(config.DisableRequire, tc.expectedConfig.DisableRequire) {
445-
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
446-
}
426+
if tc.expectedConfig == nil {
427+
require.Nil(t, config, tc.description)
447428
return
448429
}
449-
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
430+
431+
require.NotNil(t, config, tc.description)
432+
433+
require.Equal(t, tc.expectedConfig.Devices, config.Devices)
434+
require.Equal(t, tc.expectedConfig.MigConfigDevices, config.MigConfigDevices)
435+
require.Equal(t, tc.expectedConfig.MigMonitorDevices, config.MigMonitorDevices)
436+
require.Equal(t, tc.expectedConfig.DriverCapabilities, config.DriverCapabilities)
437+
438+
require.ElementsMatch(t, tc.expectedConfig.Requirements, config.Requirements)
439+
require.Equal(t, tc.expectedConfig.DisableRequire, config.DisableRequire)
450440
})
451441
}
452442
}
@@ -524,9 +514,7 @@ func TestGetDevicesFromMounts(t *testing.T) {
524514
for _, tc := range tests {
525515
t.Run(tc.description, func(t *testing.T) {
526516
devices := getDevicesFromMounts(tc.mounts)
527-
if !reflect.DeepEqual(devices, tc.expectedDevices) {
528-
t.Errorf("Unexpected devices (got: %v, wanted: %v)", *devices, *tc.expectedDevices)
529-
}
517+
require.Equal(t, tc.expectedDevices, devices)
530518
})
531519
}
532520
}
@@ -639,36 +627,198 @@ func TestDeviceListSourcePriority(t *testing.T) {
639627

640628
// For all other tests, just grab the devices and check the results
641629
getDevices()
642-
if !reflect.DeepEqual(devices, tc.expectedDevices) {
643-
t.Errorf("Unexpected devices (got: %v, wanted: %v)", *devices, *tc.expectedDevices)
644-
}
630+
631+
require.Equal(t, tc.expectedDevices, devices)
645632
})
646633
}
647634
}
648635

649-
func elementsMatch(slice0, slice1 []string) bool {
650-
map0 := make(map[string]int)
651-
map1 := make(map[string]int)
652-
653-
for _, e := range slice0 {
654-
map0[e]++
655-
}
636+
func TestGetDevicesFromEnvvar(t *testing.T) {
637+
all := "all"
638+
empty := ""
639+
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
640+
gpuID := "GPU-12345"
641+
anotherGPUID := "GPU-67890"
656642

657-
for _, e := range slice1 {
658-
map1[e]++
643+
var tests = []struct {
644+
description string
645+
envSwarmGPU *string
646+
env map[string]string
647+
legacyImage bool
648+
expectedDevices *string
649+
}{
650+
{
651+
description: "empty env returns nil for non-legacy image",
652+
},
653+
{
654+
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
655+
env: map[string]string{
656+
envNVVisibleDevices: "",
657+
},
658+
},
659+
{
660+
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
661+
env: map[string]string{
662+
envNVVisibleDevices: "void",
663+
},
664+
},
665+
{
666+
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
667+
env: map[string]string{
668+
envNVVisibleDevices: "none",
669+
},
670+
expectedDevices: &empty,
671+
},
672+
{
673+
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
674+
env: map[string]string{
675+
envNVVisibleDevices: gpuID,
676+
},
677+
expectedDevices: &gpuID,
678+
},
679+
{
680+
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
681+
env: map[string]string{
682+
envNVVisibleDevices: gpuID,
683+
},
684+
legacyImage: true,
685+
expectedDevices: &gpuID,
686+
},
687+
{
688+
description: "empty env returns all for legacy image",
689+
legacyImage: true,
690+
expectedDevices: &all,
691+
},
692+
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
693+
// not enabled
694+
{
695+
description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
696+
env: map[string]string{
697+
envDockerResourceGPUs: anotherGPUID,
698+
},
699+
},
700+
{
701+
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
702+
env: map[string]string{
703+
envNVVisibleDevices: "",
704+
envDockerResourceGPUs: anotherGPUID,
705+
},
706+
},
707+
{
708+
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
709+
env: map[string]string{
710+
envNVVisibleDevices: "void",
711+
envDockerResourceGPUs: anotherGPUID,
712+
},
713+
},
714+
{
715+
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
716+
env: map[string]string{
717+
envNVVisibleDevices: "none",
718+
envDockerResourceGPUs: anotherGPUID,
719+
},
720+
expectedDevices: &empty,
721+
},
722+
{
723+
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
724+
env: map[string]string{
725+
envNVVisibleDevices: gpuID,
726+
envDockerResourceGPUs: anotherGPUID,
727+
},
728+
expectedDevices: &gpuID,
729+
},
730+
{
731+
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
732+
env: map[string]string{
733+
envNVVisibleDevices: gpuID,
734+
envDockerResourceGPUs: anotherGPUID,
735+
},
736+
legacyImage: true,
737+
expectedDevices: &gpuID,
738+
},
739+
{
740+
description: "empty env returns all for legacy image",
741+
env: map[string]string{
742+
envDockerResourceGPUs: anotherGPUID,
743+
},
744+
legacyImage: true,
745+
expectedDevices: &all,
746+
},
747+
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
748+
// enabled
749+
{
750+
description: "empty env returns nil for non-legacy image",
751+
envSwarmGPU: &envDockerResourceGPUs,
752+
},
753+
{
754+
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
755+
envSwarmGPU: &envDockerResourceGPUs,
756+
env: map[string]string{
757+
envDockerResourceGPUs: "",
758+
},
759+
},
760+
{
761+
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
762+
envSwarmGPU: &envDockerResourceGPUs,
763+
env: map[string]string{
764+
envDockerResourceGPUs: "void",
765+
},
766+
},
767+
{
768+
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
769+
envSwarmGPU: &envDockerResourceGPUs,
770+
env: map[string]string{
771+
envDockerResourceGPUs: "none",
772+
},
773+
expectedDevices: &empty,
774+
},
775+
{
776+
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
777+
envSwarmGPU: &envDockerResourceGPUs,
778+
env: map[string]string{
779+
envDockerResourceGPUs: gpuID,
780+
},
781+
expectedDevices: &gpuID,
782+
},
783+
{
784+
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
785+
envSwarmGPU: &envDockerResourceGPUs,
786+
env: map[string]string{
787+
envDockerResourceGPUs: gpuID,
788+
},
789+
legacyImage: true,
790+
expectedDevices: &gpuID,
791+
},
792+
{
793+
description: "DOCKER_RESOURCE_GPUS is selected if present",
794+
envSwarmGPU: &envDockerResourceGPUs,
795+
env: map[string]string{
796+
envDockerResourceGPUs: anotherGPUID,
797+
},
798+
expectedDevices: &anotherGPUID,
799+
},
800+
{
801+
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
802+
envSwarmGPU: &envDockerResourceGPUs,
803+
env: map[string]string{
804+
envNVVisibleDevices: gpuID,
805+
envDockerResourceGPUs: anotherGPUID,
806+
},
807+
expectedDevices: &anotherGPUID,
808+
},
659809
}
660810

661-
for k0, v0 := range map0 {
662-
if map1[k0] != v0 {
663-
return false
664-
}
665-
}
811+
for i, tc := range tests {
812+
t.Run(tc.description, func(t *testing.T) {
813+
envSwarmGPU = tc.envSwarmGPU
814+
devices := getDevicesFromEnvvar(tc.env, tc.legacyImage)
815+
if tc.expectedDevices == nil {
816+
require.Nil(t, devices, "%d: %v", i, tc)
817+
return
818+
}
666819

667-
for k1, v1 := range map1 {
668-
if map0[k1] != v1 {
669-
return false
670-
}
820+
require.NotNil(t, devices, "%d: %v", i, tc)
821+
require.Equal(t, *tc.expectedDevices, *devices, "%d: %v", i, tc)
822+
})
671823
}
672-
673-
return true
674824
}

pkg/hook_test.go

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package main
33
import (
44
"encoding/json"
55
"testing"
6+
7+
"github.com/stretchr/testify/require"
68
)
79

810
func TestParseCudaVersionValid(t *testing.T) {
@@ -16,22 +18,13 @@ func TestParseCudaVersionValid(t *testing.T) {
1618
{"9.0.116", [3]uint32{9, 0, 116}},
1719
{"4294967295.4294967295.4294967295", [3]uint32{4294967295, 4294967295, 4294967295}},
1820
}
19-
for _, c := range tests {
21+
for i, c := range tests {
2022
vmaj, vmin, vpatch := parseCudaVersion(c.version)
21-
if vmaj != c.expected[0] || vmin != c.expected[1] || vpatch != c.expected[2] {
22-
t.Errorf("parseCudaVersion(%s): %d.%d.%d (expected: %v)", c.version, vmaj, vmin, vpatch, c.expected)
23-
}
24-
}
25-
}
2623

27-
func mustPanic(t *testing.T, f func()) {
28-
defer func() {
29-
if err := recover(); err == nil {
30-
t.Error("Test didn't panic!")
31-
}
32-
}()
24+
version := [3]uint32{vmaj, vmin, vpatch}
3325

34-
f()
26+
require.Equal(t, c.expected, version, "%d: %v", i, c)
27+
}
3528
}
3629

3730
func TestParseCudaVersionInvalid(t *testing.T) {
@@ -53,10 +46,9 @@ func TestParseCudaVersionInvalid(t *testing.T) {
5346
"-9.-1.-116",
5447
}
5548
for _, c := range tests {
56-
mustPanic(t, func() {
57-
t.Logf("parseCudaVersion(%s)", c)
49+
require.Panics(t, func() {
5850
parseCudaVersion(c)
59-
})
51+
}, "parseCudaVersion(%v)", c)
6052
}
6153
}
6254

@@ -132,12 +124,11 @@ func TestIsPrivileged(t *testing.T) {
132124
false,
133125
},
134126
}
135-
for _, tc := range tests {
127+
for i, tc := range tests {
136128
var spec Spec
137129
_ = json.Unmarshal([]byte(tc.spec), &spec)
138130
privileged := isPrivileged(&spec)
139-
if privileged != tc.expected {
140-
t.Errorf("isPrivileged() returned unexpectred value (privileged: %v, tc.expected: %v)", privileged, tc.expected)
141-
}
131+
132+
require.Equal(t, tc.expected, privileged, "%d: %v", i, tc)
142133
}
143134
}

0 commit comments

Comments
 (0)