Skip to content

Commit 1b5edd6

Browse files
Add remote-test option for E2E
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
1 parent a990860 commit 1b5edd6

File tree

111 files changed

+17884
-43
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

111 files changed

+17884
-43
lines changed

tests/e2e/Makefile

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,27 @@ include $(CURDIR)/versions.mk
1818

1919
E2E_RUNTIME ?= docker
2020

21+
E2E_INSTALL_CTK ?= true
22+
23+
E2E_IMAGE_NAME ?= ghcr.io/nvidia/container-toolkit
24+
E2E_IMAGE_TAG ?=
25+
ifeq ($(E2E_IMAGE_TAG),)
26+
$(error E2E_IMAGE_TAG is not set)
27+
endif
28+
29+
E2E_SSH_KEY ?=
30+
E2E_SSH_USER ?=
31+
E2E_SSH_HOST ?=
32+
2133
.PHONY: test
2234
test:
2335
cd $(CURDIR)/tests/e2e && $(GO_CMD) test -v . -args \
2436
-ginkgo.focus="$(E2E_RUNTIME)" \
2537
-test.timeout=1h \
26-
-ginkgo.v
38+
-ginkgo.v \
39+
-install-ctk=$(E2E_INSTALL_CTK) \
40+
-image-repo=$(E2E_IMAGE_NAME) \
41+
-image-tag=$(E2E_IMAGE_TAG) \
42+
-ssh-key=$(E2E_SSH_KEY) \
43+
-ssh-user=$(E2E_SSH_USER) \
44+
-remote-host=$(E2E_SSH_HOST)

tests/e2e/e2e_test.go

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,8 @@
1717
package e2e
1818

1919
import (
20-
"bytes"
2120
"context"
22-
"fmt"
23-
"os/exec"
21+
"flag"
2422
"testing"
2523

2624
. "github.com/onsi/ginkgo/v2"
@@ -30,8 +28,26 @@ import (
3028
// Test context
3129
var (
3230
ctx context.Context
31+
32+
installCTK bool
33+
34+
imageRepo string
35+
imageTag string
36+
37+
sshKey string
38+
sshUser string
39+
hostURL string
3340
)
3441

42+
func init() {
43+
flag.BoolVar(&installCTK, "install-ctk", false, "Install the NVIDIA Container Toolkit")
44+
flag.StringVar(&imageRepo, "image-repo", "", "Repository of the image to test")
45+
flag.StringVar(&imageTag, "image-tag", "", "Tag of the image to test")
46+
flag.StringVar(&sshKey, "ssh-key", "", "SSH key to use for remote login")
47+
flag.StringVar(&sshUser, "ssh-user", "", "SSH user to use for remote login")
48+
flag.StringVar(&hostURL, "remote-host", "", "Hostname of the remote machine")
49+
}
50+
3551
func TestMain(t *testing.T) {
3652
suiteName := "NVIDIA Container Toolkit E2E"
3753

@@ -45,25 +61,3 @@ func TestMain(t *testing.T) {
4561
var _ = BeforeSuite(func() {
4662
ctx = context.Background()
4763
})
48-
49-
func runScript(script string) (string, error) {
50-
// Create a command to run the script using bash
51-
cmd := exec.Command("bash", "-c", script)
52-
53-
// Buffer to capture standard output
54-
var stdout bytes.Buffer
55-
cmd.Stdout = &stdout
56-
57-
// Buffer to capture standard error
58-
var stderr bytes.Buffer
59-
cmd.Stderr = &stderr
60-
61-
// Run the command
62-
err := cmd.Run()
63-
if err != nil {
64-
return "", fmt.Errorf("script execution failed: %v\nSTDOUT: %s\nSTDERR: %s", err, stdout.String(), stderr.String())
65-
}
66-
67-
// Return the captured stdout and nil error
68-
return stdout.String(), nil
69-
}

tests/e2e/installer.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package e2e
18+
19+
import (
20+
"bytes"
21+
"fmt"
22+
"text/template"
23+
)
24+
25+
// dockerInstallTemplate is a template for installing the NVIDIA Container Toolkit
26+
// on a host using Docker.
27+
var dockerInstallTemplate = `
28+
#! /usr/bin/env bash
29+
set -xe
30+
31+
: ${IMAGE:={{.Image}}}
32+
33+
# Create a temporary directory
34+
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
35+
mkdir -p "$TEMP_DIR"
36+
37+
# Given that docker has an init function that checks for the existence of the
38+
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
39+
# in the /usr/bin directory.
40+
# See https://github.com/moby/moby/blob/20a05dabf44934447d1a66cdd616cc803b81d4e2/daemon/nvidia_linux.go#L32-L46
41+
sudo rm -f /usr/bin/nvidia-container-runtime-hook
42+
sudo ln -s "$TEMP_DIR/toolkit/nvidia-container-runtime-hook" /usr/bin/nvidia-container-runtime-hook
43+
44+
docker run --pid=host --rm -i --privileged \
45+
-v /:/host \
46+
-v /var/run/docker.sock:/var/run/docker.sock \
47+
-v "$TEMP_DIR:$TEMP_DIR" \
48+
-v /etc/docker:/config-root \
49+
${IMAGE} \
50+
--root "$TEMP_DIR" \
51+
--runtime=docker \
52+
--config=/config-root/daemon.json \
53+
--driver-root=/ \
54+
--no-daemon \
55+
--restart-mode=systemd
56+
`
57+
58+
type ToolkitInstaller struct {
59+
Runner
60+
Image string
61+
Template string
62+
}
63+
64+
type installerOption func(*ToolkitInstaller)
65+
66+
func WithRunner(r Runner) installerOption {
67+
return func(i *ToolkitInstaller) {
68+
i.Runner = r
69+
}
70+
}
71+
72+
func WithImage(image string) installerOption {
73+
return func(i *ToolkitInstaller) {
74+
i.Image = image
75+
}
76+
}
77+
78+
func WithTemplate(template string) installerOption {
79+
return func(i *ToolkitInstaller) {
80+
i.Template = template
81+
}
82+
}
83+
84+
func NewToolkitInstaller(opts ...installerOption) (*ToolkitInstaller, error) {
85+
i := &ToolkitInstaller{
86+
Template: dockerInstallTemplate,
87+
}
88+
89+
for _, opt := range opts {
90+
opt(i)
91+
}
92+
93+
return i, nil
94+
}
95+
96+
func (i *ToolkitInstaller) Install() error {
97+
// Parse the combined template
98+
tmpl, err := template.New("installScript").Parse(i.Template)
99+
if err != nil {
100+
return fmt.Errorf("error parsing template: %w", err)
101+
}
102+
103+
// Execute the template
104+
var renderedScript bytes.Buffer
105+
err = tmpl.Execute(&renderedScript, i)
106+
if err != nil {
107+
return fmt.Errorf("error executing template: %w", err)
108+
}
109+
110+
_, _, err = i.Runner.Run(renderedScript.String())
111+
return err
112+
}

tests/e2e/nvidia-container-toolkit_test.go

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,27 @@ import (
2424
)
2525

2626
// Integration tests for Docker runtime
27-
var _ = Describe("docker", func() {
27+
var _ = Describe("docker", Ordered, func() {
28+
var r Runner
29+
// Install the NVIDIA Container Toolkit
30+
BeforeAll(func(ctx context.Context) {
31+
r = NewRunner(
32+
WithIp(hostURL),
33+
WithSshKey(sshKey),
34+
WithSshUser(sshUser),
35+
)
36+
if installCTK {
37+
installer, err := NewToolkitInstaller(
38+
WithRunner(r),
39+
WithImage(imageRepo+":"+imageTag),
40+
WithTemplate(dockerInstallTemplate),
41+
)
42+
Expect(err).ToNot(HaveOccurred())
43+
err = installer.Install()
44+
Expect(err).ToNot(HaveOccurred())
45+
}
46+
})
47+
2848
// GPUs are accessible in a container: Running nvidia-smi -L inside the
2949
// container shows the same output inside the container as outside the
3050
// container. This means that the following commands must all produce
@@ -33,33 +53,33 @@ var _ = Describe("docker", func() {
3353
var hostOutput string
3454

3555
BeforeAll(func(ctx context.Context) {
36-
_, err := runScript("docker pull ubuntu")
56+
_, _, err := r.Run("docker pull ubuntu")
3757
Expect(err).ToNot(HaveOccurred())
3858

39-
hostOutput, err = runScript("nvidia-smi -L")
59+
hostOutput, _, err = r.Run("nvidia-smi -L")
4060
Expect(err).ToNot(HaveOccurred())
4161
})
4262

4363
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
44-
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
64+
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
4565
Expect(err).ToNot(HaveOccurred())
4666
Expect(containerOutput).To(Equal(hostOutput))
4767
})
4868

4969
It("should support automatic CDI spec generation", func(ctx context.Context) {
50-
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
70+
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
5171
Expect(err).ToNot(HaveOccurred())
5272
Expect(containerOutput).To(Equal(hostOutput))
5373
})
5474

5575
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
56-
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
76+
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
5777
Expect(err).ToNot(HaveOccurred())
5878
Expect(containerOutput).To(Equal(hostOutput))
5979
})
6080

6181
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
62-
containerOutput, err := runScript("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
82+
containerOutput, _, err := r.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
6383
Expect(err).ToNot(HaveOccurred())
6484
Expect(containerOutput).To(Equal(hostOutput))
6585
})
@@ -69,34 +89,34 @@ var _ = Describe("docker", func() {
6989
// The following should all produce the same result.
7090
When("Running the cuda-vectorAdd sample", Ordered, func() {
7191
BeforeAll(func(ctx context.Context) {
72-
_, err := runScript("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
92+
_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
7393
Expect(err).ToNot(HaveOccurred())
7494
})
7595

7696
var referenceOutput string
7797

7898
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
7999
var err error
80-
referenceOutput, err = runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
100+
referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
81101
Expect(err).ToNot(HaveOccurred())
82102

83103
Expect(referenceOutput).To(ContainSubstring("Test PASSED"))
84104
})
85105

86106
It("should support automatic CDI spec generation", func(ctx context.Context) {
87-
out2, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
107+
out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
88108
Expect(err).ToNot(HaveOccurred())
89109
Expect(referenceOutput).To(Equal(out2))
90110
})
91111

92112
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
93-
out3, err := runScript("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
113+
out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
94114
Expect(err).ToNot(HaveOccurred())
95115
Expect(referenceOutput).To(Equal(out3))
96116
})
97117

98118
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
99-
out4, err := runScript("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
119+
out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
100120
Expect(err).ToNot(HaveOccurred())
101121
Expect(referenceOutput).To(Equal(out4))
102122
})
@@ -106,34 +126,34 @@ var _ = Describe("docker", func() {
106126
// The following should all produce the same result.
107127
When("Running the cuda-deviceQuery sample", Ordered, func() {
108128
BeforeAll(func(ctx context.Context) {
109-
_, err := runScript("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
129+
_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
110130
Expect(err).ToNot(HaveOccurred())
111131
})
112132

113133
var referenceOutput string
114134

115135
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
116136
var err error
117-
referenceOutput, err = runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
137+
referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
118138
Expect(err).ToNot(HaveOccurred())
119139

120140
Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
121141
})
122142

123143
It("should support automatic CDI spec generation", func(ctx context.Context) {
124-
out2, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
144+
out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
125145
Expect(err).ToNot(HaveOccurred())
126146
Expect(referenceOutput).To(Equal(out2))
127147
})
128148

129149
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
130-
out3, err := runScript("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
150+
out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
131151
Expect(err).ToNot(HaveOccurred())
132152
Expect(referenceOutput).To(Equal(out3))
133153
})
134154

135155
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
136-
out4, err := runScript("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
156+
out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
137157
Expect(err).ToNot(HaveOccurred())
138158
Expect(referenceOutput).To(Equal(out4))
139159
})

0 commit comments

Comments
 (0)