Skip to content

Commit 718fe70

Browse files
committed
[no-relnote] Allow local nvidia-container-cli tests
Signed-off-by: Evan Lezar <[email protected]>
1 parent ae30adc commit 718fe70

File tree

1 file changed

+54
-22
lines changed

1 file changed

+54
-22
lines changed

tests/e2e/nvidia-container-cli_test.go

Lines changed: 54 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -114,17 +114,21 @@ unshare --mount --pid --fork --propagation private -- sh -eux <<'\''IN_NS'\''
114114
IN_NS
115115
`
116116

117-
dockerRunCmdTemplate = `docker run -d --name node-container-e2e --privileged --runtime=nvidia \
117+
startTestContainerTemplate = `docker run -d --name {{.ContainerName}} --privileged --runtime=nvidia \
118118
-e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all \
119119
-e NVIDIA_DRIVER_CAPABILITIES=all \
120+
{{ range $i, $a := .AdditionalArguments -}}
121+
{{ $a }} \
122+
{{ end -}}
120123
ubuntu sleep infinity`
121124
)
122125

123126
var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, Label("libnvidia-container"), func() {
124127
var (
125-
runner Runner
126-
containerName = "node-container-e2e"
127-
hostOutput string
128+
runner Runner
129+
containerName = "node-container-e2e"
130+
hostOutput string
131+
additionalContainerArguments []string
128132
)
129133

130134
BeforeAll(func(ctx context.Context) {
@@ -145,6 +149,21 @@ var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, Label("libn
145149

146150
err = installer.Install()
147151
Expect(err).ToNot(HaveOccurred())
152+
} else {
153+
// If installCTK is false, we use the preinstalled toolkit.
154+
// TODO: This should be updated for other distributions and other components of the toolkit.
155+
output, _, err := runner.Run("ls /lib/**/libnvidia-container*.so.*.*")
156+
Expect(err).ToNot(HaveOccurred())
157+
158+
output = strings.TrimSpace(output)
159+
Expect(output).ToNot(BeEmpty())
160+
161+
for _, lib := range strings.Split(output, "\n") {
162+
additionalContainerArguments = append(additionalContainerArguments, "-v "+lib+":"+lib)
163+
}
164+
additionalContainerArguments = append(additionalContainerArguments,
165+
"-v /usr/bin/nvidia-container-cli:/usr/bin/nvidia-container-cli",
166+
)
148167
}
149168

150169
// Capture the host GPU list.
@@ -168,33 +187,46 @@ var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, Label("libn
168187

169188
It("should report the same GPUs inside the container as on the host", func(ctx context.Context) {
170189
// Launch the container in detached mode.
171-
_, _, err := runner.Run(dockerRunCmdTemplate)
190+
var startContainerScriptBuilder strings.Builder
191+
startContainerTemplate, err := template.New("startContainer").Parse(startTestContainerTemplate)
172192
Expect(err).ToNot(HaveOccurred())
173-
174-
// Install docker and nvidia-container-toolkit in the container.
175-
// Run as root and use bash for better compatibility
176-
_, _, err = runner.Run(fmt.Sprintf("docker exec -u root %s bash -c '%s'", containerName, installDockerTemplate))
193+
err = startContainerTemplate.Execute(&startContainerScriptBuilder, struct {
194+
ContainerName string
195+
AdditionalArguments []string
196+
}{
197+
ContainerName: containerName,
198+
AdditionalArguments: additionalContainerArguments,
199+
})
177200
Expect(err).ToNot(HaveOccurred())
178201

179-
// Build the docker run command (detached mode) from the template so it
180-
// stays readable while still resulting in a single-line invocation.
181-
tmpl, err := template.New("toolkitInstall").Parse(installCTKTemplate)
202+
startContainerScript := startContainerScriptBuilder.String()
203+
GinkgoLogr.Info("Starting test container", "script", startContainerScript)
204+
_, _, err = runner.Run(startContainerScript)
182205
Expect(err).ToNot(HaveOccurred())
183206

184-
var toolkitInstall strings.Builder
185-
err = tmpl.Execute(&toolkitInstall, struct {
186-
ToolkitImage string
187-
}{
188-
ToolkitImage: imageName + ":" + imageTag,
189-
})
207+
// Install docker in the container.
208+
_, _, err = runner.Run(fmt.Sprintf("docker exec -u root "+containerName+" bash -c '%s'", installDockerTemplate))
190209
Expect(err).ToNot(HaveOccurred())
191210

192-
_, _, err = runner.Run(fmt.Sprintf("docker exec -u root %s bash -c '%s'", containerName, toolkitInstall.String()))
193-
Expect(err).ToNot(HaveOccurred())
211+
if installCTK {
212+
// Install nvidia-container-cli in the container.
213+
tmpl, err := template.New("toolkitInstall").Parse(installCTKTemplate)
214+
Expect(err).ToNot(HaveOccurred())
215+
216+
var toolkitInstall strings.Builder
217+
err = tmpl.Execute(&toolkitInstall, struct {
218+
ToolkitImage string
219+
}{
220+
ToolkitImage: imageName + ":" + imageTag,
221+
})
222+
Expect(err).ToNot(HaveOccurred())
223+
224+
_, _, err = runner.Run(fmt.Sprintf("docker exec -u root "+containerName+" bash -c '%s'", toolkitInstall.String()))
225+
Expect(err).ToNot(HaveOccurred())
226+
}
194227

195228
// Run the test script in the container.
196-
// Capture but don't fail on errors - we'll check the results via container logs.
197-
output, _, err := runner.Run(fmt.Sprintf("docker exec -u root %s bash -c '%s'", containerName, libnvidiaContainerCliTestTemplate))
229+
output, _, err := runner.Run(fmt.Sprintf("docker exec -u root "+containerName+" bash -c '%s'", libnvidiaContainerCliTestTemplate))
198230
Expect(err).ToNot(HaveOccurred())
199231
Expect(strings.TrimSpace(output)).ToNot(BeEmpty())
200232
Expect(hostOutput).To(ContainSubstring(strings.TrimSpace(output)))

0 commit comments

Comments
 (0)