@@ -114,17 +114,21 @@ unshare --mount --pid --fork --propagation private -- sh -eux <<'\''IN_NS'\''
114114IN_NS
115115`
116116
117- dockerRunCmdTemplate = `docker run -d --name node-container-e2e --privileged --runtime=nvidia \
117+ startTestContainerTemplate = `docker run -d --name {{.ContainerName}} --privileged --runtime=nvidia \
118118 -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all \
119119 -e NVIDIA_DRIVER_CAPABILITIES=all \
120+ {{ range $i, $a := .AdditionalArguments -}}
121+ {{ $a }} \
122+ {{ end -}}
120123 ubuntu sleep infinity`
121124)
122125
123126var _ = Describe ("nvidia-container-cli" , Ordered , ContinueOnFailure , Label ("libnvidia-container" ), func () {
124127 var (
125- runner Runner
126- containerName = "node-container-e2e"
127- hostOutput string
128+ runner Runner
129+ containerName = "node-container-e2e"
130+ hostOutput string
131+ additionalContainerArguments []string
128132 )
129133
130134 BeforeAll (func (ctx context.Context ) {
@@ -145,6 +149,21 @@ var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, Label("libn
145149
146150 err = installer .Install ()
147151 Expect (err ).ToNot (HaveOccurred ())
152+ } else {
153+ // If installCTK is false, we use the toolkit preinstalled on the host and mount its libraries and CLI into the test container.
154+ // TODO: This should be updated for other distributions and other components of the toolkit.
155+ output , _ , err := runner .Run ("ls /lib/**/libnvidia-container*.so.*.*" )
156+ Expect (err ).ToNot (HaveOccurred ())
157+
158+ output = strings .TrimSpace (output )
159+ Expect (output ).ToNot (BeEmpty ())
160+
161+ for _ , lib := range strings .Split (output , "\n " ) {
162+ additionalContainerArguments = append (additionalContainerArguments , "-v " + lib + ":" + lib )
163+ }
164+ additionalContainerArguments = append (additionalContainerArguments ,
165+ "-v /usr/bin/nvidia-container-cli:/usr/bin/nvidia-container-cli" ,
166+ )
148167 }
149168
150169 // Capture the host GPU list.
@@ -168,33 +187,46 @@ var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, Label("libn
168187
169188 It ("should report the same GPUs inside the container as on the host" , func (ctx context.Context ) {
170189 // Launch the container in detached mode.
171- _ , _ , err := runner .Run (dockerRunCmdTemplate )
190+ var startContainerScriptBuilder strings.Builder
191+ startContainerTemplate , err := template .New ("startContainer" ).Parse (startTestContainerTemplate )
172192 Expect (err ).ToNot (HaveOccurred ())
173-
174- // Install docker and nvidia-container-toolkit in the container.
175- // Run as root and use bash for better compatibility
176- _ , _ , err = runner .Run (fmt .Sprintf ("docker exec -u root %s bash -c '%s'" , containerName , installDockerTemplate ))
193+ err = startContainerTemplate .Execute (& startContainerScriptBuilder , struct {
194+ ContainerName string
195+ AdditionalArguments []string
196+ }{
197+ ContainerName : containerName ,
198+ AdditionalArguments : additionalContainerArguments ,
199+ })
177200 Expect (err ).ToNot (HaveOccurred ())
178201
179- // Build the docker run command (detached mode) from the template so it
180- // stays readable while still resulting in a single-line invocation.
181- tmpl , err := template . New ( "toolkitInstall" ). Parse ( installCTKTemplate )
202+ startContainerScript := startContainerScriptBuilder . String ()
203+ GinkgoLogr . Info ( "Starting test container" , "script" , startContainerScript )
204+ _ , _ , err = runner . Run ( startContainerScript )
182205 Expect (err ).ToNot (HaveOccurred ())
183206
184- var toolkitInstall strings.Builder
185- err = tmpl .Execute (& toolkitInstall , struct {
186- ToolkitImage string
187- }{
188- ToolkitImage : imageName + ":" + imageTag ,
189- })
207+ // Install docker in the container.
208+ _ , _ , err = runner .Run (fmt .Sprintf ("docker exec -u root " + containerName + " bash -c '%s'" , installDockerTemplate ))
190209 Expect (err ).ToNot (HaveOccurred ())
191210
192- _ , _ , err = runner .Run (fmt .Sprintf ("docker exec -u root %s bash -c '%s'" , containerName , toolkitInstall .String ()))
193- Expect (err ).ToNot (HaveOccurred ())
211+ if installCTK {
212+ // Install nvidia-container-cli in the container.
213+ tmpl , err := template .New ("toolkitInstall" ).Parse (installCTKTemplate )
214+ Expect (err ).ToNot (HaveOccurred ())
215+
216+ var toolkitInstall strings.Builder
217+ err = tmpl .Execute (& toolkitInstall , struct {
218+ ToolkitImage string
219+ }{
220+ ToolkitImage : imageName + ":" + imageTag ,
221+ })
222+ Expect (err ).ToNot (HaveOccurred ())
223+
224+ _ , _ , err = runner .Run (fmt .Sprintf ("docker exec -u root " + containerName + " bash -c '%s'" , toolkitInstall .String ()))
225+ Expect (err ).ToNot (HaveOccurred ())
226+ }
194227
195228 // Run the test script in the container.
196- // Capture but don't fail on errors - we'll check the results via container logs.
197- output , _ , err := runner .Run (fmt .Sprintf ("docker exec -u root %s bash -c '%s'" , containerName , libnvidiaContainerCliTestTemplate ))
229+ output , _ , err := runner .Run (fmt .Sprintf ("docker exec -u root " + containerName + " bash -c '%s'" , libnvidiaContainerCliTestTemplate ))
198230 Expect (err ).ToNot (HaveOccurred ())
199231 Expect (strings .TrimSpace (output )).ToNot (BeEmpty ())
200232 Expect (hostOutput ).To (ContainSubstring (strings .TrimSpace (output )))
0 commit comments