diff --git a/tests/e2e/installer.go b/tests/e2e/installer.go index 7e7b9cfe5..9956081c5 100644 --- a/tests/e2e/installer.go +++ b/tests/e2e/installer.go @@ -34,12 +34,20 @@ docker run --rm -v {{.CacheDir}}:/cache --entrypoint="sh" {{.ToolkitImage}}-pack var installFromImageTemplate = ` set -xe -cd {{.CacheDir}}/packages/ubuntu18.04/amd64 - -{{if .WithSudo }}sudo {{end}}dpkg -i libnvidia-container1_*_amd64.deb \ - libnvidia-container-tools_*_amd64.deb \ - nvidia-container-toolkit-base_*_amd64.deb \ - nvidia-container-toolkit_*_amd64.deb +arch="$(uname -m)" +case "${arch##*-}" in + x86_64 | amd64) ARCH='amd64' ;; + ppc64el | ppc64le) ARCH='ppc64le' ;; + aarch64 | arm64) ARCH='arm64' ;; + *) echo "unsupported architecture" ; exit 1 ;; +esac + +cd {{.CacheDir}}/packages/ubuntu18.04/${ARCH} + +{{if .WithSudo }}sudo {{end}}dpkg -i libnvidia-container1_*_${ARCH}.deb \ + libnvidia-container-tools_*_${ARCH}.deb \ + nvidia-container-toolkit-base_*_${ARCH}.deb \ + nvidia-container-toolkit_*_${ARCH}.deb cd - diff --git a/tests/e2e/nvidia-cdi-refresh_test.go b/tests/e2e/nvidia-cdi-refresh_test.go index 7240bcf5c..88e435f24 100644 --- a/tests/e2e/nvidia-cdi-refresh_test.go +++ b/tests/e2e/nvidia-cdi-refresh_test.go @@ -132,9 +132,7 @@ var _ = Describe("nvidia-cdi-refresh", Ordered, ContinueOnFailure, Label("system BeforeAll(func(ctx context.Context) { var err error - // TODO: We set installCTK to true here to SKIP the mounting of the files from the host. - // The test here does NOT require the host toolkit. - systemdRunner, err = NewNestedContainerRunner(runner, outerContainerImage, true, containerName, localCacheDir) + systemdRunner, err = NewNestedContainerRunner(runner, outerContainerImage, false, containerName, localCacheDir, true) Expect(err).ToNot(HaveOccurred()) for range 10 { state, _, err := systemdRunner.Run(getSystemStateScript) diff --git a/tests/e2e/nvidia-container-cli_test.go b/tests/e2e/nvidia-container-cli_test.go index fcb26ec94..5746d38b6 100644 --- a/tests/e2e/nvidia-container-cli_test.go +++ b/tests/e2e/nvidia-container-cli_test.go @@ -78,7 +78,7 @@ var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, Label("libn BeforeAll(func(ctx context.Context) { var err error - nestedContainerRunner, err = NewNestedContainerRunner(runner, "ubuntu", installCTK, containerName, localCacheDir) + nestedContainerRunner, err = NewNestedContainerRunner(runner, "ubuntu", !installCTK, containerName, localCacheDir, true) Expect(err).ToNot(HaveOccurred()) if installCTK { diff --git a/tests/e2e/runner.go b/tests/e2e/runner.go index c25a918e0..d6ca6ee4c 100644 --- a/tests/e2e/runner.go +++ b/tests/e2e/runner.go @@ -30,9 +30,12 @@ import ( const ( installPrerequisitesScript = ` - export DEBIAN_FRONTEND=noninteractive - apt-get update && apt-get install -y curl gnupg2 - ` +set -e +export DEBIAN_FRONTEND=noninteractive +# Install prerequisites +apt-get update +apt-get install -y curl gnupg2 +` ) type localRunner struct{} @@ -96,7 +99,7 @@ func NewRunner(opts ...runnerOption) Runner { // NewNestedContainerRunner creates a new nested container runner. // A nested container runs a container inside another container based on a // given runner (remote or local). -func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool, containerName string, cacheDir string) (Runner, error) { +func NewNestedContainerRunner(runner Runner, baseImage string, mountToolkitFromHost bool, containerName string, cacheDir string, requiresGPUs bool) (Runner, error) { // If a container with the same name exists from a previous test run, remove it first. // Ignore errors as container might not exist _, _, err := runner.Run(fmt.Sprintf("docker rm -f %s 2>/dev/null || true", containerName)) @@ -106,13 +109,24 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool, var additionalContainerArguments []string + if requiresGPUs { + // If the container requires access to GPUs we explicitly add the nvidia + // runtime and set `NVIDIA_VISIBLE_DEVICES` to trigger jit-cdi spec + // generation. + additionalContainerArguments = append(additionalContainerArguments, + "--runtime=nvidia", + "-e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all", + ) + } + if cacheDir != "" { additionalContainerArguments = append(additionalContainerArguments, "-v "+cacheDir+":"+cacheDir+":ro", ) } - if !installCTK { + if mountToolkitFromHost { + // TODO: This is actually ONLY needed for the CLI tests. // If installCTK is false, we use the preinstalled toolkit. // This means we need to add toolkit libraries and binaries from the "host" @@ -179,6 +193,7 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool, if err != nil { return nil, err } + _, _, err = runner.Run(script) if err != nil { return nil, fmt.Errorf("failed to run start container script: %w", err) @@ -191,7 +206,7 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool, _, _, err = inContainer.Run(installPrerequisitesScript) if err != nil { - return nil, fmt.Errorf("failed to install docker: %w", err) + return nil, fmt.Errorf("failed to install prerequisites: %w", err) } return inContainer, nil @@ -296,10 +311,6 @@ func connectOrDie(sshKey, sshUser, host, port string) (*ssh.Client, error) { // outerContainerTemplate represents a template to start a container with // a name specified. -// The container is given access to all NVIDIA gpus by explicitly using the -// nvidia runtime and the `runtime.nvidia.com/gpu=all` device to trigger JIT -// CDI spec generation. -// The template also allows for additional arguments to be specified. type outerContainer struct { Name string BaseImage string @@ -307,9 +318,7 @@ type outerContainer struct { } func (o *outerContainer) Render() (string, error) { - tmpl, err := template.New("startContainer").Parse(`docker run -d --name {{.Name}} --privileged --runtime=nvidia \ --e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all \ --e NVIDIA_DRIVER_CAPABILITIES=all \ + tmpl, err := template.New("startContainer").Parse(`docker run -d --name {{.Name}} --privileged \ {{ range $i, $a := .AdditionalArguments -}} {{ $a }} \ {{ end -}}