/*
 * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package e2e

import (
	"context"
	"fmt"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

const (
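	// libnvidiaContainerCliDockerRunTemplate starts a detached, privileged Ubuntu
	// container with the NVIDIA runtime. The toolkit binaries, runtime configuration,
	// and libnvidia-container libraries are bind-mounted from the host so that the
	// test script can drive nvidia-container-cli directly inside the container.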
	libnvidiaContainerCliDockerRunTemplate = `
docker run -d --name test-nvidia-container-cli \
    --privileged \
    --runtime=nvidia \
    -e NVIDIA_VISIBLE_DEVICES=all \
    -e NVIDIA_DRIVER_CAPABILITIES=all \
    -v $HOME/libnvidia-container-cli.sh:/usr/local/bin/libnvidia-container-cli.sh \
    -v /usr/bin/nvidia-container-cli:/usr/bin/nvidia-container-cli \
    -v /usr/bin/nvidia-ctk:/usr/bin/nvidia-ctk \
    -v /usr/bin/nvidia-container-runtime:/usr/bin/nvidia-container-runtime \
    -v /usr/bin/nvidia-container-runtime-hook:/usr/bin/nvidia-container-runtime-hook \
    -v /usr/bin/nvidia-container-toolkit:/usr/bin/nvidia-container-toolkit \
    -v /usr/bin/nvidia-cdi-hook:/usr/bin/nvidia-cdi-hook \
    -v /usr/bin/nvidia-container-runtime.cdi:/usr/bin/nvidia-container-runtime.cdi \
    -v /usr/bin/nvidia-container-runtime.legacy:/usr/bin/nvidia-container-runtime.legacy \
    -v /usr/local/nvidia/toolkit:/usr/local/nvidia/toolkit \
    -v /etc/nvidia-container-runtime:/etc/nvidia-container-runtime \
    -v /usr/lib/x86_64-linux-gnu/libnvidia-container.so.1:/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1 \
    -v /usr/lib/x86_64-linux-gnu/libnvidia-container-go.so.1:/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so.1 \
    -e LD_LIBRARY_PATH=/usr/lib64:/usr/lib/x86_64-linux-gnu:/usr/lib/aarch64-linux-gnu:/lib64:/lib/x86_64-linux-gnu:/lib/aarch64-linux-gnu \
    --entrypoint /usr/local/bin/libnvidia-container-cli.sh \
    ubuntu
`

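	// libnvidiaContainerCliTestTemplate is the script executed as the container
	// entrypoint. It unpacks a minimal Ubuntu 22.04 rootfs inside new mount and PID
	// namespaces, lets nvidia-container-cli inject GPU 0 into that rootfs, pivots
	// into it, and prints the GPU list with `nvidia-smi -L`.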
	libnvidiaContainerCliTestTemplate = `#!/usr/bin/env bash
set -euo pipefail

apt-get update -y && apt-get install -y curl gnupg2

WORKDIR="$(mktemp -d)"
ROOTFS="${WORKDIR}/rootfs"
mkdir -p "${ROOTFS}"

export WORKDIR ROOTFS # make them visible in the child shell

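# The heredoc delimiter below is quoted ('IN_NS'), so $ROOTFS and $WORKDIR are not
# expanded by this shell; the inner bash resolves them from the exported environment.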
unshare --mount --pid --fork --propagation private -- bash -eux <<'IN_NS'
  : "${ROOTFS:?}" "${WORKDIR:?}" # abort if either is empty

  # 1 Populate minimal Ubuntu base
  curl -L http://cdimage.ubuntu.com/ubuntu-base/releases/22.04/release/ubuntu-base-22.04-base-amd64.tar.gz \
    | tar -C "$ROOTFS" -xz

  # 2 Add non-root user
  useradd -R "$ROOTFS" -U -u 1000 -s /bin/bash nvidia

  # 3 Bind-mount new root and make the mount private
  mount --bind "$ROOTFS" "$ROOTFS"
  mount --make-private "$ROOTFS"
  cd "$ROOTFS"

  # 4 Minimal virtual filesystems
  mount -t proc proc proc
  mount -t sysfs sys sys
  mount -t tmpfs tmp tmp
  mount -t tmpfs run run

  # 5 GPU setup
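  # --no-cgroups skips cgroup device setup, --utility injects the utility
  # binaries (nvidia-smi), and --device=0 exposes only the first GPU; the @
  # prefix tells nvidia-container-cli to run ldconfig from outside the new rootfs.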
  nvidia-container-cli --load-kmods --debug=container-cli.log \
    configure --ldconfig=@/sbin/ldconfig.real \
    --no-cgroups --utility --device=0 "$(pwd)"

  # 6 Switch root
  mkdir -p mnt
  pivot_root . mnt
  umount -l /mnt

  exec nvidia-smi -L
IN_NS
`
)

// Integration tests for nvidia-container-cli, exercised inside a Docker container
var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, func() {
	var runner Runner

	// Set up the remote runner and install the NVIDIA Container Toolkit
	BeforeAll(func(ctx context.Context) {
		runner = NewRunner(
			WithHost(sshHost),
			WithPort(sshPort),
			WithSshKey(sshKey),
			WithSshUser(sshUser),
		)

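		// Optionally install the NVIDIA Container Toolkit on the remote host
		// before any specs run (skipped when a pre-installed toolkit is used).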
		if installCTK {
			installer, err := NewToolkitInstaller(
				WithRunner(runner),
				WithImage(imageName+":"+imageTag),
				WithTemplate(dockerInstallTemplate),
			)
			Expect(err).ToNot(HaveOccurred())

			err = installer.Install()
			Expect(err).ToNot(HaveOccurred())
		}
	})

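	// The spec captures the GPU list reported by the host and expects the same
	// list from the containerized nvidia-container-cli setup.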
	When("running nvidia-smi -L", Ordered, func() {
		var hostOutput string
		var err error

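		// Record the host GPU list as the expected output and pre-pull the
		// ubuntu image used by the test container.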
		BeforeAll(func(ctx context.Context) {
			hostOutput, _, err = runner.Run("nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())

			_, _, err = runner.Run("docker pull ubuntu")
			Expect(err).ToNot(HaveOccurred())
		})

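		// Remove the test container once all specs in this block have run.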
		AfterAll(func(ctx context.Context) {
			_, _, err := runner.Run("docker rm -f test-nvidia-container-cli")
			Expect(err).ToNot(HaveOccurred())
		})

		It("should support NVIDIA_VISIBLE_DEVICES and NVIDIA_DRIVER_CAPABILITIES", func(ctx context.Context) {
			// 1. Create the test script on the remote host at $HOME/libnvidia-container-cli.sh using a here-document
			testScriptPath := "$HOME/libnvidia-container-cli.sh"
			testScript := libnvidiaContainerCliTestTemplate
			createScriptCmd := fmt.Sprintf("cat > %s <<'EOF'\n%s\nEOF\nchmod +x %s", testScriptPath, testScript, testScriptPath)
			_, _, err := runner.Run(createScriptCmd)
			Expect(err).ToNot(HaveOccurred())

			// 2. Start the container using the docker run template
			dockerRunCmd := libnvidiaContainerCliDockerRunTemplate
			_, _, err = runner.Run(dockerRunCmd)
			Expect(err).ToNot(HaveOccurred())

			// 3. Wait for the container logs to contain the host GPU list
			Eventually(func() string {
				logs, _, err := runner.Run("docker logs test-nvidia-container-cli")
				if err != nil {
					return ""
				}
				return logs
			}, "5m", "5s").Should(ContainSubstring(hostOutput))
		})
	})
})