Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/e2e/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ GINKGO_BIN := $(CURDIR)/bin/ginkgo
# currently available tests:
# - nvidia-container-cli
# - docker
# - nvidia-cdi-refresh
GINKGO_FOCUS ?=

test: $(GINKGO_BIN)
Expand Down
56 changes: 48 additions & 8 deletions tests/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"errors"
"os"
"strconv"
"strings"
"testing"

. "github.com/onsi/ginkgo/v2"
Expand All @@ -30,17 +31,21 @@ import (

// Test context
var (
runner Runner

ctx context.Context

installCTK bool

imageName string
imageTag string
nvidiaContainerToolkitImage string

sshKey string
sshUser string
sshHost string
sshPort string

localCacheDir string
toolkitInstaller *ToolkitInstaller
)

func TestMain(t *testing.T) {
Expand All @@ -49,31 +54,66 @@ func TestMain(t *testing.T) {
RegisterFailHandler(Fail)

ctx = context.Background()
getTestEnv()

RunSpecs(t,
suiteName,
)
}

// Suite-wide setup: loads the test environment, constructs the runner, stages
// the toolkit package cache on it and, when installCTK is set, installs the
// toolkit and reconfigures docker to use it.
var _ = BeforeSuite(func() {
	getTestEnv()

	runner = NewRunner(
		WithHost(sshHost),
		WithPort(sshPort),
		WithSshKey(sshKey),
		WithSshUser(sshUser),
	)

	// Create a tempdir on the runner; its trimmed path becomes the cache dir.
	mktempOut, _, err := runner.Run("mktemp -d --tmpdir=/tmp nvctk-e2e-test-cacheXXX")
	Expect(err).ToNot(HaveOccurred())

	cacheDir := strings.TrimSpace(mktempOut)
	Expect(cacheDir).ToNot(BeEmpty())
	localCacheDir = cacheDir

	toolkitInstaller, err = NewToolkitInstaller(
		WithToolkitImage(nvidiaContainerToolkitImage),
		WithCacheDir(localCacheDir),
	)
	Expect(err).ToNot(HaveOccurred())

	// The cache is always prepared, even when the toolkit is not installed.
	_, _, err = toolkitInstaller.PrepareCache(runner)
	Expect(err).ToNot(HaveOccurred())

	if !installCTK {
		return
	}

	_, _, err = toolkitInstaller.Install(runner)
	Expect(err).ToNot(HaveOccurred())

	// Register the runtime with docker, then restart docker.
	postInstall := []string{
		`sudo nvidia-ctk runtime configure --runtime=docker`,
		`sudo systemctl restart docker`,
	}
	for _, cmd := range postInstall {
		_, _, err = runner.Run(cmd)
		Expect(err).ToNot(HaveOccurred())
	}
})

// getTestEnv gets the test environment variables
func getTestEnv() {
defer GinkgoRecover()

installCTK = getEnvVarOrDefault("E2E_INSTALL_CTK", false)

if installCTK {
imageName = getRequiredEnvvar[string]("E2E_IMAGE_NAME")
imageTag = getRequiredEnvvar[string]("E2E_IMAGE_TAG")
}
imageName := getRequiredEnvvar[string]("E2E_IMAGE_NAME")
imageTag := getRequiredEnvvar[string]("E2E_IMAGE_TAG")
nvidiaContainerToolkitImage = imageName + ":" + imageTag

sshHost = getEnvVarOrDefault("E2E_SSH_HOST", "")
if sshHost != "" {
sshKey = getRequiredEnvvar[string]("E2E_SSH_KEY")
sshUser = getRequiredEnvvar[string]("E2E_SSH_USER")
sshPort = getEnvVarOrDefault("E2E_SSH_PORT", "22")
}

}

// getRequiredEnvvar returns the specified envvar if set or raises an error.
Expand Down
131 changes: 69 additions & 62 deletions tests/e2e/installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,107 +19,114 @@ package e2e
import (
"bytes"
"fmt"
"strings"
"text/template"
)

// dockerInstallTemplate is a template for installing the NVIDIA Container Toolkit
// on a host using Docker.
var dockerInstallTemplate = `
#! /usr/bin/env bash
var prepareInstallerCacheTemplate = `
set -xe

# if the TEMP_DIR is already set, use it
if [ -f /tmp/ctk_e2e_temp_dir.txt ]; then
TEMP_DIR=$(cat /tmp/ctk_e2e_temp_dir.txt)
else
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
echo "$TEMP_DIR" > /tmp/ctk_e2e_temp_dir.txt
fi

# if TEMP_DIR does not exist, create it
if [ ! -d "$TEMP_DIR" ]; then
mkdir -p "$TEMP_DIR"
fi

# Given that docker has an init function that checks for the existence of the
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
# in the /usr/bin directory.
# See https://github.com/moby/moby/blob/20a05dabf44934447d1a66cdd616cc803b81d4e2/daemon/nvidia_linux.go#L32-L46
sudo rm -f /usr/bin/nvidia-container-runtime-hook
sudo ln -s "$TEMP_DIR/toolkit/nvidia-container-runtime-hook" /usr/bin/nvidia-container-runtime-hook

docker run --pid=host --rm -i --privileged \
-v /:/host \
-v /var/run/docker.sock:/var/run/docker.sock \
-v "$TEMP_DIR:$TEMP_DIR" \
-v /etc/docker:/config-root \
{{.Image}} \
--root "$TEMP_DIR" \
--runtime=docker \
--config=/config-root/daemon.json \
--driver-root=/ \
--no-daemon \
--restart-mode=systemd
mkdir -p {{.CacheDir}}

docker run --rm -v {{.CacheDir}}:/cache --entrypoint="sh" {{.ToolkitImage}}-packaging -c "cp -p -R /artifacts/* /cache/"
`

type ToolkitInstaller struct {
runner Runner
template string
var installFromImageTemplate = `
set -xe

Image string
}
cd {{.CacheDir}}/packages/ubuntu18.04/amd64

type installerOption func(*ToolkitInstaller)
{{if .WithSudo }}sudo {{end}}dpkg -i libnvidia-container1_*_amd64.deb \
libnvidia-container-tools_*_amd64.deb \
nvidia-container-toolkit-base_*_amd64.deb \
nvidia-container-toolkit_*_amd64.deb

func WithRunner(r Runner) installerOption {
return func(i *ToolkitInstaller) {
i.runner = r
}
cd -

nvidia-container-cli --version
`

// ToolkitInstaller holds the values that are substituted into the installer
// script templates.
type ToolkitInstaller struct {
// ToolkitImage is the toolkit image reference. The cache-preparation script
// runs the image named by this value with a "-packaging" suffix appended.
ToolkitImage string
// CacheDir is the directory that the cache-preparation script creates and
// copies the image's /artifacts into, and that the install script reads
// packages from.
CacheDir string
}

func WithImage(image string) installerOption {
// installerOption is a functional option that mutates a ToolkitInstaller
// while it is being constructed.
type installerOption func(*ToolkitInstaller)

func WithToolkitImage(image string) installerOption {
return func(i *ToolkitInstaller) {
i.Image = image
i.ToolkitImage = image
}
}

func WithTemplate(template string) installerOption {
func WithCacheDir(cacheDir string) installerOption {
return func(i *ToolkitInstaller) {
i.template = template
i.CacheDir = cacheDir
}
}

func NewToolkitInstaller(opts ...installerOption) (*ToolkitInstaller, error) {
i := &ToolkitInstaller{
runner: localRunner{},
template: dockerInstallTemplate,
}
i := &ToolkitInstaller{}

for _, opt := range opts {
opt(i)
}

if i.Image == "" {
if i.ToolkitImage == "" {
return nil, fmt.Errorf("image is required")
}

return i, nil
}

func (i *ToolkitInstaller) Install() error {
// PrepareCache ensures that the installer (package) cache is created on the
// runner. It renders prepareInstallerCacheTemplate (without sudo) and executes
// the resulting script through runner.Run, passing that call's results back
// to the caller unchanged.
// This can be used to ensure that docker is not REQUIRED in an inner container.
func (i *ToolkitInstaller) PrepareCache(runner Runner) (string, string, error) {
	script, renderErr := i.renderScript(prepareInstallerCacheTemplate, false)
	if renderErr != nil {
		return "", "", renderErr
	}

	return runner.Run(script)
}

// Install renders installFromImageTemplate and executes it on the runner,
// returning the results of that runner.Run call. Before rendering, it runs
// `id -u` on the runner: unless the reported uid is 0, the script is rendered
// with sudo enabled.
func (i *ToolkitInstaller) Install(runner Runner) (string, string, error) {
	uid, _, err := runner.Run("id -u")
	if err != nil {
		return "", "", err
	}

	// Any uid other than 0 means the script has to elevate via sudo.
	needsSudo := strings.TrimSpace(uid) != "0"

	script, err := i.renderScript(installFromImageTemplate, needsSudo)
	if err != nil {
		return "", "", err
	}

	return runner.Run(script)
}

func (i *ToolkitInstaller) renderScript(scriptTemplate string, withSudo bool) (string, error) {
// Parse the combined template
tmpl, err := template.New("installScript").Parse(i.template)
tmpl, err := template.New("template").Parse(scriptTemplate)
if err != nil {
return fmt.Errorf("error parsing template: %w", err)
return "", fmt.Errorf("error parsing template: %w", err)
}

templateInfo := struct {
*ToolkitInstaller
WithSudo bool
}{
ToolkitInstaller: i,
WithSudo: withSudo,
}
// Execute the template
var renderedScript bytes.Buffer
err = tmpl.Execute(&renderedScript, i)
err = tmpl.Execute(&renderedScript, templateInfo)
if err != nil {
return fmt.Errorf("error executing template: %w", err)
return "", fmt.Errorf("error executing template: %w", err)
}

_, _, err = i.runner.Run(renderedScript.String())
return err
return renderedScript.String(), nil
}
Loading