Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 1 addition & 11 deletions internal/modifier/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,20 +68,10 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image
return nil, fmt.Errorf("failed to get CDI spec: %v", err)
}

cdiModifier, err := cdi.New(
return cdi.New(
cdi.WithLogger(logger),
cdi.WithSpec(spec.Raw()),
)
if err != nil {
return nil, fmt.Errorf("failed to construct CDI modifier: %v", err)
}

modifiers := Merge(
nvidiaContainerRuntimeHookRemover{logger},
cdiModifier,
)

return modifiers, nil
}

func checkRequirements(logger logger.Interface, image image.CUDA) error {
Expand Down
64 changes: 0 additions & 64 deletions internal/modifier/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package modifier
import (
"testing"

"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"

Expand Down Expand Up @@ -74,66 +73,3 @@ func TestNewCSVModifier(t *testing.T) {
})
}
}

func TestCSVModifierRemovesHook(t *testing.T) {
logger, _ := testlog.NewNullLogger()

testCases := []struct {
description string
spec *specs.Spec
expectedError error
expectedSpec *specs.Spec
}{
{
description: "modification removes existing nvidia-container-runtime-hook",
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{},
},
},
},
{
description: "modification removes existing nvidia-container-toolkit",
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{},
},
},
},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
m := nvidiaContainerRuntimeHookRemover{logger: logger}

err := m.Modify(tc.spec)
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}

require.Empty(t, tc.spec.Hooks.Prestart)
})
}
}
7 changes: 7 additions & 0 deletions internal/modifier/hook_remover.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ type nvidiaContainerRuntimeHookRemover struct {

var _ oci.SpecModifier = (*nvidiaContainerRuntimeHookRemover)(nil)

// NewNvidiaContainerRuntimeHookRemover creates a modifier that removes any NVIDIA Container Runtime hooks from the provided spec.
func NewNvidiaContainerRuntimeHookRemover(logger logger.Interface) oci.SpecModifier {
return nvidiaContainerRuntimeHookRemover{
logger: logger,
}
}

// Modify removes any NVIDIA Container Runtime hooks from the provided spec
func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error {
if spec == nil {
Expand Down
6 changes: 4 additions & 2 deletions internal/runtime/runtime_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
switch modifierType {
case "mode":
modifiers = append(modifiers, modeModifier)
case "nvidia-hook-remover":
modifiers = append(modifiers, modifier.NewNvidiaContainerRuntimeHookRemover(logger))
case "graphics":
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver)
if err != nil {
Expand Down Expand Up @@ -121,10 +123,10 @@ func supportedModifierTypes(mode string) []string {
switch mode {
case "cdi":
// For CDI mode we make no additional modifications.
return []string{"mode"}
return []string{"nvidia-hook-remover", "mode"}
case "csv":
// For CSV mode we support mode and feature-gated modification.
return []string{"mode", "feature-gated"}
return []string{"nvidia-hook-remover", "mode", "feature-gated"}
default:
return []string{"mode", "graphics", "feature-gated"}
}
Expand Down
179 changes: 179 additions & 0 deletions internal/runtime/runtime_factory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (

"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)

Expand Down Expand Up @@ -165,3 +166,181 @@ func TestFactoryMethod(t *testing.T) {
})
}
}

func TestNewSpecModifier(t *testing.T) {
Copy link

@jgehrcke jgehrcke Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did this run as part of the CI checks on this PR?

Maybe in https://github.com/NVIDIA/nvidia-container-toolkit/actions/runs/13163645114/job/36738412538?pr=894#step:5:80?

Need to get used to interpreting such test runner log. I see ok github.com/NVIDIA/nvidia-container-toolkit/internal/runtime 0.059s coverage: 39.6% of statements, but I see no mention of TestNewSpecModifier, or of any of the strings below such as "csv mode removes nvidia-container-runtime-hook".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't currently have verbose output enabled for the tests. Running locally with the diff:

diff --git a/Makefile b/Makefile
index be9da4bc..d2cf9b18 100644
--- a/Makefile
+++ b/Makefile
@@ -103,7 +103,7 @@ licenses:
 
 COVERAGE_FILE := coverage.out
 test: build cmds
-	go test -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
+	go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
 
 coverage: test
 	cat $(COVERAGE_FILE) | grep -v "_mock.go" > $(COVERAGE_FILE).no-mocks
$ make test > test-output-txt
$ grep TestNewSpecModifier test-output.txt
=== RUN   TestNewSpecModifier
=== RUN   TestNewSpecModifier/csv_mode_removes_nvidia-container-runtime-hook
=== RUN   TestNewSpecModifier/csv_mode_removes_nvidia-container-toolkit
=== RUN   TestNewSpecModifier/cdi_mode_removes_nvidia-container-runtime-hook
=== RUN   TestNewSpecModifier/cdi_mode_removes_nvidia-container-toolkit
=== RUN   TestNewSpecModifier/legacy_mode_keeps_nvidia-container-runtime-hook
=== RUN   TestNewSpecModifier/legacy_mode_keeps_nvidia-container-toolkit
--- PASS: TestNewSpecModifier (0.00s)
    --- PASS: TestNewSpecModifier/csv_mode_removes_nvidia-container-runtime-hook (0.00s)
    --- PASS: TestNewSpecModifier/csv_mode_removes_nvidia-container-toolkit (0.00s)
    --- PASS: TestNewSpecModifier/cdi_mode_removes_nvidia-container-runtime-hook (0.00s)
    --- PASS: TestNewSpecModifier/cdi_mode_removes_nvidia-container-toolkit (0.00s)
    --- PASS: TestNewSpecModifier/legacy_mode_keeps_nvidia-container-runtime-hook (0.00s)
    --- PASS: TestNewSpecModifier/legacy_mode_keeps_nvidia-container-toolkit (0.00s)

logger, _ := testlog.NewNullLogger()
driver := root.New(
root.WithDriverRoot("/nvidia/driver/root"),
)
testCases := []struct {
description string
config *config.Config
spec *specs.Spec
expectedSpec *specs.Spec
}{
{
description: "csv mode removes nvidia-container-runtime-hook",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "csv",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "csv mode removes nvidia-container-toolkit",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "csv",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "cdi mode removes nvidia-container-runtime-hook",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "cdi",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "cdi mode removes nvidia-container-toolkit",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "cdi",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "legacy mode keeps nvidia-container-runtime-hook",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "legacy",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
},
{
description: "legacy mode keeps nvidia-container-toolkit",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "legacy",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
spec := &oci.SpecMock{
LoadFunc: func() (*specs.Spec, error) {
return tc.spec, nil
},
}
m, err := newSpecModifier(logger, tc.config, spec, driver)
require.NoError(t, err)

err = m.Modify(tc.spec)
require.NoError(t, err)
require.EqualValues(t, tc.expectedSpec, tc.spec)
})
}
}
6 changes: 6 additions & 0 deletions tests/e2e/nvidia-container-toolkit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ var _ = Describe("docker", Ordered, func() {
Expect(containerOutput).To(Equal(hostOutput))
})

It("should support automatic CDI spec generation with the --gpus flag", func(ctx context.Context) {
Copy link

@jgehrcke jgehrcke Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Judging by the CI check names I see have run on this PR -- this test wasn't executed on those, right? (I was looking for some kind of e2e name).

If this test ran on this PR -- can you point me to a log?

If this test did not run on this PR -- can you briefly explain where/when we run this test? Also, did you run this manually?

(I trust that you did all the right things here; this is just a good opportunity for me to learn what we do/have)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that this triggered the e2e tests, and if it did, I think it probably failed. We definitely need to improve the visibility / traceability of the tests.

One issue is that we trigger the tests once the images are built, but this does not seem to trigger a "checks" entry at a PR level.

containerOutput, _, err := r.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expect(containerOutput).To(Equal(hostOutput))
})

It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expand Down