diff --git a/cmd/nvidia-ctk-installer/container/container.go b/cmd/nvidia-ctk-installer/container/container.go index 0583e051a..07fd9d589 100644 --- a/cmd/nvidia-ctk-installer/container/container.go +++ b/cmd/nvidia-ctk-installer/container/container.go @@ -35,6 +35,8 @@ const ( // Options defines the shared options for the CLIs to configure containers runtimes. type Options struct { + DropInConfig string + // TODO: Rename to TopLevelConfig Config string Socket string // ExecutablePath specifies the path to the container runtime executable. @@ -71,8 +73,12 @@ func (o Options) Unconfigure(cfg engine.Interface) error { // flush flushes the specified config to disk func (o Options) flush(cfg engine.Interface) error { - logrus.Infof("Flushing config to %v", o.Config) - n, err := cfg.Save(o.Config) + filepath := o.DropInConfig + if filepath == "" { + filepath = o.Config + } + logrus.Infof("Flushing config to %v", filepath) + n, err := cfg.Save(filepath) if err != nil { return fmt.Errorf("unable to flush config: %v", err) } diff --git a/cmd/nvidia-ctk-installer/container/runtime/containerd/config_test.go b/cmd/nvidia-ctk-installer/container/runtime/containerd/config_test.go index 8d9780f04..83afb0a98 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/containerd/config_test.go +++ b/cmd/nvidia-ctk-installer/container/runtime/containerd/config_test.go @@ -34,6 +34,7 @@ func TestContainerdConfigLifecycle(t *testing.T) { Name: "test", } + // TODO: Add test case for v1 and DropInConfig. This should fail. testCases := []struct { description string containerOptions container.Options @@ -561,6 +562,7 @@ func TestContainerdConfigLifecycle(t *testing.T) { description: "v2: top-level config does not exist", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/containerd/config.toml", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", SetAsDefault: false, @@ -576,7 +578,17 @@ func TestContainerdConfigLifecycle(t *testing.T) { actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `version = 2 + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 2 +` + require.Equal(t, expected, string(actual)) + + require.FileExists(t, co.DropInConfig) + + actualDropIn, err := os.ReadFile(co.DropInConfig) + require.NoError(t, err) + + expectedDropIn := `version = 2 [plugins] @@ -613,11 +625,12 @@ func TestContainerdConfigLifecycle(t *testing.T) { [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options] BinaryName = "/usr/bin/nvidia-container-runtime.legacy" ` - require.Equal(t, expected, string(actual)) + require.Equal(t, expectedDropIn, string(actualDropIn)) return nil }, assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { - require.NoFileExists(t, co.Config, "Config file should be deleted if it didn't exist originally") + require.NoFileExists(t, co.Config) + require.NoFileExists(t, co.DropInConfig) return nil }, }, @@ -625,6 +638,7 @@ func TestContainerdConfigLifecycle(t *testing.T) { description: "v2: existing config without nvidia runtime and CDI enabled", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/containerd/config.toml", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", EnableCDI: true, @@ -662,16 +676,42 @@ func TestContainerdConfigLifecycle(t *testing.T) { actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `version = 2 + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 2 [plugins] [plugins."io.containerd.grpc.v1.cri"] - enable_cdi = true [plugins."io.containerd.grpc.v1.cri".containerd] default_runtime_name = "runc" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + BinaryName = "/usr/bin/runc" +` + + // TODO (follow-up): Add a function to compare toml files by contents. + require.Equal(t, expected, string(actual)) + + require.FileExists(t, co.DropInConfig) + + actualDropIn, err := os.ReadFile(co.DropInConfig) + require.NoError(t, err) + + expectedDropIn := `version = 2 + +[plugins] + + [plugins."io.containerd.grpc.v1.cri"] + enable_cdi = true + + [plugins."io.containerd.grpc.v1.cri".containerd] + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia] @@ -691,14 +731,8 @@ func TestContainerdConfigLifecycle(t *testing.T) { [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options] BinaryName = "/usr/bin/nvidia-container-runtime.legacy" - - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] - runtime_type = "io.containerd.runc.v2" - - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] - BinaryName = "/usr/bin/runc" ` - require.Equal(t, expected, string(actual)) + require.Equal(t, expectedDropIn, string(actualDropIn)) return nil }, assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { @@ -707,12 +741,16 @@ func TestContainerdConfigLifecycle(t *testing.T) { actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `version = 2 + // TODO: What is the expectation here? Do we expect that the imports \ + // are updated. + // TODO: Add a test case, where the original config consists of only imports and version + // with the imports referring to another location. + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 2 [plugins] [plugins."io.containerd.grpc.v1.cri"] - enable_cdi = true [plugins."io.containerd.grpc.v1.cri".containerd] default_runtime_name = "runc" @@ -726,6 +764,9 @@ func TestContainerdConfigLifecycle(t *testing.T) { BinaryName = "/usr/bin/runc" ` require.Equal(t, expected, string(actual)) + + require.NoFileExists(t, co.DropInConfig) + return nil }, }, @@ -733,6 +774,7 @@ func TestContainerdConfigLifecycle(t *testing.T) { description: "v2: existing config with nvidia runtime using old path already present", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/containerd/config.toml", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", SetAsDefault: true, @@ -774,7 +816,39 @@ func TestContainerdConfigLifecycle(t *testing.T) { actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `version = 2 + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 2 + +[plugins] + + [plugins."io.containerd.grpc.v1.cri"] + + [plugins."io.containerd.grpc.v1.cri".containerd] + default_runtime_name = "nvidia" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options] + BinaryName = "/old/path/nvidia-container-runtime" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + BinaryName = "/usr/bin/runc" +` + + require.Equal(t, expected, string(actual)) + + require.FileExists(t, co.DropInConfig) + + actualDropIn, err := os.ReadFile(co.DropInConfig) + require.NoError(t, err) + + expectedDropIn := `version = 2 [plugins] @@ -802,14 +876,8 @@ func TestContainerdConfigLifecycle(t *testing.T) { [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options] BinaryName = "/usr/bin/nvidia-container-runtime.legacy" - - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] - runtime_type = "io.containerd.runc.v2" - - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] - BinaryName = "/usr/bin/runc" ` - require.Equal(t, expected, string(actual)) + require.Equal(t, expectedDropIn, string(actualDropIn)) return nil }, assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { @@ -818,11 +886,31 @@ func TestContainerdConfigLifecycle(t *testing.T) { // If file exists, verify nvidia runtimes were removed actual, err := os.ReadFile(co.Config) require.NoError(t, err) - contentStr := string(actual) - require.NotContains(t, contentStr, "nvidia") - require.NotContains(t, contentStr, "nvidia-cdi") - require.NotContains(t, contentStr, "nvidia-legacy") + // TODO: Do we expect that the default_runtime = nvidia be removed? + // TODO: Should the `nvidia` runtimes be removed from the top-level config? + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 2 + +[plugins] + + [plugins."io.containerd.grpc.v1.cri"] + + [plugins."io.containerd.grpc.v1.cri".containerd] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + BinaryName = "/usr/bin/runc" +` + + require.Equal(t, expected, string(actual)) + + require.NoFileExists(t, co.DropInConfig) + return nil }, }, @@ -830,6 +918,7 @@ func TestContainerdConfigLifecycle(t *testing.T) { description: "v2: complex config with multiple plugins and settings", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/containerd/config.toml", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", EnableCDI: true, @@ -886,14 +975,14 @@ state = "/run/containerd" actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `root = "/var/lib/containerd" + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +root = "/var/lib/containerd" state = "/run/containerd" version = 2 [plugins] [plugins."io.containerd.grpc.v1.cri"] - enable_cdi = true [plugins."io.containerd.grpc.v1.cri".containerd] default_runtime_name = "runc" @@ -907,6 +996,41 @@ version = 2 [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.custom.options] TypeUrl = "custom.runtime/options" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + BinaryName = "/usr/bin/runc" + SystemdCgroup = true + + [plugins."io.containerd.grpc.v1.cri".registry] + + [plugins."io.containerd.grpc.v1.cri".registry.mirrors] + + [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"] + endpoint = ["https://registry-1.docker.io"] + + [plugins."io.containerd.internal.v1.opt"] + path = "/opt/containerd" +` + require.Equal(t, expected, string(actual)) + + require.FileExists(t, co.DropInConfig) + + actualDropIn, err := os.ReadFile(co.DropInConfig) + require.NoError(t, err) + + expectedDropIn := `version = 2 + +[plugins] + + [plugins."io.containerd.grpc.v1.cri"] + enable_cdi = true + + [plugins."io.containerd.grpc.v1.cri".containerd] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia] container_annotations = ["cdi.k8s.io*"] runtime_type = "io.containerd.runc.v2" @@ -930,6 +1054,36 @@ version = 2 [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options] BinaryName = "/usr/bin/nvidia-container-runtime.legacy" SystemdCgroup = true +` + require.Equal(t, expectedDropIn, string(actualDropIn)) + return nil + }, + assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { + require.FileExists(t, co.Config) + + actual, err := os.ReadFile(co.Config) + require.NoError(t, err) + + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +root = "/var/lib/containerd" +state = "/run/containerd" +version = 2 + +[plugins] + + [plugins."io.containerd.grpc.v1.cri"] + + [plugins."io.containerd.grpc.v1.cri".containerd] + default_runtime_name = "runc" + snapshotter = "overlayfs" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.custom] + runtime_type = "io.containerd.custom.v1" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.custom.options] + TypeUrl = "custom.runtime/options" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] runtime_type = "io.containerd.runc.v2" @@ -949,19 +1103,9 @@ version = 2 path = "/opt/containerd" ` require.Equal(t, expected, string(actual)) - return nil - }, - assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { - require.FileExists(t, co.Config) - // If file exists, verify nvidia runtimes were removed - actual, err := os.ReadFile(co.Config) - require.NoError(t, err) - contentStr := string(actual) + require.NoFileExists(t, co.DropInConfig) - require.NotContains(t, contentStr, "nvidia") - require.NotContains(t, contentStr, "nvidia-cdi") - require.NotContains(t, contentStr, "nvidia-legacy") return nil }, }, @@ -969,6 +1113,7 @@ version = 2 description: "v2: existing config without default runtime (SetAsDefault=false)", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/containerd/config.toml", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", SetAsDefault: false, @@ -1004,7 +1149,31 @@ version = 2 actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `version = 2 + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 2 + +[plugins] + + [plugins."io.containerd.grpc.v1.cri"] + + [plugins."io.containerd.grpc.v1.cri".containerd] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + BinaryName = "/usr/bin/runc" +` + require.Equal(t, expected, string(actual)) + + require.FileExists(t, co.DropInConfig) + + actualDropIn, err := os.ReadFile(co.DropInConfig) + require.NoError(t, err) + + expectedDropIn := `version = 2 [plugins] @@ -1031,17 +1200,10 @@ version = 2 [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options] BinaryName = "/usr/bin/nvidia-container-runtime.legacy" - - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] - runtime_type = "io.containerd.runc.v2" - - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] - BinaryName = "/usr/bin/runc" ` - require.Equal(t, expected, string(actual)) - // Verify that default_runtime_name is NOT present - require.NotContains(t, string(actual), "default_runtime_name") + require.Equal(t, expectedDropIn, string(actualDropIn)) + return nil }, assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { @@ -1050,7 +1212,8 @@ version = 2 actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `version = 2 + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 2 [plugins] @@ -1068,9 +1231,8 @@ version = 2 ` require.Equal(t, expected, string(actual)) - // Verify that default_runtime_name is still NOT present - require.NotContains(t, string(actual), "default_runtime_name") - require.NotContains(t, string(actual), "nvidia") + require.NoFileExists(t, co.DropInConfig) + return nil }, }, @@ -1078,6 +1240,7 @@ version = 2 description: "v2: existing config without default runtime (SetAsDefault=true)", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/containerd/config.toml", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", SetAsDefault: true, @@ -1096,7 +1259,6 @@ version = 2 [plugins] [plugins."io.containerd.grpc.v1.cri"] [plugins."io.containerd.grpc.v1.cri".containerd] - # Note: No default_runtime_name specified [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] @@ -1114,7 +1276,32 @@ version = 2 actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `version = 2 + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 2 + +[plugins] + + [plugins."io.containerd.grpc.v1.cri"] + + [plugins."io.containerd.grpc.v1.cri".containerd] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + BinaryName = "/usr/bin/runc" +` + + require.Equal(t, expected, string(actual)) + + require.FileExists(t, co.DropInConfig) + + actualDropIn, err := os.ReadFile(co.DropInConfig) + require.NoError(t, err) + + expectedDropIn := `version = 2 [plugins] @@ -1142,14 +1329,8 @@ version = 2 [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options] BinaryName = "/usr/bin/nvidia-container-runtime.legacy" - - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] - runtime_type = "io.containerd.runc.v2" - - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] - BinaryName = "/usr/bin/runc" ` - require.Equal(t, expected, string(actual)) + require.Equal(t, expectedDropIn, string(actualDropIn)) return nil }, assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { @@ -1158,7 +1339,9 @@ version = 2 actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `version = 2 + // TODO: Add test where imports are already specified. + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 2 [plugins] @@ -1176,6 +1359,8 @@ version = 2 ` require.Equal(t, expected, string(actual)) + require.NoFileExists(t, co.DropInConfig) + return nil }, }, @@ -1184,6 +1369,7 @@ version = 2 description: "v3: minimal config without nvidia runtime", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/containerd/config.toml", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", SetAsDefault: false, @@ -1218,7 +1404,31 @@ version = 2 actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `version = 3 + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 3 + +[plugins] + + [plugins."io.containerd.cri.v1.runtime"] + + [plugins."io.containerd.cri.v1.runtime".containerd] + + [plugins."io.containerd.cri.v1.runtime".containerd.runtimes] + + [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options] + BinaryName = "/usr/bin/runc" +` + require.Equal(t, expected, string(actual)) + + require.FileExists(t, co.DropInConfig) + + actualDropIn, err := os.ReadFile(co.DropInConfig) + require.NoError(t, err) + + expectedDropIn := `version = 3 [plugins] @@ -1245,14 +1455,9 @@ version = 2 [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.nvidia-legacy.options] BinaryName = "/usr/bin/nvidia-container-runtime.legacy" - - [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc] - runtime_type = "io.containerd.runc.v2" - - [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options] - BinaryName = "/usr/bin/runc" ` - require.Equal(t, expected, string(actual)) + require.Equal(t, expectedDropIn, string(actualDropIn)) + return nil }, assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { @@ -1262,7 +1467,8 @@ version = 2 require.NoError(t, err) // Should return to original v3 config with just runc - expected := `version = 3 + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 3 [plugins] @@ -1279,6 +1485,9 @@ version = 2 BinaryName = "/usr/bin/runc" ` require.Equal(t, expected, string(actual)) + + require.NoFileExists(t, co.DropInConfig) + return nil }, }, @@ -1286,6 +1495,7 @@ version = 2 description: "v3: existing config with runtime and OPTIONS inheritance", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/containerd/config.toml", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", SetAsDefault: true, @@ -1325,7 +1535,35 @@ version = 2 actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := `version = 3 + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 3 + +[plugins] + + [plugins."io.containerd.cri.v1.runtime"] + + [plugins."io.containerd.cri.v1.runtime".containerd] + default_runtime_name = "runc" + + [plugins."io.containerd.cri.v1.runtime".containerd.runtimes] + + [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options] + BinaryName = "/usr/bin/runc" + NoPivotRoot = false + Root = "/run/containerd/runc" + SystemdCgroup = true +` + require.Equal(t, expected, string(actual)) + + require.FileExists(t, co.DropInConfig) + + actualDropIn, err := os.ReadFile(co.DropInConfig) + require.NoError(t, err) + + expectedDropIn := `version = 3 [plugins] @@ -1362,17 +1600,9 @@ version = 2 NoPivotRoot = false Root = "/run/containerd/runc" SystemdCgroup = true - - [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc] - runtime_type = "io.containerd.runc.v2" - - [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options] - BinaryName = "/usr/bin/runc" - NoPivotRoot = false - Root = "/run/containerd/runc" - SystemdCgroup = true ` - require.Equal(t, expected, string(actual)) + require.Equal(t, expectedDropIn, string(actualDropIn)) + return nil }, assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { @@ -1382,13 +1612,15 @@ version = 2 require.NoError(t, err) // After cleanup, should return to original state - expected := `version = 3 + expected := `imports = ["` + filepath.Dir(co.DropInConfig) + `/*.toml"] +version = 3 [plugins] [plugins."io.containerd.cri.v1.runtime"] [plugins."io.containerd.cri.v1.runtime".containerd] + default_runtime_name = "runc" [plugins."io.containerd.cri.v1.runtime".containerd.runtimes] @@ -1402,6 +1634,9 @@ version = 2 SystemdCgroup = true ` require.Equal(t, expected, string(actual)) + + require.NoFileExists(t, co.DropInConfig) + return nil }, }, @@ -1414,6 +1649,7 @@ version = 2 // Update paths tc.containerOptions.Config = strings.ReplaceAll(tc.containerOptions.Config, "{{ .testRoot }}", testRoot) + tc.containerOptions.DropInConfig = strings.ReplaceAll(tc.containerOptions.DropInConfig, "{{ .testRoot }}", testRoot) tc.containerOptions.RuntimeDir = strings.ReplaceAll(tc.containerOptions.RuntimeDir, "{{ .testRoot }}", testRoot) // Prepare the environment diff --git a/cmd/nvidia-ctk-installer/container/runtime/containerd/containerd.go b/cmd/nvidia-ctk-installer/container/runtime/containerd/containerd.go index 342837f6f..a63b9d6aa 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/containerd/containerd.go +++ b/cmd/nvidia-ctk-installer/container/runtime/containerd/containerd.go @@ -32,7 +32,9 @@ import ( const ( Name = "containerd" - DefaultConfig = "/etc/containerd/config.toml" + DefaultConfig = "/etc/containerd/config.toml" + DefaultDropInConfig = "/etc/containerd/config.d/99-nvidia.toml" + DefaultSocket = "/run/containerd/containerd.sock" DefaultRestartMode = "signal" @@ -170,7 +172,7 @@ func GetLowlevelRuntimePaths(o *container.Options, co *Options) ([]string, error func getRuntimeConfig(o *container.Options, co *Options) (engine.Interface, error) { return containerd.New( - containerd.WithPath(o.Config), + containerd.WithTopLevelConfigPath(o.Config), containerd.WithConfigSource( toml.LoadFirst( containerd.CommandLineSource(o.HostRootMount, o.ExecutablePath), diff --git a/cmd/nvidia-ctk-installer/container/runtime/crio/config_test.go b/cmd/nvidia-ctk-installer/container/runtime/crio/config_test.go index 2adaf8bdc..5a07cc39c 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/crio/config_test.go +++ b/cmd/nvidia-ctk-installer/container/runtime/crio/config_test.go @@ -48,6 +48,7 @@ func TestCrioConfigLifecycle(t *testing.T) { description: "config mode: top-level config does not exist", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/crio/crio.conf", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", SetAsDefault: false, @@ -57,9 +58,10 @@ func TestCrioConfigLifecycle(t *testing.T) { configMode: "config", }, assertSetupPostConditions: func(t *testing.T, co *container.Options, _ *Options) error { - require.FileExists(t, co.Config) + require.NoFileExists(t, co.Config) + require.FileExists(t, co.DropInConfig) - actual, err := os.ReadFile(co.Config) + actual, err := os.ReadFile(co.DropInConfig) require.NoError(t, err) expected := ` @@ -86,6 +88,7 @@ func TestCrioConfigLifecycle(t *testing.T) { }, assertCleanupPostConditions: func(t *testing.T, co *container.Options, _ *Options) error { require.NoFileExists(t, co.Config) + require.NoFileExists(t, co.DropInConfig) return nil }, }, @@ -93,6 +96,7 @@ func TestCrioConfigLifecycle(t *testing.T) { description: "config mode: existing config without nvidia runtime", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/crio/crio.conf", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", SetAsDefault: false, @@ -124,26 +128,36 @@ signature_policy = "/etc/crio/policy.json" assertSetupPostConditions: func(t *testing.T, co *container.Options, _ *Options) error { require.FileExists(t, co.Config) - actual, err := os.ReadFile(co.Config) + actualTopLevel, err := os.ReadFile(co.Config) + require.NoError(t, err) + + expectedTopLevel := `[crio] +[crio.runtime] +default_runtime = "crun" + +[crio.runtime.runtimes.crun] +runtime_path = "/usr/bin/crun" +runtime_type = "oci" +runtime_root = "/run/crun" +monitor_path = "/usr/libexec/crio/conmon" + +[crio.image] +signature_policy = "/etc/crio/policy.json" +` + + require.Equal(t, expectedTopLevel, string(actualTopLevel)) + + require.FileExists(t, co.DropInConfig) + actual, err := os.ReadFile(co.DropInConfig) require.NoError(t, err) expected := ` [crio] - [crio.image] - signature_policy = "/etc/crio/policy.json" - [crio.runtime] - default_runtime = "crun" [crio.runtime.runtimes] - [crio.runtime.runtimes.crun] - monitor_path = "/usr/libexec/crio/conmon" - runtime_path = "/usr/bin/crun" - runtime_root = "/run/crun" - runtime_type = "oci" - [crio.runtime.runtimes.nvidia] monitor_path = "/usr/libexec/crio/conmon" runtime_path = "/usr/bin/nvidia-container-runtime" @@ -165,31 +179,30 @@ signature_policy = "/etc/crio/policy.json" require.Equal(t, expected, string(actual)) return nil }, - assertCleanupPostConditions: func(t *testing.T, co *container.Options, _ *Options) error { + assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { require.FileExists(t, co.Config) - actual, err := os.ReadFile(co.Config) - require.NoError(t, err) + require.NoFileExists(t, co.DropInConfig) - // Should restore to original config - expected := ` -[crio] - - [crio.image] - signature_policy = "/etc/crio/policy.json" + actualTopLevel, err := os.ReadFile(co.Config) + require.NoError(t, err) - [crio.runtime] - default_runtime = "crun" + // Leaves original config unchanged + expectedTopLevel := `[crio] +[crio.runtime] +default_runtime = "crun" - [crio.runtime.runtimes] +[crio.runtime.runtimes.crun] +runtime_path = "/usr/bin/crun" +runtime_type = "oci" +runtime_root = "/run/crun" +monitor_path = "/usr/libexec/crio/conmon" - [crio.runtime.runtimes.crun] - monitor_path = "/usr/libexec/crio/conmon" - runtime_path = "/usr/bin/crun" - runtime_root = "/run/crun" - runtime_type = "oci" +[crio.image] +signature_policy = "/etc/crio/policy.json" ` - require.Equal(t, expected, string(actual)) + require.Equal(t, expectedTopLevel, string(actualTopLevel)) + return nil }, }, @@ -197,6 +210,7 @@ signature_policy = "/etc/crio/policy.json" description: "config mode: existing config with nvidia runtime already present", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/crio/crio.conf", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", SetAsDefault: true, @@ -227,7 +241,29 @@ runtime_type = "oci" assertSetupPostConditions: func(t *testing.T, co *container.Options, _ *Options) error { require.FileExists(t, co.Config) - actual, err := os.ReadFile(co.Config) + actualTopLevel, err := os.ReadFile(co.Config) + require.NoError(t, err) + + // TODO: Do we expect the top-level config to change? i.e. Should + // we REMOVE the default_runtime = "nvidia" setting? + expectedTopLevel := `[crio] +[crio.runtime] +default_runtime = "nvidia" + +[crio.runtime.runtimes.crun] +runtime_path = "/usr/bin/crun" +runtime_type = "oci" + +[crio.runtime.runtimes.nvidia] +runtime_path = "/old/path/nvidia-container-runtime" +runtime_type = "oci" +` + + require.Equal(t, expectedTopLevel, string(actualTopLevel)) + + require.FileExists(t, co.DropInConfig) + + actual, err := os.ReadFile(co.DropInConfig) require.NoError(t, err) expected := ` @@ -238,10 +274,6 @@ runtime_type = "oci" [crio.runtime.runtimes] - [crio.runtime.runtimes.crun] - runtime_path = "/usr/bin/crun" - runtime_type = "oci" - [crio.runtime.runtimes.nvidia] runtime_path = "/usr/bin/nvidia-container-runtime" runtime_type = "oci" @@ -257,25 +289,31 @@ runtime_type = "oci" require.Equal(t, expected, string(actual)) return nil }, - assertCleanupPostConditions: func(t *testing.T, co *container.Options, _ *Options) error { + assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { require.FileExists(t, co.Config) - actual, err := os.ReadFile(co.Config) + actualTopLevel, err := os.ReadFile(co.Config) require.NoError(t, err) - // Note: cleanup removes nvidia runtimes but doesn't restore original default_runtime - expected := ` -[crio] - - [crio.runtime] + // TODO: Do we expect the top-level config to change? i.e. Should + // we REMOVE the default_runtime = "nvidia" setting? + expectedTopLevel := `[crio] +[crio.runtime] +default_runtime = "nvidia" - [crio.runtime.runtimes] +[crio.runtime.runtimes.crun] +runtime_path = "/usr/bin/crun" +runtime_type = "oci" - [crio.runtime.runtimes.crun] - runtime_path = "/usr/bin/crun" - runtime_type = "oci" +[crio.runtime.runtimes.nvidia] +runtime_path = "/old/path/nvidia-container-runtime" +runtime_type = "oci" ` - require.Equal(t, expected, string(actual)) + + require.Equal(t, expectedTopLevel, string(actualTopLevel)) + + require.NoFileExists(t, co.DropInConfig) + return nil }, }, @@ -283,6 +321,7 @@ runtime_type = "oci" description: "config mode: complex config with multiple settings", containerOptions: container.Options{ Config: "{{ .testRoot }}/etc/crio/crio.conf", + DropInConfig: "{{ .testRoot }}/conf.d/99-nvidia.toml", RuntimeName: "nvidia", RuntimeDir: "/usr/bin", SetAsDefault: false, @@ -335,31 +374,51 @@ plugin_dirs = [ actual, err := os.ReadFile(co.Config) require.NoError(t, err) - expected := ` -[crio] + expected := `[crio] +[crio.runtime] +default_runtime = "crun" +conmon = "/usr/libexec/crio/conmon" +conmon_cgroup = "pod" +selinux = true - [crio.image] - insecure_registries = ["localhost:5000"] - signature_policy = "/etc/crio/policy.json" +[crio.runtime.runtimes.crun] +runtime_path = "/usr/bin/crun" +runtime_type = "oci" +runtime_root = "/run/crun" +monitor_path = "/usr/libexec/crio/conmon" - [crio.network] - network_dir = "/etc/cni/net.d/" - plugin_dirs = ["/opt/cni/bin", "/usr/libexec/cni"] +[crio.runtime.runtimes.runc] +runtime_path = "/usr/bin/runc" +runtime_type = "oci" +runtime_root = "/run/runc" + +[crio.image] +signature_policy = "/etc/crio/policy.json" +insecure_registries = [ + "localhost:5000" +] + +[crio.network] +network_dir = "/etc/cni/net.d/" +plugin_dirs = [ + "/opt/cni/bin", + "/usr/libexec/cni" +] +` + require.Equal(t, expected, string(actual)) + + require.FileExists(t, co.DropInConfig) + + actualDropIn, err := os.ReadFile(co.DropInConfig) + require.NoError(t, err) + + expectedDropIn := ` +[crio] [crio.runtime] - conmon = "/usr/libexec/crio/conmon" - conmon_cgroup = "pod" - default_runtime = "crun" - selinux = true [crio.runtime.runtimes] - [crio.runtime.runtimes.crun] - monitor_path = "/usr/libexec/crio/conmon" - runtime_path = "/usr/bin/crun" - runtime_root = "/run/crun" - runtime_type = "oci" - [crio.runtime.runtimes.nvidia] monitor_path = "/usr/libexec/crio/conmon" runtime_path = "/usr/bin/nvidia-container-runtime" @@ -377,53 +436,52 @@ plugin_dirs = [ runtime_path = "/usr/bin/nvidia-container-runtime.legacy" runtime_root = "/run/crun" runtime_type = "oci" - - [crio.runtime.runtimes.runc] - runtime_path = "/usr/bin/runc" - runtime_root = "/run/runc" - runtime_type = "oci" ` - require.Equal(t, expected, string(actual)) + require.Equal(t, expectedDropIn, string(actualDropIn)) return nil }, - assertCleanupPostConditions: func(t *testing.T, co *container.Options, _ *Options) error { + assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error { require.FileExists(t, co.Config) actual, err := os.ReadFile(co.Config) require.NoError(t, err) // Should restore to original complex config - expected := ` -[crio] - - [crio.image] - insecure_registries = ["localhost:5000"] - signature_policy = "/etc/crio/policy.json" - - [crio.network] - network_dir = "/etc/cni/net.d/" - plugin_dirs = ["/opt/cni/bin", "/usr/libexec/cni"] + expected := `[crio] +[crio.runtime] +default_runtime = "crun" +conmon = "/usr/libexec/crio/conmon" +conmon_cgroup = "pod" +selinux = true - [crio.runtime] - conmon = "/usr/libexec/crio/conmon" - conmon_cgroup = "pod" - default_runtime = "crun" - selinux = true +[crio.runtime.runtimes.crun] +runtime_path = "/usr/bin/crun" +runtime_type = "oci" +runtime_root = "/run/crun" +monitor_path = "/usr/libexec/crio/conmon" - [crio.runtime.runtimes] +[crio.runtime.runtimes.runc] +runtime_path = "/usr/bin/runc" +runtime_type = "oci" +runtime_root = "/run/runc" - [crio.runtime.runtimes.crun] - monitor_path = "/usr/libexec/crio/conmon" - runtime_path = "/usr/bin/crun" - runtime_root = "/run/crun" - runtime_type = "oci" +[crio.image] +signature_policy = "/etc/crio/policy.json" +insecure_registries = [ + "localhost:5000" +] - [crio.runtime.runtimes.runc] - runtime_path = "/usr/bin/runc" - runtime_root = "/run/runc" - runtime_type = "oci" +[crio.network] +network_dir = "/etc/cni/net.d/" +plugin_dirs = [ + "/opt/cni/bin", + "/usr/libexec/cni" +] ` require.Equal(t, expected, string(actual)) + + require.NoFileExists(t, co.DropInConfig) + return nil }, }, @@ -563,11 +621,15 @@ plugin_dirs = [ }, } - for _, tc := range testCases { + for i, tc := range testCases { + if i > 3 { + t.SkipNow() + } t.Run(tc.description, func(t *testing.T) { // Update any paths as required testRoot := t.TempDir() tc.containerOptions.Config = strings.ReplaceAll(tc.containerOptions.Config, "{{ .testRoot }}", testRoot) + tc.containerOptions.DropInConfig = strings.ReplaceAll(tc.containerOptions.DropInConfig, "{{ .testRoot }}", testRoot) tc.options.hooksDir = strings.ReplaceAll(tc.options.hooksDir, "{{ .testRoot }}", testRoot) tc.options.hookFilename = "99-nvidia.json" diff --git a/cmd/nvidia-ctk-installer/container/runtime/crio/crio.go b/cmd/nvidia-ctk-installer/container/runtime/crio/crio.go index 10fb1cbfd..f9e986937 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/crio/crio.go +++ b/cmd/nvidia-ctk-installer/container/runtime/crio/crio.go @@ -42,7 +42,9 @@ const ( defaultHookFilename = "oci-nvidia-hook.json" // Config-based settings - DefaultConfig = "/etc/crio/crio.conf" + DefaultConfig = "/etc/crio/crio.conf" + DefaultDropInConfig = "/etc/crio/conf.d/99-nvidia.toml" + DefaultSocket = "/var/run/crio/crio.sock" DefaultRestartMode = "systemd" ) @@ -199,7 +201,7 @@ func GetLowlevelRuntimePaths(o *container.Options) ([]string, error) { func getRuntimeConfig(o *container.Options) (engine.Interface, error) { return crio.New( - crio.WithPath(o.Config), + crio.WithTopLevelConfigPath(o.Config), crio.WithConfigSource( toml.LoadFirst( crio.CommandLineSource(o.HostRootMount, o.ExecutablePath), diff --git a/cmd/nvidia-ctk-installer/container/runtime/runtime.go b/cmd/nvidia-ctk-installer/container/runtime/runtime.go index 320c3e0e1..22a307848 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/runtime.go +++ b/cmd/nvidia-ctk-installer/container/runtime/runtime.go @@ -54,6 +54,13 @@ func Flags(opts *Options) []cli.Flag { Destination: &opts.Config, Sources: cli.EnvVars("RUNTIME_CONFIG", "CONTAINERD_CONFIG", "DOCKER_CONFIG"), }, + &cli.StringFlag{ + Name: "drop-in-config", + Usage: "Path to the NVIDIA-specific drop-in config file", + Value: runtimeSpecificDefault, + Destination: &opts.DropInConfig, + Sources: cli.EnvVars("RUNTIME_DROP_IN_CONFIG"), + }, &cli.StringFlag{ Name: "executable-path", Usage: "The path to the runtime executable. This is used to extract the current config", @@ -120,17 +127,30 @@ func (opts *Options) Validate(logger logger.Interface, c *cli.Command, runtime s opts.EnableCDI = to.CDI.Enabled } - if opts.ExecutablePath != "" && opts.RuntimeName == docker.Name { - logger.Warningf("Ignoring executable-path=%q flag for %v", opts.ExecutablePath, opts.RuntimeName) - opts.ExecutablePath = "" + switch runtime { + case docker.Name: + if opts.ExecutablePath != "" { + logger.Warningf("Ignoring executable-path=%q flag for %v", opts.ExecutablePath, opts.RuntimeName) + opts.ExecutablePath = "" + } + if opts.DropInConfig != "" && opts.DropInConfig != runtimeSpecificDefault { + logger.Warningf("Ignoring drop-in-config=%q flag for %v", opts.DropInConfig, opts.RuntimeName) + opts.DropInConfig = "" + } + case containerd.Name: + case crio.Name: } // Apply the runtime-specific config changes. + // TODO: Add the runtime-specific DropInConfigs here. switch runtime { case containerd.Name: if opts.Config == runtimeSpecificDefault { opts.Config = containerd.DefaultConfig } + if opts.DropInConfig == runtimeSpecificDefault { + opts.DropInConfig = containerd.DefaultDropInConfig + } if opts.Socket == runtimeSpecificDefault { opts.Socket = containerd.DefaultSocket } @@ -141,6 +161,9 @@ func (opts *Options) Validate(logger logger.Interface, c *cli.Command, runtime s if opts.Config == runtimeSpecificDefault { opts.Config = crio.DefaultConfig } + if opts.DropInConfig == runtimeSpecificDefault { + opts.DropInConfig = crio.DefaultDropInConfig + } if opts.Socket == runtimeSpecificDefault { opts.Socket = crio.DefaultSocket } @@ -151,6 +174,10 @@ func (opts *Options) Validate(logger logger.Interface, c *cli.Command, runtime s if opts.Config == runtimeSpecificDefault { opts.Config = docker.DefaultConfig } + // Docker does not support drop-in configs. + if opts.DropInConfig == runtimeSpecificDefault { + opts.DropInConfig = "" + } if opts.Socket == runtimeSpecificDefault { opts.Socket = docker.DefaultSocket } diff --git a/cmd/nvidia-ctk-installer/main_test.go b/cmd/nvidia-ctk-installer/main_test.go index e08aa867e..66310fcc0 100644 --- a/cmd/nvidia-ctk-installer/main_test.go +++ b/cmd/nvidia-ctk-installer/main_test.go @@ -40,10 +40,11 @@ func TestApp(t *testing.T) { hostRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1") testCases := []struct { - description string - args []string - expectedToolkitConfig string - expectedRuntimeConfig string + description string + args []string + expectedToolkitConfig string + expectedRuntimeConfig string + expectedDropInRuntimeConfig string }{ { description: "no args", @@ -285,7 +286,10 @@ swarm-resource = "" [nvidia-ctk] path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk" `, - expectedRuntimeConfig: `version = 2 + expectedRuntimeConfig: `imports = ["{{ .testRoot }}/config.d/*.toml"] +version = 2 +`, + expectedDropInRuntimeConfig: `version = 2 [plugins] @@ -371,7 +375,10 @@ swarm-resource = "" [nvidia-ctk] path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk" `, - expectedRuntimeConfig: `version = 2 + expectedRuntimeConfig: `imports = ["{{ .testRoot }}/config.d/*.toml"] +version = 2 +`, + expectedDropInRuntimeConfig: `version = 2 [plugins] @@ -418,6 +425,7 @@ swarm-resource = "" cdiOutputDir := filepath.Join(testRoot, "/var/run/cdi") runtimeConfigFile := filepath.Join(testRoot, "config.file") + runtimeDropInConfigFile := filepath.Join(testRoot, "config.d/config.toml") toolkitRoot := filepath.Join(testRoot, "toolkit-test") toolkitConfigFile := filepath.Join(toolkitRoot, "toolkit/.config/nvidia-container-runtime/config.toml") @@ -430,6 +438,7 @@ swarm-resource = "" "--no-daemon", "--cdi-output-dir=" + cdiOutputDir, "--config=" + runtimeConfigFile, + "--drop-in-config=" + runtimeDropInConfigFile, "--create-device-nodes=none", "--driver-root-ctr-path=" + hostRoot, "--pid-file=" + filepath.Join(testRoot, "toolkit.pid"), @@ -446,10 +455,24 @@ swarm-resource = "" require.NoError(t, err) require.EqualValues(t, strings.ReplaceAll(tc.expectedToolkitConfig, "{{ .toolkitRoot }}", toolkitRoot), string(toolkitConfigFileContents)) - require.FileExists(t, runtimeConfigFile) - runtimeConfigFileContents, err := os.ReadFile(runtimeConfigFile) - require.NoError(t, err) - require.EqualValues(t, strings.ReplaceAll(tc.expectedRuntimeConfig, "{{ .toolkitRoot }}", toolkitRoot), string(runtimeConfigFileContents)) + if len(tc.expectedRuntimeConfig) > 0 { + require.FileExists(t, runtimeConfigFile) + runtimeConfigFileContents, err := os.ReadFile(runtimeConfigFile) + require.NoError(t, err) + expected := strings.ReplaceAll(tc.expectedRuntimeConfig, "{{ .testRoot }}", testRoot) + require.Equal(t, strings.ReplaceAll(expected, "{{ .toolkitRoot }}", toolkitRoot), string(runtimeConfigFileContents)) + } else { + require.NoFileExists(t, runtimeConfigFile) + } + + if len(tc.expectedDropInRuntimeConfig) > 0 { + require.FileExists(t, runtimeDropInConfigFile) + runtimeDropInConfigFileContents, err := os.ReadFile(runtimeDropInConfigFile) + require.NoError(t, err) + require.Equal(t, strings.ReplaceAll(tc.expectedDropInRuntimeConfig, "{{ .toolkitRoot }}", toolkitRoot), string(runtimeDropInConfigFileContents)) + } else { + require.NoFileExists(t, runtimeDropInConfigFile) + } }) } diff --git a/cmd/nvidia-ctk/runtime/configure/configure.go b/cmd/nvidia-ctk/runtime/configure/configure.go index ee419af83..95620d7ee 100644 --- a/cmd/nvidia-ctk/runtime/configure/configure.go +++ b/cmd/nvidia-ctk/runtime/configure/configure.go @@ -266,13 +266,13 @@ func (m command) configureConfigFile(config *config) error { case "containerd": cfg, err = containerd.New( containerd.WithLogger(m.logger), - containerd.WithPath(config.configFilePath), + containerd.WithTopLevelConfigPath(config.configFilePath), containerd.WithConfigSource(configSource), ) case "crio": cfg, err = crio.New( crio.WithLogger(m.logger), - crio.WithPath(config.configFilePath), + crio.WithTopLevelConfigPath(config.configFilePath), crio.WithConfigSource(configSource), ) case "docker": diff --git a/pkg/config/engine/config.go b/pkg/config/engine/config.go new file mode 100644 index 000000000..3c286b94a --- /dev/null +++ b/pkg/config/engine/config.go @@ -0,0 +1,85 @@ +/** +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package engine + +// A Config represents a config for a container engine. +// These include container, cri-o, and docker. +// The config is logically split into a Source and Destination. This allows an +// existing config to be updated (Source == Destination) or runtime-specific +// settings to be written to a new config. The latter is useful when creation +// NVIDIA-specific drop-in files for container engines that support this. +type Config struct { + Source RuntimeConfigSource + Destination RuntimeConfigDestination +} + +// A RuntimeConfigSource allows runtime-specific settings to be READ from a +// config. +type RuntimeConfigSource interface { + DefaultRuntime() string + GetRuntimeConfig(string) (RuntimeConfig, error) + GetDefaultRuntimeOptions() interface{} + String() string +} + +// A RuntimeConfigDestination allows a runtime with specific settings to be +// WRITTEN to a config. +type RuntimeConfigDestination interface { + AddRuntimeWithOptions(string, string, bool, interface{}) error + EnableCDI() + RemoveRuntime(string) error + Save(string) (int64, error) + String() string +} + +// AddRuntime adds a runtime to the destination config and optionally sets it as the default. +// The options to apply to the added runtime are read from the source config +// default runtime. +func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error { + options := c.Source.GetDefaultRuntimeOptions() + return c.Destination.AddRuntimeWithOptions(name, path, setAsDefault, options) +} + +// RemoveRuntime removes a runtime from the destination config. +func (c *Config) RemoveRuntime(runtime string) error { + return c.Destination.RemoveRuntime(runtime) +} + +// EnableCDI enables CDI in the destination config. +func (c *Config) EnableCDI() { + c.Destination.EnableCDI() +} + +// DefaultRuntime returns the default runtime for the source config. +func (c *Config) DefaultRuntime() string { + return c.Source.DefaultRuntime() +} + +// GetRuntimeConfig returns the source config for the specified runtime. +func (c *Config) GetRuntimeConfig(runtime string) (RuntimeConfig, error) { + return c.Source.GetRuntimeConfig(runtime) +} + +// Save saves the destination runtime to the specified path. +func (c *Config) Save(path string) (int64, error) { + return c.Destination.Save(path) +} + +func (c *Config) String() string { + return c.Destination.String() +} diff --git a/pkg/config/engine/containerd/config.go b/pkg/config/engine/containerd/config.go index 9911a6fb5..9cd2e0c0b 100644 --- a/pkg/config/engine/containerd/config.go +++ b/pkg/config/engine/containerd/config.go @@ -60,7 +60,7 @@ func (c *Config) AddRuntimeWithOptions(name string, path string, setAsDefault bo config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name}, options) } if len(c.ContainerAnnotations) > 0 { - annotations, err := c.getRuntimeAnnotations([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name, "container_annotations"}) + annotations, err := c.getStringArrayValue([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name, "container_annotations"}) if err != nil { return err } @@ -82,7 +82,7 @@ func (c *Config) AddRuntimeWithOptions(name string, path string, setAsDefault bo return nil } -func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) { +func (c *Config) getStringArrayValue(path []string) ([]string, error) { if c == nil || c.Tree == nil { return nil, nil } diff --git a/pkg/config/engine/containerd/config_drop_in.go b/pkg/config/engine/containerd/config_drop_in.go new file mode 100644 index 000000000..95284b113 --- /dev/null +++ b/pkg/config/engine/containerd/config_drop_in.go @@ -0,0 +1,165 @@ +/** +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package containerd + +import ( + "fmt" + "path/filepath" + + "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" +) + +// A ConfigWithDropIn represents a pair of containerd configs. +// The first is the top-level config and the second is an in-memory drop-in config +// that only contains modifications made to the config. +type ConfigWithDropIn struct { + topLevelConfig *topLevelConfig + engine.Interface +} + +var _ engine.Interface = (*ConfigWithDropIn)(nil) + +// A topLevelConfig stores the original on-disk top-level config. +// The path to the config is also stored to allow it to be modified if required. +type topLevelConfig struct { + path string + config *Config +} + +func NewConfigWithDropIn(topLevelConfigPath string, tlConfig *Config, dropInConfig engine.Interface) *ConfigWithDropIn { + return &ConfigWithDropIn{ + topLevelConfig: &topLevelConfig{ + path: topLevelConfigPath, + config: tlConfig, + }, + Interface: dropInConfig, + } +} + +// Save the drop-in config to the specified path. +// The top-level config is optionally updated to include the required imports +// to allow the drop-in-file to be created. +func (c *ConfigWithDropIn) Save(dropInPath string) (int64, error) { + bytesWritten, err := c.Interface.Save(dropInPath) + if err != nil { + return 0, err + } + + switch { + case bytesWritten == 0: + // If the drop-in config is empty, we try to simplify the config. + c.topLevelConfig.simplify(dropInPath) + case bytesWritten > 0: + // If the drop-in config has contents, we need to ensure that the + // drop-in path is included in the imports. + c.topLevelConfig.ensureImports(dropInPath) + } + + // TODO: Only do this if we've actually modified the config. + if err := c.topLevelConfig.flush(); err != nil { + return 0, fmt.Errorf("failed to save top-level config: %w", err) + } + + return bytesWritten, nil +} + +// RemoveRuntime removes the runtime from both configs. +func (c *ConfigWithDropIn) RemoveRuntime(name string) error { + if err := c.topLevelConfig.RemoveRuntime(name); err != nil { + return err + } + return c.Interface.RemoveRuntime(name) +} + +// flush saves the top-level config to it's path. +// If the config is empty, the file will be deleted. +func (c *topLevelConfig) flush() error { + _, err := c.config.Save(c.path) + if err != nil { + return fmt.Errorf("failed to flush config to %q: %w", c.path, err) + } + return nil +} + +func (c *topLevelConfig) simplify(dropInFilename string) { + c.removeImports(dropInFilename) + c.removeVersion() +} + +// removeImports removes the imports specified in the file if the only entry +// corresponds to the path for the drop-in-file and the only other field in the +// file is the version field. +func (c *topLevelConfig) removeImports(dropInFilename string) { + if len(c.config.Keys()) != 2 { + return + } + if c.config.Get("version") == nil || c.config.Get("imports") == nil { + return + } + + currentImports, _ := c.config.getStringArrayValue([]string{"imports"}) + if len(currentImports) != 1 { + return + } + + requiredImport := filepath.Dir(dropInFilename) + "/*.toml" + if currentImports[0] != requiredImport { + return + } + c.config.Delete("imports") +} + +// removeVersion removes the version if it is the ONLY field in the file. +func (c *topLevelConfig) removeVersion() { + if len(c.config.Keys()) > 1 { + return + } + if c.config.Get("version") == nil { + return + } + c.config.Delete("version") +} + +func (c *topLevelConfig) ensureImports(dropInFilename string) { + config := c.config.Tree + var currentImports []string + if ci, ok := c.config.Get("imports").([]string); ok { + currentImports = ci + } + + requiredImport := filepath.Dir(dropInFilename) + "/*.toml" + for _, currentImport := range currentImports { + // If the requiredImport is already present, then we need not update the config. + if currentImport == requiredImport { + return + } + } + + currentImports = append(currentImports, requiredImport) + + // If the config is empty we need to set the version too. + if len(config.Keys()) == 0 { + config.Set("version", c.config.Version) + } + config.Set("imports", currentImports) +} + +// RemoveRuntime removes the specified runtime from the top-level config. +func (c *topLevelConfig) RemoveRuntime(name string) error { + return c.config.RemoveRuntime(name) +} diff --git a/pkg/config/engine/containerd/config_test.go b/pkg/config/engine/containerd/config_test.go index dc6373263..0267f643f 100644 --- a/pkg/config/engine/containerd/config_test.go +++ b/pkg/config/engine/containerd/config_test.go @@ -95,14 +95,6 @@ func TestAddRuntime(t *testing.T) { [plugins."io.containerd.grpc.v1.cri"] [plugins."io.containerd.grpc.v1.cri".containerd] [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] - privileged_without_host_devices = true - runtime_engine = "engine" - runtime_root = "root" - runtime_type = "type" - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] - BinaryName = "/usr/bin/runc" - SystemdCgroup = true [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.test] privileged_without_host_devices = true runtime_engine = "engine" @@ -136,16 +128,7 @@ func TestAddRuntime(t *testing.T) { [plugins] [plugins."io.containerd.grpc.v1.cri"] [plugins."io.containerd.grpc.v1.cri".containerd] - default_runtime_name = "default" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.default] - privileged_without_host_devices = true - runtime_engine = "engine" - runtime_root = "root" - runtime_type = "type" - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.default.options] - BinaryName = "/usr/bin/default" - SystemdCgroup = true [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.test] privileged_without_host_devices = true runtime_engine = "engine" @@ -187,24 +170,7 @@ func TestAddRuntime(t *testing.T) { [plugins] [plugins."io.containerd.grpc.v1.cri"] [plugins."io.containerd.grpc.v1.cri".containerd] - default_runtime_name = "default" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] - privileged_without_host_devices = true - runtime_engine = "engine" - runtime_root = "root" - runtime_type = "type" - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] - BinaryName = "/usr/bin/runc" - SystemdCgroup = true - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.default] - privileged_without_host_devices = false - runtime_engine = "defaultengine" - runtime_root = "defaultroot" - runtime_type = "defaulttype" - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.default.options] - BinaryName = "/usr/bin/default" - SystemdCgroup = false [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.test] privileged_without_host_devices = false runtime_engine = "defaultengine" @@ -259,14 +225,6 @@ func TestAddRuntime(t *testing.T) { [plugins."io.containerd.cri.v1.runtime"] [plugins."io.containerd.cri.v1.runtime".containerd] [plugins."io.containerd.cri.v1.runtime".containerd.runtimes] - [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc] - privileged_without_host_devices = true - runtime_engine = "engine" - runtime_root = "root" - runtime_type = "type" - [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options] - BinaryName = "/usr/bin/runc" - SystemdCgroup = true [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.test] privileged_without_host_devices = true runtime_engine = "engine" diff --git a/pkg/config/engine/containerd/containerd.go b/pkg/config/engine/containerd/containerd.go index ca35c75db..be6cac058 100644 --- a/pkg/config/engine/containerd/containerd.go +++ b/pkg/config/engine/containerd/containerd.go @@ -32,6 +32,10 @@ const ( // Config represents the containerd config type Config struct { *toml.Tree + configOptions +} + +type configOptions struct { Version int64 Logger logger.Interface RuntimeType string @@ -80,15 +84,15 @@ func New(opts ...Option) (engine.Interface, error) { b.logger = logger.New() } if b.configSource == nil { - b.configSource = toml.FromFile(b.path) + b.configSource = toml.FromFile(b.topLevelConfigPath) } - tomlConfig, err := b.configSource.Load() + sourceConfigTree, err := b.configSource.Load() if err != nil { return nil, fmt.Errorf("failed to load config: %v", err) } - configVersion, err := b.parseVersion(tomlConfig) + configVersion, err := b.parseVersion(sourceConfigTree) if err != nil { return nil, fmt.Errorf("failed to parse config version: %w", err) } @@ -100,8 +104,7 @@ func New(opts ...Option) (engine.Interface, error) { } b.logger.Infof("Using CRI runtime plugin name %q", criRuntimePluginName) - cfg := &Config{ - Tree: tomlConfig, + sourceConfigOptions := configOptions{ Version: configVersion, CRIRuntimePluginName: criRuntimePluginName, Logger: b.logger, @@ -109,11 +112,36 @@ func New(opts ...Option) (engine.Interface, error) { UseLegacyConfig: b.useLegacyConfig, ContainerAnnotations: b.containerAnnotations, } - + sourceConfig := &Config{ + Tree: sourceConfigTree, + configOptions: sourceConfigOptions, + } switch configVersion { case 1: - return (*ConfigV1)(cfg), nil + // Version 1 configs do not support imports / drop-in files. + // We return the sourceConfig as is. + return (*ConfigV1)(sourceConfig), nil default: + // For other versions, we create a DropInConfig with a reference to the + // top-level config if present. + topLevelConfig := &Config{ + Tree: func() *toml.Tree { + t, _ := toml.FromFile(b.topLevelConfigPath).Load() + return t + }(), + configOptions: sourceConfigOptions, + } + dropInConfig := &engine.Config{ + Source: sourceConfig, + // The destinationConfig is an empty config with the same options + // as the source config. + Destination: &Config{ + Tree: toml.NewEmpty(), + configOptions: sourceConfigOptions, + }, + } + + cfg := NewConfigWithDropIn(b.topLevelConfigPath, topLevelConfig, dropInConfig) return cfg, nil } } diff --git a/pkg/config/engine/containerd/option.go b/pkg/config/engine/containerd/option.go index 2dec62efb..dee26654a 100644 --- a/pkg/config/engine/containerd/option.go +++ b/pkg/config/engine/containerd/option.go @@ -26,7 +26,7 @@ type builder struct { configSource toml.Loader configVersion int useLegacyConfig bool - path string + topLevelConfigPath string runtimeType string containerAnnotations []string } @@ -41,10 +41,10 @@ func WithLogger(logger logger.Interface) Option { } } -// WithPath sets the path for the config builder -func WithPath(path string) Option { +// WithTopLevelConfigPath sets the path for the top-level containerd config. +func WithTopLevelConfigPath(path string) Option { return func(b *builder) { - b.path = path + b.topLevelConfigPath = path } } diff --git a/pkg/config/engine/crio/crio.go b/pkg/config/engine/crio/crio.go index ded4dcabd..9effd7246 100644 --- a/pkg/config/engine/crio/crio.go +++ b/pkg/config/engine/crio/crio.go @@ -59,19 +59,26 @@ func New(opts ...Option) (engine.Interface, error) { b.logger = logger.New() } if b.configSource == nil { - b.configSource = toml.FromFile(b.path) + b.configSource = toml.FromFile(b.topLevelConfigPath) } - tomlConfig, err := b.configSource.Load() + sourceConfig, err := b.configSource.Load() if err != nil { return nil, err } - cfg := Config{ - Tree: tomlConfig, - Logger: b.logger, + cfg := &engine.Config{ + Source: &Config{ + Tree: sourceConfig, + Logger: b.logger, + }, + Destination: &Config{ + Tree: toml.NewEmpty(), + Logger: b.logger, + }, } - return &cfg, nil + + return cfg, nil } // AddRuntime adds a new runtime to the crio config. diff --git a/pkg/config/engine/crio/crio_test.go b/pkg/config/engine/crio/crio_test.go index a3f2c5bc5..32866d8e0 100644 --- a/pkg/config/engine/crio/crio_test.go +++ b/pkg/config/engine/crio/crio_test.go @@ -68,10 +68,6 @@ func TestAddRuntime(t *testing.T) { `, expectedConfig: ` [crio] - [crio.runtime.runtimes.runc] - runtime_path = "/usr/bin/runc" - runtime_type = "runcoci" - runc_option = "option" [crio.runtime.runtimes.test] runtime_path = "/usr/bin/test" runtime_type = "oci" @@ -92,11 +88,6 @@ func TestAddRuntime(t *testing.T) { expectedConfig: ` [crio] [crio.runtime] - default_runtime = "default" - [crio.runtime.runtimes.default] - runtime_path = "/usr/bin/default" - runtime_type = "defaultoci" - default_option = "option" [crio.runtime.runtimes.test] runtime_path = "/usr/bin/test" runtime_type = "oci" @@ -121,15 +112,6 @@ func TestAddRuntime(t *testing.T) { expectedConfig: ` [crio] [crio.runtime] - default_runtime = "default" - [crio.runtime.runtimes.default] - runtime_path = "/usr/bin/default" - runtime_type = "defaultoci" - default_option = "option" - [crio.runtime.runtimes.runc] - runtime_path = "/usr/bin/runc" - runtime_type = "runcoci" - runc_option = "option" [crio.runtime.runtimes.test] runtime_path = "/usr/bin/test" runtime_type = "oci" diff --git a/pkg/config/engine/crio/option.go b/pkg/config/engine/crio/option.go index 7079fb150..4df7528f2 100644 --- a/pkg/config/engine/crio/option.go +++ b/pkg/config/engine/crio/option.go @@ -22,9 +22,9 @@ import ( ) type builder struct { - logger logger.Interface - configSource toml.Loader - path string + logger logger.Interface + configSource toml.Loader + topLevelConfigPath string } // Option defines a function that can be used to configure the config builder @@ -37,10 +37,10 @@ func WithLogger(logger logger.Interface) Option { } } -// WithPath sets the path for the config builder -func WithPath(path string) Option { +// WithTopLevelConfigPath sets the path for the top-level containerd config. +func WithTopLevelConfigPath(path string) Option { return func(b *builder) { - b.path = path + b.topLevelConfigPath = path } } diff --git a/pkg/config/engine/engine.go b/pkg/config/engine/engine.go index b20d0558d..5696e53dd 100644 --- a/pkg/config/engine/engine.go +++ b/pkg/config/engine/engine.go @@ -24,7 +24,7 @@ import "strings" // the default runtime, "runc", and "crun" // // If an nvidia* runtime is set as the default runtime, this is ignored. -func GetBinaryPathsForRuntimes(cfg Interface) []string { +func GetBinaryPathsForRuntimes(cfg defaultRuntimesGetter) []string { var binaryPaths []string seen := make(map[string]bool) @@ -45,9 +45,14 @@ func GetBinaryPathsForRuntimes(cfg Interface) []string { return binaryPaths } +type defaultRuntimesGetter interface { + DefaultRuntime() string + GetRuntimeConfig(string) (RuntimeConfig, error) +} + // GetLowLevelRuntimes returns a predefined list low-level runtimes from the specified config. // nvidia* runtimes are ignored. -func GetLowLevelRuntimes(cfg Interface) []string { +func GetLowLevelRuntimes(cfg defaultRuntimesGetter) []string { var runtimes []string isValidDefault := func(s string) bool { if s == "" {