From 9f867bbf284c8e0f48138bd011a4da6400311565 Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis
Date: Mon, 17 Feb 2025 17:50:34 -0800
Subject: [PATCH 1/6] Detect if container runtime versions on worker nodes
 support CDI

This commit adds a 'runtimeSupportsCDI' field to the ClusterPolicyController
struct. When fetching the container runtime version strings from worker nodes,
the 'runtimeSupportsCDI' field is set accordingly. If any worker node is
running containerd < 1.7.0, then we set runtimeSupportsCDI=false.

As part of this commit, we now return an error if there are different
container runtimes running on different worker nodes.

Signed-off-by: Christopher Desiniotis
---
 controllers/state_manager.go      |  60 +++++------
 controllers/state_manager_test.go | 161 +++++++++++++++++++++++++++++-
 2 files changed, 188 insertions(+), 33 deletions(-)

diff --git a/controllers/state_manager.go b/controllers/state_manager.go
index d8f3ef7e6..f7318d383 100644
--- a/controllers/state_manager.go
+++ b/controllers/state_manager.go
@@ -161,10 +161,11 @@ type ClusterPolicyController struct {
 	openshift        string
 	ocpDriverToolkit OpenShiftDriverToolkit

-	runtime        gpuv1.Runtime
-	hasGPUNodes    bool
-	hasNFDLabels   bool
-	sandboxEnabled bool
+	runtime            gpuv1.Runtime
+	runtimeSupportsCDI bool
+	hasGPUNodes        bool
+	hasNFDLabels       bool
+	sandboxEnabled     bool
 }

 func addState(n *ClusterPolicyController, path string) {
@@ -580,7 +581,7 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
 	return clusterHasNFDLabels, gpuNodesTotal, nil
 }

-func getRuntimeString(node corev1.Node) (gpuv1.Runtime, error) {
+func getRuntimeVersionString(node corev1.Node) (gpuv1.Runtime, string, error) {
 	// ContainerRuntimeVersion string will look like <runtime>://<version>
 	runtimeVer := node.Status.NodeInfo.ContainerRuntimeVersion
 	var runtime gpuv1.Runtime
@@ -592,9 +593,11 @@ func getRuntimeVersionString(node corev1.Node) (gpuv1.Runtime, string, error) {
 	case strings.HasPrefix(runtimeVer, "cri-o"):
 		runtime = gpuv1.CRIO
 	default:
-		return "", fmt.Errorf("runtime not recognized: %s", runtimeVer)
+		return "", "", fmt.Errorf("runtime not recognized: %s", runtimeVer)
 	}
-	return runtime, nil
+	version := strings.SplitAfter(runtimeVer, "//")[1]
+	vVersion := "v" + strings.TrimPrefix(version, "v")
+	return runtime, vVersion, nil
 }

 func (n *ClusterPolicyController) setPodSecurityLabelsForNamespace() error {
@@ -706,13 +709,14 @@ func (n *ClusterPolicyController) ocpEnsureNamespaceMonitoring() error {
 	return nil
 }

-// getRuntime will detect the container runtime used by nodes in the
-// cluster and correctly set the value for clusterPolicyController.runtime
-// For openshift, set runtime to crio. Otherwise, the default runtime is
-// containerd -- if >=1 node is configured with containerd, set
-// clusterPolicyController.runtime = containerd
-func (n *ClusterPolicyController) getRuntime() error {
+// getContainerRuntimeInfo will detect the container runtime version used by nodes
+// in the cluster and correctly set the value for clusterPolicyController.runtime
+// and clusterPolicyController.runtimeSupportsCDI. On OpenShift, the runtime
+// is always assumed to be cri-o. We assume the runtime supports CDI unless
+// containerd < 1.7.0 is detected.
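+// For example, a node reporting "containerd://1.6.30" results in
+// runtimeSupportsCDI=false, while "containerd://1.7.0" (or any supported
+// cri-o or docker version) leaves it set to true.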
+func (n *ClusterPolicyController) getContainerRuntimeInfo() error {
 	ctx := n.ctx
+	n.runtimeSupportsCDI = true
 	// assume crio for openshift clusters
 	if n.openshift != "" {
 		n.runtime = gpuv1.CRIO
@@ -725,27 +729,26 @@
 	list := &corev1.NodeList{}
 	err := n.client.List(ctx, list, opts...)
 	if err != nil {
-		return fmt.Errorf("Unable to list nodes prior to checking container runtime: %v", err)
+		return fmt.Errorf("failed to list nodes: %w", err)
 	}

 	var runtime gpuv1.Runtime
-	for _, node := range list.Items {
-		rt, err := getRuntimeString(node)
+	for i, node := range list.Items {
+		rt, version, err := getRuntimeVersionString(node)
 		if err != nil {
-			n.logger.Info(fmt.Sprintf("Unable to get runtime info for node %s: %v", node.Name, err))
-			continue
+			return fmt.Errorf("failed to get runtime info for node %s: %w", node.Name, err)
+		}
+		if i == 0 {
+			runtime = rt
+		} else if rt != runtime {
+			n.logger.Error(nil, "Different runtimes on different worker nodes is not supported")
+			return fmt.Errorf("different runtimes on different worker nodes is not supported")
 		}
-		runtime = rt
-		if runtime == gpuv1.Containerd {
-			// default to containerd if >=1 node running containerd
-			break
+		if runtime == gpuv1.Containerd && semver.Compare(version, "v1.7.0") < 0 {
+			n.runtimeSupportsCDI = false
 		}
 	}

-	if runtime.String() == "" {
-		n.logger.Info("Unable to get runtime info from the cluster, defaulting to containerd")
-		runtime = gpuv1.Containerd
-	}
 	n.runtime = runtime
 	return nil
 }
@@ -868,11 +871,12 @@ func (n *ClusterPolicyController) init(ctx context.Context, reconciler *ClusterP
 	}

 	// detect the container runtime on worker nodes
-	err = n.getRuntime()
+	err = n.getContainerRuntimeInfo()
 	if err != nil {
-		return err
+		return fmt.Errorf("failed to get container runtime info: %w", err)
 	}
 	n.logger.Info(fmt.Sprintf("Using container runtime: %s", n.runtime.String()))
+	n.logger.Info(fmt.Sprintf("Container runtime supports CDI: %t", n.runtimeSupportsCDI))

 	// fetch all kernel versions from the GPU nodes in the cluster
 	if n.singleton.Spec.Driver.IsEnabled() && n.singleton.Spec.Driver.UsePrecompiledDrivers() {
diff --git a/controllers/state_manager_test.go b/controllers/state_manager_test.go
index bdec856e0..2dd792526 100644
--- a/controllers/state_manager_test.go
+++ b/controllers/state_manager_test.go
@@ -19,36 +19,56 @@ package controllers
 import (
 	"testing"

+	"github.com/stretchr/testify/require"
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"

 	gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
 )

-func TestGetRuntimeString(t *testing.T) {
+func TestGetRuntimeVersionString(t *testing.T) {
 	testCases := []struct {
 		description     string
 		runtimeVer      string
 		expectedRuntime gpuv1.Runtime
+		expectedVersion string
+		errorExpected   bool
 	}{
 		{
 			"containerd",
 			"containerd://1.0.0",
 			gpuv1.Containerd,
+			"v1.0.0",
+			false,
 		},
 		{
 			"docker",
 			"docker://1.0.0",
 			gpuv1.Docker,
+			"v1.0.0",
+			false,
 		},
 		{
 			"crio",
 			"cri-o://1.0.0",
 			gpuv1.CRIO,
+			"v1.0.0",
+			false,
 		},
 		{
 			"unknown",
 			"unknown://1.0.0",
 			"",
+			"v1.0.0",
+			true,
+		},
+		{
+			"containerd with v prefix",
+			"containerd://v1.0.0",
+			gpuv1.Containerd,
+			"v1.0.0",
+			false,
 		},
 	}
@@ -61,11 +81,142 @@
 				},
 			},
 		}
-		runtime, _ := getRuntimeString(node)
-		// TODO: update to use require pkg after MR !311 is merged
-		if runtime != tc.expectedRuntime {
-			t.Errorf("expected %s but got %s", tc.expectedRuntime.String(), runtime.String())
+			runtime, version, err := getRuntimeVersionString(node)
+			if tc.errorExpected {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			require.EqualValues(t, tc.expectedRuntime, runtime)
+			require.EqualValues(t, tc.expectedVersion, version)
 		})
 	}
 }
+
+func TestGetContainerRuntimeInfo(t *testing.T) {
+	testCases := []struct {
+		description        string
+		ctrl               *ClusterPolicyController
+		expectedRuntime    gpuv1.Runtime
+		runtimeSupportsCDI bool
+		errorExpected      bool
+	}{
+		{
+			description: "containerd",
+			ctrl: &ClusterPolicyController{
+				client: fake.NewFakeClient(
+					&corev1.Node{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:   "node-1",
+							Labels: map[string]string{commonGPULabelKey: "true"},
+						},
+						Status: corev1.NodeStatus{
+							NodeInfo: corev1.NodeSystemInfo{ContainerRuntimeVersion: "containerd://1.7.0"},
+						},
+					}),
+			},
+			expectedRuntime:    gpuv1.Containerd,
+			runtimeSupportsCDI: true,
+			errorExpected:      false,
+		},
+		{
+			description: "cri-o",
+			ctrl: &ClusterPolicyController{
+				client: fake.NewFakeClient(
+					&corev1.Node{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:   "node-1",
+							Labels: map[string]string{commonGPULabelKey: "true"},
+						},
+						Status: corev1.NodeStatus{
+							NodeInfo: corev1.NodeSystemInfo{ContainerRuntimeVersion: "cri-o://1.30.0"},
+						},
+					}),
+			},
+			expectedRuntime:    gpuv1.CRIO,
+			runtimeSupportsCDI: true,
+			errorExpected:      false,
+		},
+		{
+			description: "openshift",
+			ctrl: &ClusterPolicyController{
+				openshift: "1.0.0",
+			},
+			expectedRuntime:    gpuv1.CRIO,
+			runtimeSupportsCDI: true,
+			errorExpected:      false,
+		},
+		{
+			description: "containerd, multiple nodes, cdi not supported",
+			ctrl: &ClusterPolicyController{
+				client: fake.NewFakeClient(
+					&corev1.Node{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:   "node-1",
+							Labels: map[string]string{commonGPULabelKey: "true"},
+						},
+						Status: corev1.NodeStatus{
+							NodeInfo: corev1.NodeSystemInfo{
+								ContainerRuntimeVersion: "containerd://1.7.0",
+							},
+						},
+					},
+					&corev1.Node{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:   "node-2",
+							Labels: map[string]string{commonGPULabelKey: "true"},
+						},
+						Status: corev1.NodeStatus{
+							NodeInfo: corev1.NodeSystemInfo{
+								ContainerRuntimeVersion: "containerd://1.6.30",
+							},
+						},
+					}),
+			},
+			expectedRuntime:    gpuv1.Containerd,
+			runtimeSupportsCDI: false,
+			errorExpected:      false,
+		},
+		{
+			description: "multiple nodes, different runtimes",
+			ctrl: &ClusterPolicyController{
+				client: fake.NewFakeClient(
+					&corev1.Node{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:   "node-1",
+							Labels: map[string]string{commonGPULabelKey: "true"},
+						},
+						Status: corev1.NodeStatus{
+							NodeInfo: corev1.NodeSystemInfo{
+								ContainerRuntimeVersion: "containerd://1.7.0",
+							},
+						},
+					},
+					&corev1.Node{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:   "node-2",
+							Labels: map[string]string{commonGPULabelKey: "true"},
+						},
+						Status: corev1.NodeStatus{
+							NodeInfo: corev1.NodeSystemInfo{
+								ContainerRuntimeVersion: "cri-o://1.30.0",
+							},
+						},
+					}),
+			},
+			errorExpected: true,
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			err := tc.ctrl.getContainerRuntimeInfo()
+			if tc.errorExpected {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			require.EqualValues(t, tc.expectedRuntime, tc.ctrl.runtime)
+			require.EqualValues(t, tc.runtimeSupportsCDI, tc.ctrl.runtimeSupportsCDI)
+		})
+	}
+}

From 72756e71a33d96ecf345931abbd49aa6be403d90 Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis
Date: Tue, 18 Feb 2025 14:50:10 -0800
Subject: [PATCH 2/6] Use native CDI support in runtimes for workload
 containers when cdi.enabled=true

This commit updates the default behavior when cdi.enabled=true. If the
container runtime (containerd, cri-o) supports CDI, we leverage it to inject
GPU devices into workload containers. This means we no longer configure
'nvidia' as the default runtime.

Signed-off-by: Christopher Desiniotis
---
 controllers/object_controls.go |  68 +++++++++---
 controllers/transforms_test.go | 195 +++++++++++++++++++++++++++++++++
 2 files changed, 247 insertions(+), 16 deletions(-)

diff --git a/controllers/object_controls.go b/controllers/object_controls.go
index 46b65f6df..badf540e8 100644
--- a/controllers/object_controls.go
+++ b/controllers/object_controls.go
@@ -180,6 +180,8 @@ const (
 	// DriverInstallDirCtrPathEnvName is the name of the envvar used by the driver-validator to represent the path
 	// of the driver install dir mounted in the container
 	DriverInstallDirCtrPathEnvName = "DRIVER_INSTALL_DIR_CTR_PATH"
+	// NvidiaRuntimeSetAsDefaultEnvName is the name of the toolkit container env for configuring NVIDIA Container Runtime as the default runtime
+	NvidiaRuntimeSetAsDefaultEnvName = "NVIDIA_RUNTIME_SET_AS_DEFAULT"
 )

 // ContainerProbe defines container probe types
@@ -1195,6 +1197,31 @@ func getProxyEnv(proxyConfig *apiconfigv1.Proxy) []corev1.EnvVar {
 	return envVars
 }

+func transformToolkitForCDI(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) {
+	if !config.CDI.IsEnabled() {
+		return
+	}
+
+	setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIEnabledEnvName, "true")
+	setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CrioConfigModeEnvName, "config")
+
+	if !n.runtimeSupportsCDI {
+		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeCDIPrefixesEnvName, "nvidia.cdi.k8s.io/")
+	}
+
+	if config.CDI.IsDefault() {
+		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeModeEnvName, "cdi")
+	}
+
+	// When the container runtime supports CDI, we do not configure 'nvidia' as the default runtime.
+	// Instead, we leverage native CDI support in containerd / cri-o to inject GPUs into workloads.
+	// The 'nvidia' runtime will be set as the runtime class for our management containers so that they
+	// get access to all GPUs.
+	if n.runtimeSupportsCDI {
+		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaRuntimeSetAsDefaultEnvName, "false")
+	}
+}
+
 // TransformToolkit transforms Nvidia container-toolkit daemonset with required config as per ClusterPolicy
 func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
 	// update validation container
@@ -1233,14 +1260,7 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n
 	}

 	// update env required for CDI support
-	if config.CDI.IsEnabled() {
-		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIEnabledEnvName, "true")
-		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeCDIPrefixesEnvName, "nvidia.cdi.k8s.io/")
-		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CrioConfigModeEnvName, "config")
-		if config.CDI.IsDefault() {
-			setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeModeEnvName, "cdi")
-		}
-	}
+	transformToolkitForCDI(obj, config, n)

 	// set install directory for the toolkit
 	if config.Toolkit.InstallDir != "" && config.Toolkit.InstallDir != DefaultToolkitInstallDir {
@@ -1352,6 +1372,29 @@ func transformForRuntime(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec,
 	return nil
 }

+func transformDevicePluginForCDI(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) {
+	if !config.CDI.IsEnabled() {
+		return
+	}
+
+	setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIEnabledEnvName, "true")
+	if config.Toolkit.IsEnabled() {
+		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCDIHookPathEnvName, filepath.Join(config.Toolkit.InstallDir, "toolkit/nvidia-cdi-hook"))
+	}
+
+	// When the container runtime supports CDI, we leverage native CDI support in containerd / cri-o
+	// to inject GPUs into workloads. In this case, we use the standard CDI annotation prefix 'cdi.k8s.io/'
+	// instead of a custom annotation prefix that the 'nvidia' container runtime is configured to look for.
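+	// (When runtimeSupportsCDI is false, transformToolkitForCDI above configures the 'nvidia'
+	// runtime to look for this same custom prefix via NvidiaCtrRuntimeCDIPrefixesEnvName.)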
+	deviceListStrategy := "cdi-annotations"
+	cdiAnnotationPrefix := "cdi.k8s.io/"
+	if !n.runtimeSupportsCDI {
+		deviceListStrategy = "envvar,cdi-annotations"
+		cdiAnnotationPrefix = "nvidia.cdi.k8s.io/"
+	}
+	setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, deviceListStrategy)
+	setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIAnnotationPrefixEnvName, cdiAnnotationPrefix)
+}
+
 // TransformDevicePlugin transforms k8s-device-plugin daemonset with required config as per ClusterPolicy
 func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
 	// update validation container
@@ -1412,14 +1455,7 @@ func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
 	applyMIGConfiguration(&(obj.Spec.Template.Spec.Containers[0]), config.MIG.Strategy)

 	// update env required for CDI support
-	if config.CDI.IsEnabled() {
-		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIEnabledEnvName, "true")
-		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "envvar,cdi-annotations")
-		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIAnnotationPrefixEnvName, "nvidia.cdi.k8s.io/")
-		if config.Toolkit.IsEnabled() {
-			setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCDIHookPathEnvName, filepath.Join(config.Toolkit.InstallDir, "toolkit/nvidia-cdi-hook"))
-		}
-	}
+	transformDevicePluginForCDI(obj, config, n)

 	// update MPS volumes and set MPS_ROOT env var if a custom MPS root is configured
 	if config.DevicePlugin.MPS != nil && config.DevicePlugin.MPS.Root != "" &&
diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go
index 83b504b7e..697d3af0b 100644
--- a/controllers/transforms_test.go
+++ b/controllers/transforms_test.go
@@ -1163,3 +1163,198 @@ func TestTransformSandboxValidator(t *testing.T) {
 		})
 	}
 }
+
+func TestTransformToolkitForCDI(t *testing.T) {
+	testCases := []struct {
+		description string
+		ds          Daemonset
+		cpSpec      *gpuv1.ClusterPolicySpec
+		ctrl        ClusterPolicyController
+		expectedDs  Daemonset
+	}{
+		{
+			description: "cdi disabled",
+			ds:          NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
+			cpSpec: &gpuv1.ClusterPolicySpec{
+				CDI: gpuv1.CDIConfigSpec{
+					Enabled: newBoolPtr(false),
+				},
+			},
+			ctrl:       ClusterPolicyController{},
+			expectedDs: NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
+		},
+		{
+			description: "cdi enabled, runtime does not support cdi",
+			ds:          NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
+			cpSpec: &gpuv1.ClusterPolicySpec{
+				CDI: gpuv1.CDIConfigSpec{
+					Enabled: newBoolPtr(true),
+				},
+			},
+			ctrl: ClusterPolicyController{},
+			expectedDs: NewDaemonset().WithContainer(
+				corev1.Container{
+					Name: "main-ctr",
+					Env: []corev1.EnvVar{
+						{Name: CDIEnabledEnvName, Value: "true"},
+						{Name: CrioConfigModeEnvName, Value: "config"},
+						{Name: NvidiaCtrRuntimeCDIPrefixesEnvName, Value: "nvidia.cdi.k8s.io/"},
+					},
+				}),
+		},
+		{
+			description: "cdi enabled, cdi default, runtime does not support cdi",
+			ds:          NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
+			cpSpec: &gpuv1.ClusterPolicySpec{
+				CDI: gpuv1.CDIConfigSpec{
+					Enabled: newBoolPtr(true),
+					Default: newBoolPtr(true),
+				},
+			},
+			ctrl: ClusterPolicyController{},
+			expectedDs: NewDaemonset().WithContainer(
+				corev1.Container{
+					Name: "main-ctr",
+					Env: []corev1.EnvVar{
+						{Name: CDIEnabledEnvName, Value: "true"},
+						{Name: CrioConfigModeEnvName, Value: "config"},
+						{Name: NvidiaCtrRuntimeCDIPrefixesEnvName, Value: "nvidia.cdi.k8s.io/"},
+						{Name: NvidiaCtrRuntimeModeEnvName, Value: "cdi"},
+					},
+				}),
+		},
+		{
+			description: "cdi enabled, cdi default, runtime supports cdi",
+			ds:          NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
+			cpSpec: &gpuv1.ClusterPolicySpec{
+				CDI: gpuv1.CDIConfigSpec{
+					Enabled: newBoolPtr(true),
+					Default: newBoolPtr(true),
+				},
+			},
+			ctrl: ClusterPolicyController{
+				runtimeSupportsCDI: true,
+			},
+			expectedDs: NewDaemonset().WithContainer(
+				corev1.Container{
+					Name: "main-ctr",
+					Env: []corev1.EnvVar{
+						{Name: CDIEnabledEnvName, Value: "true"},
+						{Name: CrioConfigModeEnvName, Value: "config"},
+						{Name: NvidiaCtrRuntimeModeEnvName, Value: "cdi"},
+						{Name: NvidiaRuntimeSetAsDefaultEnvName, Value: "false"},
+					},
+				}),
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			transformToolkitForCDI(tc.ds.DaemonSet, tc.cpSpec, tc.ctrl)
+			require.EqualValues(t, tc.expectedDs, tc.ds)
+		})
+	}
+}
+
+func TestTransformDevicePluginForCDI(t *testing.T) {
+	testCases := []struct {
+		description string
+		ds          Daemonset
+		cpSpec      *gpuv1.ClusterPolicySpec
+		ctrl        ClusterPolicyController
+		expectedDs  Daemonset
+	}{
+		{
+			description: "cdi disabled",
+			ds:          NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
+			cpSpec: &gpuv1.ClusterPolicySpec{
+				CDI: gpuv1.CDIConfigSpec{
+					Enabled: newBoolPtr(false),
+				},
+			},
+			ctrl:       ClusterPolicyController{},
+			expectedDs: NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
+		},
+		{
+			description: "cdi enabled, toolkit disabled, runtime supports cdi",
+			ds:          NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
+			cpSpec: &gpuv1.ClusterPolicySpec{
+				CDI: gpuv1.CDIConfigSpec{
+					Enabled: newBoolPtr(true),
+				},
+				Toolkit: gpuv1.ToolkitSpec{
+					Enabled: newBoolPtr(false),
+				},
+			},
+			ctrl: ClusterPolicyController{
+				runtimeSupportsCDI: true,
+			},
+			expectedDs: NewDaemonset().WithContainer(
+				corev1.Container{
+					Name: "main-ctr",
+					Env: []corev1.EnvVar{
+						{Name: CDIEnabledEnvName, Value: "true"},
+						{Name: DeviceListStrategyEnvName, Value: "cdi-annotations"},
+						{Name: CDIAnnotationPrefixEnvName, Value: "cdi.k8s.io/"},
+					},
+				}),
+		},
+		{
+			description: "cdi enabled, toolkit disabled, runtime does not support cdi",
+			ds:          NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
+			cpSpec: &gpuv1.ClusterPolicySpec{
+				CDI: gpuv1.CDIConfigSpec{
+					Enabled: newBoolPtr(true),
+				},
+				Toolkit: gpuv1.ToolkitSpec{
+					Enabled: newBoolPtr(false),
+				},
+			},
+			ctrl: ClusterPolicyController{
+				runtimeSupportsCDI: false,
+			},
+			expectedDs: NewDaemonset().WithContainer(
+				corev1.Container{
+					Name: "main-ctr",
+					Env: []corev1.EnvVar{
+						{Name: CDIEnabledEnvName, Value: "true"},
+						{Name: DeviceListStrategyEnvName, Value: "envvar,cdi-annotations"},
+						{Name: CDIAnnotationPrefixEnvName, Value: "nvidia.cdi.k8s.io/"},
+					},
+				}),
+		},
+		{
+			description: "cdi enabled, toolkit enabled, runtime supports cdi",
+			ds:          NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
+			cpSpec: &gpuv1.ClusterPolicySpec{
+				CDI: gpuv1.CDIConfigSpec{
+					Enabled: newBoolPtr(true),
+				},
+				Toolkit: gpuv1.ToolkitSpec{
+					Enabled:    newBoolPtr(true),
+					InstallDir: "/path/to/install",
+				},
+			},
+			ctrl: ClusterPolicyController{
+				runtimeSupportsCDI: true,
+			},
+			expectedDs: NewDaemonset().WithContainer(
+				corev1.Container{
+					Name: "main-ctr",
+					Env: []corev1.EnvVar{
+						{Name: CDIEnabledEnvName, Value: "true"},
+						{Name: NvidiaCDIHookPathEnvName, Value: "/path/to/install/toolkit/nvidia-cdi-hook"},
+						{Name: DeviceListStrategyEnvName, Value: "cdi-annotations"},
+						{Name: CDIAnnotationPrefixEnvName, Value: "cdi.k8s.io/"},
+					},
+				}),
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			transformDevicePluginForCDI(tc.ds.DaemonSet, tc.cpSpec, tc.ctrl)
+			require.EqualValues(t, tc.expectedDs, tc.ds)
+		})
+	}
+}

From 56998b291e60455db667ea52cdf46364d1de8d26 Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis
Date: Tue, 18 Feb 2025 15:03:32 -0800
Subject: [PATCH 3/6] Set runtimeClass for operands unless CDI is disabled and
 cri-o is the runtime

When CDI is disabled and cri-o is the container runtime, we fall back to
installing the prestart OCI hook. Our operands will depend on the prestart
hook for getting access to GPUs. In all other scenarios, we want our operands
to leverage the 'nvidia' runtime class to get access to GPUs.

Signed-off-by: Christopher Desiniotis
---
 controllers/object_controls.go | 28 +++++++++++++++-------------
 controllers/transforms_test.go |  6 +++++-
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/controllers/object_controls.go b/controllers/object_controls.go
index badf540e8..cf864c5b7 100644
--- a/controllers/object_controls.go
+++ b/controllers/object_controls.go
@@ -941,7 +941,7 @@ func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPol
 	}

 	// set RuntimeClass for supported runtimes
-	setRuntimeClass(&obj.Spec.Template.Spec, n.runtime, config.Operator.RuntimeClass)
+	setRuntimeClass(&obj.Spec.Template.Spec, n, config.Operator.RuntimeClass)

 	// update env required for MIG support
 	applyMIGConfiguration(&(obj.Spec.Template.Spec.Containers[0]), config.MIG.Strategy)
@@ -1449,7 +1449,7 @@ func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
 	}

 	// set RuntimeClass for supported runtimes
-	setRuntimeClass(&obj.Spec.Template.Spec, n.runtime, config.Operator.RuntimeClass)
+	setRuntimeClass(&obj.Spec.Template.Spec, n, config.Operator.RuntimeClass)

 	// update env required for MIG support
 	applyMIGConfiguration(&(obj.Spec.Template.Spec.Containers[0]), config.MIG.Strategy)
@@ -1530,7 +1530,7 @@ func TransformMPSControlDaemon(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolic
 	}

 	// set RuntimeClass for supported runtimes
-	setRuntimeClass(&obj.Spec.Template.Spec, n.runtime, config.Operator.RuntimeClass)
+	setRuntimeClass(&obj.Spec.Template.Spec, n, config.Operator.RuntimeClass)

 	// update env required for MIG support
 	applyMIGConfiguration(mainContainer, config.MIG.Strategy)
@@ -1644,7 +1644,7 @@ func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
 	}

 	// set RuntimeClass for supported runtimes
-	setRuntimeClass(&obj.Spec.Template.Spec, n.runtime, config.Operator.RuntimeClass)
+	setRuntimeClass(&obj.Spec.Template.Spec, n, config.Operator.RuntimeClass)

 	// mount configmap for custom metrics if provided by user
 	if config.DCGMExporter.MetricsConfig != nil && config.DCGMExporter.MetricsConfig.Name != "" {
@@ -1761,7 +1761,7 @@ func TransformDCGM(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n Clu
 	}

 	// set RuntimeClass for supported runtimes
-	setRuntimeClass(&obj.Spec.Template.Spec, n.runtime, config.Operator.RuntimeClass)
+	setRuntimeClass(&obj.Spec.Template.Spec, n, config.Operator.RuntimeClass)

 	return nil
 }
@@ -1811,7 +1811,7 @@ func TransformMIGManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec,
 	}

 	// set RuntimeClass for supported runtimes
-	setRuntimeClass(&obj.Spec.Template.Spec, n.runtime, config.Operator.RuntimeClass)
+	setRuntimeClass(&obj.Spec.Template.Spec, n, config.Operator.RuntimeClass)

 	// set ConfigMap name for "mig-parted-config" Volume
 	for i, vol := range obj.Spec.Template.Spec.Volumes {
@@ -2096,7 +2096,7 @@ func TransformValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec,
 	}

 	// set RuntimeClass for supported runtimes
-	setRuntimeClass(&obj.Spec.Template.Spec, n.runtime, config.Operator.RuntimeClass)
+	setRuntimeClass(&obj.Spec.Template.Spec, n, config.Operator.RuntimeClass)

 	var validatorErr error
 	// apply changes for individual component validators(initContainers)
@@ -2428,13 +2428,15 @@ func getRuntimeClass(config *gpuv1.ClusterPolicySpec) string {
 	return DefaultRuntimeClass
 }

-func setRuntimeClass(podSpec *corev1.PodSpec, runtime gpuv1.Runtime, runtimeClass string) {
-	if runtime == gpuv1.Containerd {
-		if runtimeClass == "" {
-			runtimeClass = DefaultRuntimeClass
-		}
-		podSpec.RuntimeClassName = &runtimeClass
+func setRuntimeClass(podSpec *corev1.PodSpec, n ClusterPolicyController, runtimeClass string) {
+	if !n.singleton.Spec.CDI.IsEnabled() && n.runtime != gpuv1.Containerd {
+		return
+	}
+
+	if runtimeClass == "" {
+		runtimeClass = DefaultRuntimeClass
 	}
+	podSpec.RuntimeClassName = &runtimeClass
 }

 func setContainerProbe(container *corev1.Container, probe *gpuv1.ContainerProbeSpec, probeType ContainerProbe) {
diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go
index 697d3af0b..06a7e9505 100644
--- a/controllers/transforms_test.go
+++ b/controllers/transforms_test.go
@@ -1096,7 +1096,11 @@ func TestTransformValidator(t *testing.T) {

 	for _, tc := range testCases {
 		t.Run(tc.description, func(t *testing.T) {
-			err := TransformValidator(tc.ds.DaemonSet, tc.cpSpec, ClusterPolicyController{runtime: gpuv1.Containerd, logger: ctrl.Log.WithName("test")})
+			err := TransformValidator(tc.ds.DaemonSet, tc.cpSpec, ClusterPolicyController{
+				runtime:   gpuv1.Containerd,
+				logger:    ctrl.Log.WithName("test"),
+				singleton: &gpuv1.ClusterPolicy{Spec: *tc.cpSpec},
+			})
 			if tc.errorExpected {
 				require.Error(t, err)
 				return

From 7a07a99c5d4b05760cf6b908cf62864fc19e5ca1 Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis
Date: Tue, 18 Feb 2025 15:35:26 -0800
Subject: [PATCH 4/6] Make the cdi.default field a no-op

Signed-off-by: Christopher Desiniotis
---
 api/nvidia/v1/clusterpolicy_types.go          | 10 ++++----
 ...rator-certified.clusterserviceversion.yaml | 14 +++++++++++
 .../manifests/nvidia.com_clusterpolicies.yaml |  9 ++++---
 .../crd/bases/nvidia.com_clusterpolicies.yaml |  9 ++++---
 controllers/object_controls.go                |  5 +---
 controllers/transforms_test.go                | 25 ++-----------------
 .../crds/nvidia.com_clusterpolicies.yaml      |  9 ++++---
 7 files changed, 37 insertions(+), 44 deletions(-)

diff --git a/api/nvidia/v1/clusterpolicy_types.go b/api/nvidia/v1/clusterpolicy_types.go
index 2e7612cf3..c1f17022a 100644
--- a/api/nvidia/v1/clusterpolicy_types.go
+++ b/api/nvidia/v1/clusterpolicy_types.go
@@ -1632,20 +1632,20 @@ type VGPUDevicesConfigSpec struct {

 // CDIConfigSpec defines how the Container Device Interface is used in the cluster.
 type CDIConfigSpec struct {
-	// Enabled indicates whether CDI can be used to make GPUs accessible to containers.
+	// Enabled indicates whether CDI should be used as the mechanism for making GPUs accessible to containers.
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default=false
 	// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
-	// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable CDI as a mechanism for making GPUs accessible to containers"
+	// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable CDI as the mechanism for making GPUs accessible to containers"
 	// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
 	Enabled *bool `json:"enabled,omitempty"`

-	// Default indicates whether to use CDI as the default mechanism for providing GPU access to containers.
+	// Deprecated: This field is no longer used. Setting cdi.enabled=true will configure CDI as the default mechanism for making GPUs accessible to containers.
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default=false
 	// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
-	// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Configure CDI as the default mechanism for making GPUs accessible to containers"
-	// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
+	// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Deprecated: This field is no longer used"
+	// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch,urn:alm:descriptor:com.tectonic.ui:hidden"
 	Default *bool `json:"default,omitempty"`
 }

diff --git a/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml b/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml
index 0f8d48728..0836da07a 100644
--- a/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml
+++ b/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml
@@ -536,6 +536,20 @@ spec:
         path: toolkit.imagePullPolicy
         x-descriptors:
           - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+      - displayName: CDI
+        description: Container Device Interface (CDI) Configuration
+        path: cdi
+      - displayName: Enabled
+        description: 'Enabled indicates whether CDI should be used as the mechanism for making GPUs accessible to containers.'
+        path: cdi.enabled
+        x-descriptors:
+          - 'urn:alm:descriptor:com.tectonic.ui:booleanSwitch'
+      - displayName: Default
+        description: 'Deprecated: This field is no longer used. Setting cdi.enabled=true will configure CDI as the default mechanism for making GPUs accessible to containers.'
+        path: cdi.default
+        x-descriptors:
+          - 'urn:alm:descriptor:com.tectonic.ui:hidden'
+          - 'urn:alm:descriptor:com.tectonic.ui:booleanSwitch'
       - displayName: NVIDIA DCGM config
         description: NVIDIA DCGM config
         path: dcgm
diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml b/bundle/manifests/nvidia.com_clusterpolicies.yaml
index dfa368e9c..ffe836e7d 100644
--- a/bundle/manifests/nvidia.com_clusterpolicies.yaml
+++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml
@@ -136,13 +136,14 @@
             properties:
               default:
                 default: false
-                description: Default indicates whether to use CDI as the default
-                  mechanism for providing GPU access to containers.
+                description: 'Deprecated: This field is no longer used. Setting
+                  cdi.enabled=true will configure CDI as the default mechanism
+                  for making GPUs accessible to containers.'
                type: boolean
              enabled:
                default: false
-               description: Enabled indicates whether CDI can be used to make
-                 GPUs accessible to containers.
+               description: Enabled indicates whether CDI should be used as the
+                 mechanism for making GPUs accessible to containers.
                type: boolean
            type: object
          daemonsets:
diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml
index dfa368e9c..ffe836e7d 100644
--- a/config/crd/bases/nvidia.com_clusterpolicies.yaml
+++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml
@@ -136,13 +136,14 @@
            properties:
              default:
                default: false
-               description: Default indicates whether to use CDI as the default
-                 mechanism for providing GPU access to containers.
+               description: 'Deprecated: This field is no longer used. Setting
+                 cdi.enabled=true will configure CDI as the default mechanism
+                 for making GPUs accessible to containers.'
                type: boolean
              enabled:
                default: false
-               description: Enabled indicates whether CDI can be used to make
-                 GPUs accessible to containers.
+               description: Enabled indicates whether CDI should be used as the
+                 mechanism for making GPUs accessible to containers.
                type: boolean
            type: object
          daemonsets:
diff --git a/controllers/object_controls.go b/controllers/object_controls.go
index cf864c5b7..5f4fc5608 100644
--- a/controllers/object_controls.go
+++ b/controllers/object_controls.go
@@ -1204,15 +1204,12 @@ func transformToolkitForCDI(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySp
 	setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIEnabledEnvName, "true")
 	setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CrioConfigModeEnvName, "config")
+	setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeModeEnvName, "cdi")

 	if !n.runtimeSupportsCDI {
 		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeCDIPrefixesEnvName, "nvidia.cdi.k8s.io/")
 	}

-	if config.CDI.IsDefault() {
-		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeModeEnvName, "cdi")
-	}
-
 	// When the container runtime supports CDI, we do not configure 'nvidia' as the default runtime.
 	// Instead, we leverage native CDI support in containerd / cri-o to inject GPUs into workloads.
 	// The 'nvidia' runtime will be set as the runtime class for our management containers so that they
 	// get access to all GPUs.
diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go
index 06a7e9505..07e7ea05d 100644
--- a/controllers/transforms_test.go
+++ b/controllers/transforms_test.go
@@ -1202,38 +1202,17 @@ func TestTransformToolkitForCDI(t *testing.T) {
 					Env: []corev1.EnvVar{
 						{Name: CDIEnabledEnvName, Value: "true"},
 						{Name: CrioConfigModeEnvName, Value: "config"},
-						{Name: NvidiaCtrRuntimeCDIPrefixesEnvName, Value: "nvidia.cdi.k8s.io/"},
-					},
-				}),
-		},
-		{
-			description: "cdi enabled, cdi default, runtime does not support cdi",
-			ds:          NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
-			cpSpec: &gpuv1.ClusterPolicySpec{
-				CDI: gpuv1.CDIConfigSpec{
-					Enabled: newBoolPtr(true),
-					Default: newBoolPtr(true),
-				},
-			},
-			ctrl: ClusterPolicyController{},
-			expectedDs: NewDaemonset().WithContainer(
-				corev1.Container{
-					Name: "main-ctr",
-					Env: []corev1.EnvVar{
-						{Name: CDIEnabledEnvName, Value: "true"},
-						{Name: CrioConfigModeEnvName, Value: "config"},
-						{Name: NvidiaCtrRuntimeCDIPrefixesEnvName, Value: "nvidia.cdi.k8s.io/"},
 						{Name: NvidiaCtrRuntimeModeEnvName, Value: "cdi"},
+						{Name: NvidiaCtrRuntimeCDIPrefixesEnvName, Value: "nvidia.cdi.k8s.io/"},
 					},
 				}),
 		},
 		{
-			description: "cdi enabled, cdi default, runtime supports cdi",
+			description: "cdi enabled, runtime supports cdi",
 			ds:          NewDaemonset().WithContainer(corev1.Container{Name: "main-ctr"}),
 			cpSpec: &gpuv1.ClusterPolicySpec{
 				CDI: gpuv1.CDIConfigSpec{
 					Enabled: newBoolPtr(true),
-					Default: newBoolPtr(true),
 				},
 			},
 			ctrl: ClusterPolicyController{
diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
index dfa368e9c..ffe836e7d 100644
--- a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
+++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
@@ -136,13 +136,14 @@
            properties:
              default:
                default: false
-               description: Default indicates whether to use CDI as the default
-                 mechanism for providing GPU access to containers.
+               description: 'Deprecated: This field is no longer used. Setting
+                 cdi.enabled=true will configure CDI as the default mechanism
+                 for making GPUs accessible to containers.'
                type: boolean
              enabled:
                default: false
-               description: Enabled indicates whether CDI can be used to make
-                 GPUs accessible to containers.
+               description: Enabled indicates whether CDI should be used as the
+                 mechanism for making GPUs accessible to containers.
                type: boolean
            type: object
          daemonsets:

From 78007bc160b8e6f6681d807c859c18a629de08ff Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis
Date: Tue, 18 Feb 2025 16:01:22 -0800
Subject: [PATCH 5/6] Add e2e test for CDI

Signed-off-by: Christopher Desiniotis
---
 tests/scripts/enable-cdi.sh | 12 ++++++++++++
 tests/scripts/end-to-end.sh | 14 ++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100755 tests/scripts/enable-cdi.sh

diff --git a/tests/scripts/enable-cdi.sh b/tests/scripts/enable-cdi.sh
new file mode 100755
index 000000000..3647aa5e1
--- /dev/null
+++ b/tests/scripts/enable-cdi.sh
@@ -0,0 +1,12 @@
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+source ${SCRIPT_DIR}/.definitions.sh
+
+# Import the check definitions
+source ${SCRIPT_DIR}/checks.sh
+
+kubectl patch clusterpolicy/cluster-policy --type='json' -p='[{"op": "replace", "path": "/spec/cdi/enabled", "value": true}]'
+if [ "$?" -ne 0 ]; then
+	echo "failed to enable CDI in clusterpolicy"
+	exit 1
+fi
+sleep 5
diff --git a/tests/scripts/end-to-end.sh b/tests/scripts/end-to-end.sh
index 46fceb949..39c384dd9 100755
--- a/tests/scripts/end-to-end.sh
+++ b/tests/scripts/end-to-end.sh
@@ -19,6 +19,7 @@ echo "------------------------------------------------------------------------------------"
 # Install a workload and verify that this works as expected
 "${SCRIPT_DIR}"/install-workload.sh
 "${SCRIPT_DIR}"/verify-workload.sh
+"${SCRIPT_DIR}"/uninstall-workload.sh

 echo ""
 echo ""
@@ -27,6 +28,16 @@ echo "------------------------------------------------------------------------------------"
 # Test updates through ClusterPolicy
 "${SCRIPT_DIR}"/update-clusterpolicy.sh

+echo ""
+echo ""
+echo "--------------CDI Tests--------------------------------------------"
+echo "------------------------------------------------------------------------------------"
+"${SCRIPT_DIR}"/enable-cdi.sh
+"${SCRIPT_DIR}"/verify-operator.sh
+"${SCRIPT_DIR}"/install-workload.sh
+"${SCRIPT_DIR}"/verify-workload.sh
+"${SCRIPT_DIR}"/uninstall-workload.sh
+
 echo ""
 echo ""
 echo "--------------GPU Operator Restart Test---------------------------------------------"
@@ -43,8 +54,7 @@ test_restart_operator ${TEST_NAMESPACE} ${CONTAINER_RUNTIME}
 "${SCRIPT_DIR}"/enable-operands.sh
 "${SCRIPT_DIR}"/verify-operator.sh

-# Uninstall the workload and operator
-"${SCRIPT_DIR}"/uninstall-workload.sh
+# Uninstall the operator
 "${SCRIPT_DIR}"/uninstall-operator.sh

 echo ""

From 0c440fb569b001cb2062393ad4ec0fd21a5b41a3 Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis
Date: Thu, 20 Feb 2025 08:55:37 -0800
Subject: [PATCH 6/6] Use cdi-cri device-list-strategy by default when CDI is
 enabled

Signed-off-by: Christopher Desiniotis
---
 controllers/object_controls.go | 6 +++---
 controllers/transforms_test.go | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/controllers/object_controls.go b/controllers/object_controls.go
index 5f4fc5608..0b8c57552 100644
--- a/controllers/object_controls.go
+++ b/controllers/object_controls.go
@@ -1380,9 +1380,9 @@ func transformDevicePluginForCDI(obj *appsv1.DaemonSet, config *gpuv1.ClusterPol
 	}

 	// When the container runtime supports CDI, we leverage native CDI support in containerd / cri-o
-	// to inject GPUs into workloads. In this case, we use the standard CDI annotation prefix 'cdi.k8s.io/'
-	// instead of a custom annotation prefix that the 'nvidia' container runtime is configured to look for.
-	deviceListStrategy := "cdi-annotations"
+	// to inject GPUs into workloads. If native CDI is not supported, we leverage CDI support in
+	// NVIDIA Container Toolkit.
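+	// ('cdi-cri' passes CDI device requests through the CRI's CDIDevices field
+	// rather than through annotations.)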
+	deviceListStrategy := "cdi-cri"
 	cdiAnnotationPrefix := "cdi.k8s.io/"
 	if !n.runtimeSupportsCDI {
 		deviceListStrategy = "envvar,cdi-annotations"
 		cdiAnnotationPrefix = "nvidia.cdi.k8s.io/"
 	}
diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go
index 07e7ea05d..14458d6b0 100644
--- a/controllers/transforms_test.go
+++ b/controllers/transforms_test.go
@@ -1277,7 +1277,7 @@
 					Name: "main-ctr",
 					Env: []corev1.EnvVar{
 						{Name: CDIEnabledEnvName, Value: "true"},
-						{Name: DeviceListStrategyEnvName, Value: "cdi-annotations"},
+						{Name: DeviceListStrategyEnvName, Value: "cdi-cri"},
 						{Name: CDIAnnotationPrefixEnvName, Value: "cdi.k8s.io/"},
 					},
 				}),
@@ -1327,7 +1327,7 @@
 					Env: []corev1.EnvVar{
 						{Name: CDIEnabledEnvName, Value: "true"},
 						{Name: NvidiaCDIHookPathEnvName, Value: "/path/to/install/toolkit/nvidia-cdi-hook"},
-						{Name: DeviceListStrategyEnvName, Value: "cdi-annotations"},
+						{Name: DeviceListStrategyEnvName, Value: "cdi-cri"},
 						{Name: CDIAnnotationPrefixEnvName, Value: "cdi.k8s.io/"},
 					},
 				}),
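
---

A note for reviewers: the PATCH 1 version gate is easy to exercise in isolation.
The sketch below mirrors getRuntimeVersionString plus the semver comparison from
getContainerRuntimeInfo. It assumes golang.org/x/mod/semver is the semver package
imported by state_manager.go (the import hunk is not shown in PATCH 1), so treat
it as an illustration rather than operator code:

package main

import (
	"fmt"
	"strings"

	"golang.org/x/mod/semver"
)

// runtimeSupportsCDI mirrors the gate in getContainerRuntimeInfo: only
// containerd older than v1.7.0 is treated as lacking native CDI support.
func runtimeSupportsCDI(containerRuntimeVersion string) (bool, error) {
	// ContainerRuntimeVersion strings look like <runtime>://<version>
	parts := strings.SplitAfter(containerRuntimeVersion, "//")
	if len(parts) < 2 {
		return false, fmt.Errorf("runtime not recognized: %s", containerRuntimeVersion)
	}
	version := "v" + strings.TrimPrefix(parts[1], "v")
	if strings.HasPrefix(containerRuntimeVersion, "containerd") {
		return semver.Compare(version, "v1.7.0") >= 0, nil
	}
	return true, nil
}

func main() {
	for _, v := range []string{"containerd://1.6.30", "containerd://v1.7.0", "cri-o://1.30.0"} {
		ok, err := runtimeSupportsCDI(v)
		if err != nil {
			fmt.Println(err)
			continue
		}
		fmt.Printf("%-22s supports CDI: %t\n", v, ok)
	}
}

Run against the three inputs above, this prints false, true, true — matching the
expectations encoded in TestGetContainerRuntimeInfo.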