Skip to content

Commit 435a38f

Browse files
committed
Support for the DevicePluginCDIDevices feature
This patch adds support for the `DevicePluginCDIDevices` feature gate by adding `spec.operator.useDevicePluginCDIDevicesFeature` to `ClusterPolicy`. When this field is set and CDI is enabled, the operator sets the `DEVICE_LIST_STRATEGY` device plug-in environment variable to `cdi-cri` instead of `envvar,cdi-annotations`.

Signed-off-by: Jean-Francois Roy <[email protected]>
1 parent 0d77853 commit 435a38f

File tree

8 files changed

+39
-1
lines changed

8 files changed

+39
-1
lines changed

api/nvidia/v1/clusterpolicy_types.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ type OperatorSpec struct {
148148
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="On OpenShift, enable DriverToolkit image to build and install driver modules"
149149
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
150150
UseOpenShiftDriverToolkit *bool `json:"use_ocp_driver_toolkit,omitempty"`
151+
152+
// UseDevicePluginCDIDevicesFeature indicates if the device plug-in should be configured to use the CDI devices feature
153+
UseDevicePluginCDIDevicesFeature *bool `json:"useDevicePluginCDIDevicesFeature,omitempty"`
151154
}
152155

153156
// HostPathsSpec defines various paths on the host needed by GPU Operator components
@@ -1827,6 +1830,15 @@ func ImagePullPolicy(pullPolicy string) corev1.PullPolicy {
18271830
return imagePullPolicy
18281831
}
18291832

1833+
// DevicePluginCDIDevicesFeatureEnabled returns true if use of the DevicePluginCDIDevices feature is enabled
1834+
func (s *OperatorSpec) DevicePluginCDIDevicesFeatureEnabled() bool {
1835+
if s.UseDevicePluginCDIDevicesFeature == nil {
1836+
// default is false if not specified by user
1837+
return false
1838+
}
1839+
return *s.UseDevicePluginCDIDevicesFeature
1840+
}
1841+
18301842
// IsEnabled returns true if driver install is enabled(default) through gpu-operator
18311843
func (d *DriverSpec) IsEnabled() bool {
18321844
if d.Enabled == nil {

api/nvidia/v1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bundle/manifests/nvidia.com_clusterpolicies.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1558,6 +1558,10 @@ spec:
15581558
image should be used on OpenShift to build and install driver
15591559
modules
15601560
type: boolean
1561+
useDevicePluginCDIDevicesFeature:
1562+
description: UseDevicePluginCDIDevicesFeature indicates if the device plug-in
1563+
should be configured to use the CDI devices feature
1564+
type: boolean
15611565
required:
15621566
- defaultRuntime
15631567
type: object

config/crd/bases/nvidia.com_clusterpolicies.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1558,6 +1558,10 @@ spec:
15581558
image should be used on OpenShift to build and install driver
15591559
modules
15601560
type: boolean
1561+
useDevicePluginCDIDevicesFeature:
1562+
description: UseDevicePluginCDIDevicesFeature indicates if the
1563+
device plug-in should be configured to use the CDI devices feature
1564+
type: boolean
15611565
required:
15621566
- defaultRuntime
15631567
type: object

controllers/object_controls.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1388,7 +1388,11 @@ func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
13881388
// update env required for CDI support
13891389
if config.CDI.IsEnabled() {
13901390
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIEnabledEnvName, "true")
1391-
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "envvar,cdi-annotations")
1391+
if config.Operator.DevicePluginCDIDevicesFeatureEnabled() {
1392+
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "cdi-cri")
1393+
} else {
1394+
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "envvar,cdi-annotations")
1395+
}
13921396
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIAnnotationPrefixEnvName, "nvidia.cdi.k8s.io/")
13931397
if config.Toolkit.IsEnabled() {
13941398
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCDIHookPathEnvName, filepath.Join(config.Toolkit.InstallDir, "toolkit/nvidia-cdi-hook"))

deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1558,6 +1558,10 @@ spec:
15581558
image should be used on OpenShift to build and install driver
15591559
modules
15601560
type: boolean
1561+
useDevicePluginCDIDevicesFeature:
1562+
description: UseDevicePluginCDIDevicesFeature indicates if the device plug-in
1563+
should be configured to use the CDI devices feature
1564+
type: boolean
15611565
required:
15621566
- defaultRuntime
15631567
type: object

deployments/gpu-operator/templates/clusterpolicy.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ spec:
4646
{{- if .Values.operator.use_ocp_driver_toolkit }}
4747
use_ocp_driver_toolkit: {{ .Values.operator.use_ocp_driver_toolkit }}
4848
{{- end }}
49+
{{- if .Values.operator.useDevicePluginCDIDevicesFeature }}
50+
useDevicePluginCDIDevicesFeature: {{ .Values.operator.useDevicePluginCDIDevicesFeature }}
51+
{{- end }}
4952
daemonsets:
5053
labels:
5154
{{- include "gpu-operator.operand-labels" . | nindent 6 }}

deployments/gpu-operator/values.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ operator:
8080
# upgrade CRD on chart upgrade, requires --disable-openapi-validation flag
8181
# to be passed during helm upgrade.
8282
upgradeCRD: false
83+
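# configure the device plug-in to use the DevicePluginCDIDevices feature
# (when CDI is enabled, DEVICE_LIST_STRATEGY is set to cdi-cri)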
84+
useDevicePluginCDIDevicesFeature: false
8385
initContainer:
8486
image: cuda
8587
repository: nvcr.io/nvidia

0 commit comments

Comments
 (0)