Skip to content

Commit 39e13ab

Browse files
authored
Merge pull request #1578 from NVIDIA/cdi-by-default
Use native CDI by default as the mechanism for injecting GPUs into workload containers
2 parents 5a17101 + ea036db commit 39e13ab

File tree

10 files changed

+258
-89
lines changed

10 files changed

+258
-89
lines changed

api/nvidia/v1/clusterpolicy_types.go

Lines changed: 7 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1658,20 +1658,20 @@ type VGPUDevicesConfigSpec struct {
16581658

16591659
// CDIConfigSpec defines how the Container Device Interface is used in the cluster.
16601660
type CDIConfigSpec struct {
1661-
// Enabled indicates whether CDI can be used to make GPUs accessible to containers.
1661+
// Enabled indicates whether the Container Device Interface (CDI) should be used as the mechanism for making GPUs accessible to containers.
16621662
// +kubebuilder:validation:Optional
1663-
// +kubebuilder:default=false
1663+
// +kubebuilder:default=true
16641664
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
1665-
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable CDI as a mechanism for making GPUs accessible to containers"
1665+
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable CDI as the mechanism for making GPUs accessible to containers"
16661666
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
16671667
Enabled *bool `json:"enabled,omitempty"`
16681668

1669-
// Default indicates whether to use CDI as the default mechanism for providing GPU access to containers.
1669+
// Deprecated: This field is no longer used. Setting cdi.enabled=true will configure CDI as the default mechanism for making GPUs accessible to containers.
16701670
// +kubebuilder:validation:Optional
16711671
// +kubebuilder:default=false
16721672
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
1673-
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Configure CDI as the default mechanism for making GPUs accessible to containers"
1674-
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
1673+
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Deprecated: This field is no longer used"
1674+
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch,urn:alm:descriptor:com.tectonic.ui:hidden"
16751675
Default *bool `json:"default,omitempty"`
16761676
}
16771677

@@ -2070,20 +2070,11 @@ func (l *DriverLicensingConfigSpec) IsNLSEnabled() bool {
20702070
// providing GPU access to containers
20712071
func (c *CDIConfigSpec) IsEnabled() bool {
20722072
if c.Enabled == nil {
2073-
return false
2073+
return true
20742074
}
20752075
return *c.Enabled
20762076
}
20772077

2078-
// IsDefault returns true if CDI is enabled as the default
2079-
// mechanism for providing GPU access to containers
2080-
func (c *CDIConfigSpec) IsDefault() bool {
2081-
if c.Default == nil {
2082-
return false
2083-
}
2084-
return *c.Default
2085-
}
2086-
20872078
// IsEnabled returns true if Kata Manager is enabled
20882079
func (k *KataManagerSpec) IsEnabled() bool {
20892080
if k.Enabled == nil {

bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,9 @@ metadata:
3434
"initContainer": {
3535
}
3636
},
37+
"cdi": {
38+
"enabled": true
39+
},
3740
"sandboxWorkloads": {
3841
"enabled": false,
3942
"defaultWorkload": "container"
@@ -531,6 +534,20 @@ spec:
531534
path: toolkit.imagePullPolicy
532535
x-descriptors:
533536
- 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
537+
- displayName: CDI
538+
description: Container Device Interface (CDI) Configuration
539+
path: cdi
540+
- displayName: Enabled
541+
description: 'Enabled indicates whether CDI should be used as the mechanism for making GPUs accessible to containers.'
542+
path: cdi.enabled
543+
x-descriptors:
544+
- 'urn:alm:descriptor:com.tectonic.ui:booleanSwitch'
545+
- displayName: Default
546+
description: 'Deprecated: This field is no longer used. Setting cdi.enabled=true will configure CDI as the default mechanism for making GPUs accessible to containers.'
547+
path: cdi.default
548+
x-descriptors:
549+
- 'urn:alm:descriptor:com.tectonic.ui:hidden'
550+
- 'urn:alm:descriptor:com.tectonic.ui:booleanSwitch'
534551
- displayName: NVIDIA DCGM config
535552
description: NVIDIA DCGM config
536553
path: dcgm

bundle/manifests/nvidia.com_clusterpolicies.yaml

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -136,13 +136,15 @@ spec:
136136
properties:
137137
default:
138138
default: false
139-
description: Default indicates whether to use CDI as the default
140-
mechanism for providing GPU access to containers.
139+
description: 'Deprecated: This field is no longer used. Setting
140+
cdi.enabled=true will configure CDI as the default mechanism
141+
for making GPUs accessible to containers.'
141142
type: boolean
142143
enabled:
143-
default: false
144-
description: Enabled indicates whether CDI can be used to make
145-
GPUs accessible to containers.
144+
default: true
145+
description: Enabled indicates whether the Container Device Interface
146+
(CDI) should be used as the mechanism for making GPUs accessible
147+
to containers.
146148
type: boolean
147149
type: object
148150
daemonsets:

config/crd/bases/nvidia.com_clusterpolicies.yaml

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -136,13 +136,15 @@ spec:
136136
properties:
137137
default:
138138
default: false
139-
description: Default indicates whether to use CDI as the default
140-
mechanism for providing GPU access to containers.
139+
description: 'Deprecated: This field is no longer used. Setting
140+
cdi.enabled=true will configure CDI as the default mechanism
141+
for making GPUs accessible to containers.'
141142
type: boolean
142143
enabled:
143-
default: false
144-
description: Enabled indicates whether CDI can be used to make
145-
GPUs accessible to containers.
144+
default: true
145+
description: Enabled indicates whether the Container Device Interface
146+
(CDI) should be used as the mechanism for making GPUs accessible
147+
to containers.
146148
type: boolean
147149
type: object
148150
daemonsets:

0 commit comments

Comments (0)