Skip to content

Commit f9c0086

Browse files
authored
Deprecated the RUNAI-VISIBLE-DEVICES key in the GPU sharing configmap (#302)
Deprecated the RUNAI-VISIBLE-DEVICES key in the GPU sharing configmap in favor of NVIDIA_VISIBLE_DEVICES
1 parent 827374c commit f9c0086

File tree

5 files changed

+17
-12
lines changed

5 files changed

+17
-12
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
66

77
## [Unreleased]
88

9+
### Changed
10+
- Changed the `RUNAI-VISIBLE-DEVICES` key in the GPU sharing configmap to `NVIDIA_VISIBLE_DEVICES`; the legacy key is still updated when it already exists in a configmap
11+
912
## [v0.7.3] - 2025-07-08
1013

1114
### Removed

pkg/binder/binding/default_binder_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ func TestBindApplyResourceReceivedType(t *testing.T) {
110110
Name: common.NvidiaVisibleDevices,
111111
ValueFrom: &v1.EnvVarSource{
112112
ConfigMapKeyRef: &v1.ConfigMapKeySelector{
113-
Key: common.VisibleDevices,
113+
Key: common.NvidiaVisibleDevices,
114114
LocalObjectReference: v1.LocalObjectReference{
115115
Name: "my-config-0",
116116
},

pkg/binder/binding/fraction_binder_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ var _ = Describe("FractionBinder", func() {
216216
if err := fakeClient.Get(context.TODO(), client.ObjectKeyFromObject(configMap), configMap); err != nil {
217217
Fail(fmt.Sprintf("Failed to read configmap: %v", err))
218218
} else {
219-
Expect(configMap.Data[common.VisibleDevices]).To(Equal(testData.gpuIndexByGroupIndex))
219+
Expect(configMap.Data[common.NvidiaVisibleDevices]).To(Equal(testData.gpuIndexByGroupIndex))
220220
Expect(configMap.Data[common.NumOfGpusEnvVar]).To(Equal("0.5"))
221221
}
222222
})

pkg/binder/common/constants.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ package common
55

66
const (
77
NvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
8-
VisibleDevices = "RUNAI-VISIBLE-DEVICES"
98
NumOfGpusEnvVar = "RUNAI_NUM_OF_GPUS"
109
ReceivedTypeFraction = "Fraction"
1110
ReceivedTypeRegular = "Regular"

pkg/binder/common/gpu_access.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,16 @@ import (
1515
"github.com/NVIDIA/KAI-scheduler/pkg/binder/common/gpusharingconfigmap"
1616
)
1717

18+
const (
19+
visibleDevicesBC = "RUNAI-VISIBLE-DEVICES" // Deprecated: legacy GPU sharing configmap key, replaced by NVIDIA_VISIBLE_DEVICES; retained only so configmaps that already contain it can still be updated.
20+
)
21+
1822
func AddVisibleDevicesEnvVars(container *v1.Container, sharedGpuConfigMapName string) {
1923
AddEnvVarToContainer(container, v1.EnvVar{
2024
Name: NvidiaVisibleDevices,
2125
ValueFrom: &v1.EnvVarSource{
2226
ConfigMapKeyRef: &v1.ConfigMapKeySelector{
23-
Key: VisibleDevices,
27+
Key: NvidiaVisibleDevices,
2428
LocalObjectReference: v1.LocalObjectReference{
2529
Name: sharedGpuConfigMapName,
2630
},
@@ -58,20 +62,19 @@ func SetNvidiaVisibleDevices(
5862
var updateFunc func(data map[string]string) error
5963
if nvidiaVisibleDevicesDefinedInSpec {
6064
configMapName, err = gpusharingconfigmap.ExtractCapabilitiesConfigMapName(pod, containerRef)
61-
updateFunc = func(data map[string]string) error {
62-
data[VisibleDevices] = visibleDevicesValue
63-
return nil
64-
}
6565
} else {
6666
configMapName, err = gpusharingconfigmap.ExtractDirectEnvVarsConfigMapName(pod, containerRef)
67-
updateFunc = func(data map[string]string) error {
68-
data[NvidiaVisibleDevices] = visibleDevicesValue
69-
return nil
70-
}
7167
}
7268
if err != nil {
7369
return err
7470
}
71+
updateFunc = func(data map[string]string) error {
72+
if _, found := data[visibleDevicesBC]; found {
73+
data[visibleDevicesBC] = visibleDevicesValue
74+
}
75+
data[NvidiaVisibleDevices] = visibleDevicesValue
76+
return nil
77+
}
7578
err = UpdateConfigMapEnvironmentVariable(ctx, kubeClient, pod, configMapName, updateFunc)
7679
if err != nil {
7780
return fmt.Errorf("failed to update gpu sharing configmap %s for pod <%s/%s>: %v",

0 commit comments

Comments
 (0)