Skip to content

Commit 0ff9c0c

Browse files
authored
Changed shared GPU configmap name suffix from runai-sh-gpu to shared-gpu (#283)
* Changed shared GPU configmap name suffix from runai-sh-gpu to shared-gpu
* Changed sharedGPUConfigMapNamePrefix to sharedGPUConfigMapAnnotation
1 parent f26ca5d commit 0ff9c0c

File tree

9 files changed

+25
-318
lines changed

9 files changed

+25
-318
lines changed

cmd/podgroupcontroller/app/app.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ func Run() error {
6565
cacheOptions := cache.Options{}
6666
cacheOptions.ByObject = map[client.Object]cache.ByObject{
6767
&v1.Pod{}: {Field: schedulerSelector},
68-
&v1.Node{}: {}, // TODO: filter by strict/non-strict runai nodes
68+
&v1.Node{}: {},
6969
&schedulingv1.PriorityClass{}: {},
7070
&v2alpha2.PodGroup{}: {},
7171
}

pkg/binder/binding/fraction_binder_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ var happyFlowObjectsBc = []runtime.Object{
4646
ValueFrom: &v1.EnvVarSource{
4747
ConfigMapKeyRef: &v1.ConfigMapKeySelector{
4848
LocalObjectReference: v1.LocalObjectReference{
49-
Name: "my-configmap-runai-sh-gpu",
49+
Name: "my-configmap-shared-gpu",
5050
},
5151
},
5252
},
@@ -58,7 +58,7 @@ var happyFlowObjectsBc = []runtime.Object{
5858
VolumeSource: v1.VolumeSource{
5959
ConfigMap: &v1.ConfigMapVolumeSource{
6060
LocalObjectReference: v1.LocalObjectReference{
61-
Name: "my-configmap-runai-sh-gpu",
61+
Name: "my-configmap-shared-gpu",
6262
},
6363
},
6464
},
@@ -102,7 +102,7 @@ var happyFlowObjects = []runtime.Object{
102102
VolumeSource: v1.VolumeSource{
103103
ConfigMap: &v1.ConfigMapVolumeSource{
104104
LocalObjectReference: v1.LocalObjectReference{
105-
Name: "my-configmap-runai-sh-gpu",
105+
Name: "my-configmap-shared-gpu",
106106
},
107107
},
108108
},
@@ -209,7 +209,7 @@ var _ = Describe("FractionBinder", func() {
209209

210210
configMap := &v1.ConfigMap{
211211
ObjectMeta: metav1.ObjectMeta{
212-
Name: "my-configmap-runai-sh-gpu",
212+
Name: "my-configmap-shared-gpu",
213213
Namespace: "my-ns",
214214
},
215215
}

pkg/binder/common/gpusharingconfigmap/config_map.go

Lines changed: 10 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"context"
88
"fmt"
99
"strconv"
10-
"strings"
1110

1211
v1 "k8s.io/api/core/v1"
1312
"k8s.io/apimachinery/pkg/api/errors"
@@ -18,11 +17,11 @@ import (
1817
)
1918

2019
const (
21-
GPUSharingConfigMap = "runai-sh-gpu"
22-
DesiredConfigMapPrefixKey = "runai/shared-gpu-configmap"
23-
maxVolumeNameLength = 63
24-
configMapNameNumRandomChars = 7
25-
configMapNameExtraChars = configMapNameNumRandomChars + 6
20+
gpuSharingConfigMapAnnotation = "runai/shared-gpu-configmap"
21+
gpuSharingConfigMap = "shared-gpu"
22+
maxVolumeNameLength = 63
23+
configMapNameNumRandomChars = 7
24+
configMapNameExtraChars = configMapNameNumRandomChars + 6
2625
)
2726

2827
func UpsertJobConfigMap(ctx context.Context,
@@ -107,7 +106,7 @@ func patchConfigMap(
107106
}
108107

109108
func SetGpuCapabilitiesConfigMapName(pod *v1.Pod, containerIndex int, containerType ContainerType) string {
110-
namePrefix, found := pod.Annotations[DesiredConfigMapPrefixKey]
109+
namePrefix, found := pod.Annotations[gpuSharingConfigMapAnnotation]
111110
if !found {
112111
namePrefix = generateConfigMapNamePrefix(pod, containerIndex)
113112
setConfigMapNameAnnotation(pod, namePrefix)
@@ -128,7 +127,7 @@ func generateConfigMapNamePrefix(pod *v1.Pod, containerIndex int) string {
128127
}
129128
// volume name is the `${configMapName}-vol` and should be up to 63 bytes long,
130129
// 4 for "-vol" , 7 random chars, and 2 hyphens - 13 in total
131-
maxBaseNameLength := (maxVolumeNameLength - configMapNameExtraChars) - len(GPUSharingConfigMap)
130+
maxBaseNameLength := (maxVolumeNameLength - configMapNameExtraChars) - len(gpuSharingConfigMap)
132131
// also remove from the max length for "-{containerIndex}" or "-i{initContainerIndex}" in the name
133132
maxBaseNameLength = maxBaseNameLength - len(strconv.Itoa(containerIndex)) - 1
134133
// also allow for appending "-evar" in case of envFrom config map
@@ -137,7 +136,7 @@ func generateConfigMapNamePrefix(pod *v1.Pod, containerIndex int) string {
137136
baseName = baseName[:maxBaseNameLength]
138137
}
139138
return fmt.Sprintf("%v-%v-%v", baseName,
140-
utilrand.String(configMapNameNumRandomChars), GPUSharingConfigMap)
139+
utilrand.String(configMapNameNumRandomChars), gpuSharingConfigMap)
141140
}
142141

143142
func ExtractCapabilitiesConfigMapName(pod *v1.Pod, containerIndex int, containerType ContainerType) (string, error) {
@@ -146,7 +145,7 @@ func ExtractCapabilitiesConfigMapName(pod *v1.Pod, containerIndex int, container
146145
containerIndexStr = "i" + containerIndexStr
147146
}
148147

149-
namePrefix, found := pod.Annotations[DesiredConfigMapPrefixKey]
148+
namePrefix, found := pod.Annotations[gpuSharingConfigMapAnnotation]
150149
if !found {
151150
return "", fmt.Errorf("no desired configmap name found in pod %s/%s annotations", pod.Namespace, pod.Name)
152151
}
@@ -162,59 +161,11 @@ func ExtractDirectEnvVarsConfigMapName(pod *v1.Pod, containerIndex int, containe
162161
return fmt.Sprintf("%s-evar", configNameBase), nil
163162
}
164163

165-
func HandleBCPod(ctx context.Context, kubeClient client.Client, pod *v1.Pod) (string, error) {
166-
logger := log.FromContext(ctx)
167-
desiredConfigMapName, err := GetDesiredConfigMapNameBC(pod)
168-
if err != nil {
169-
return "", err
170-
}
171-
logger.Info("Desired configmap name for backwards compatibility pod",
172-
"namespace", pod.Namespace, "name", pod.Name, "configMapName", desiredConfigMapName)
173-
err = UpdateBCPod(ctx, kubeClient, pod, desiredConfigMapName)
174-
return desiredConfigMapName, err
175-
}
176-
177-
func GetDesiredConfigMapNameBC(pod *v1.Pod) (string, error) {
178-
cmName := ""
179-
for _, volume := range pod.Spec.Volumes {
180-
if volume.ConfigMap == nil {
181-
continue
182-
}
183-
184-
possibleCmName := volume.ConfigMap.LocalObjectReference.Name
185-
if strings.HasSuffix(possibleCmName, GPUSharingConfigMap) {
186-
if cmName != "" {
187-
return "", fmt.Errorf("multiple desired gpu sharing configmap volumes detected for backwards "+
188-
"compatibility pod %s/%s", pod.Namespace, pod.Name)
189-
}
190-
cmName = possibleCmName
191-
}
192-
}
193-
194-
if cmName == "" {
195-
return "", fmt.Errorf("no desired gpu sharing configmap volume detected for backwards compatibility pod "+
196-
"%s/%s", pod.Namespace, pod.Name)
197-
}
198-
199-
return cmName, nil
200-
}
201-
202-
func UpdateBCPod(ctx context.Context, kubeClient client.Client, pod *v1.Pod, desiredConfigMapName string) error {
203-
updatedPod := pod.DeepCopy()
204-
setConfigMapNameAnnotation(updatedPod, desiredConfigMapName)
205-
err := kubeClient.Patch(ctx, updatedPod, client.MergeFrom(pod))
206-
if err != nil {
207-
return fmt.Errorf("failed to update pod %v/%v with desired configmap name %v, error: %v",
208-
pod.Namespace, pod.Name, desiredConfigMapName, err)
209-
}
210-
return nil
211-
}
212-
213164
func setConfigMapNameAnnotation(pod *v1.Pod, name string) {
214165
if pod.Annotations == nil {
215166
pod.Annotations = map[string]string{}
216167
}
217-
pod.Annotations[DesiredConfigMapPrefixKey] = name
168+
pod.Annotations[gpuSharingConfigMapAnnotation] = name
218169
}
219170

220171
// ownerReferencesDifferent compares two OwnerReferences and returns true if they are not the same

0 commit comments

Comments
 (0)