Skip to content

Commit 7894d92

Browse files
committed
fix: volume names getting longer than DNS_LABEL
1 parent b3aac37 commit 7894d92

File tree

2 files changed

+167
-43
lines changed

2 files changed

+167
-43
lines changed

operator/internal/controller/skyhook_controller.go

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,24 @@ func ptr[E any](e E) *E {
777777
return &e
778778
}
779779

780+
// generateSafeName generates a consistent name for Kubernetes resources that is unique
781+
// while staying within the specified character limit
782+
func generateSafeName(maxLen int, nameParts ...string) string {
783+
name := strings.Join(nameParts, "-")
784+
// Replace dots with dashes as they're not allowed in resource names
785+
name = strings.ReplaceAll(name, ".", "-")
786+
787+
unique := sha256.Sum256([]byte(name))
788+
uniqueStr := hex.EncodeToString(unique[:])[:8]
789+
790+
maxlen := maxLen - len(uniqueStr) - 1
791+
if len(name) > maxlen {
792+
name = name[:maxlen]
793+
}
794+
795+
return strings.ToLower(fmt.Sprintf("%s-%s", name, uniqueStr))
796+
}
797+
780798
func (r *SkyhookReconciler) UpsertNodeLabelsAnnotations(ctx context.Context, skyhook *wrapper.Skyhook, node *corev1.Node) error {
781799
// No work to do if there is no labels or annotations for node
782800
if len(node.Labels) == 0 && len(node.Annotations) == 0 {
@@ -793,11 +811,10 @@ func (r *SkyhookReconciler) UpsertNodeLabelsAnnotations(ctx context.Context, sky
793811
return fmt.Errorf("error converting labels into byte array: %w", err)
794812
}
795813

796-
// node names in different CSPs might include dots which isn't allowed in configmap names
797-
// so we have to replace all dots with dashes
814+
configMapName := generateSafeName(253, skyhook.Name, node.Name, "metadata")
798815
newCM := &corev1.ConfigMap{
799816
ObjectMeta: metav1.ObjectMeta{
800-
Name: strings.ReplaceAll(fmt.Sprintf("%s-%s-metadata", skyhook.Name, node.Name), ".", "-"),
817+
Name: configMapName,
801818
Namespace: r.opts.Namespace,
802819
Labels: map[string]string{
803820
fmt.Sprintf("%s/skyhook-node-meta", v1alpha1.METADATA_PREFIX): skyhook.Name,
@@ -818,7 +835,7 @@ func (r *SkyhookReconciler) UpsertNodeLabelsAnnotations(ctx context.Context, sky
818835
}
819836

820837
existingConfigMap := &corev1.ConfigMap{}
821-
err = r.Get(ctx, client.ObjectKey{Namespace: r.opts.Namespace, Name: strings.ReplaceAll(fmt.Sprintf("%s-%s-metadata", skyhook.Name, node.Name), ".", "-")}, existingConfigMap)
838+
err = r.Get(ctx, client.ObjectKey{Namespace: r.opts.Namespace, Name: configMapName}, existingConfigMap)
822839
if err != nil {
823840
if apierrors.IsNotFound(err) {
824841
// create
@@ -1355,7 +1372,7 @@ func (r *SkyhookReconciler) ValidateNodeConfigmaps(ctx context.Context, skyhookN
13551372

13561373
shouldExist := make(map[string]struct{})
13571374
for _, node := range nodes {
1358-
shouldExist[strings.ReplaceAll(fmt.Sprintf("%s-%s-metadata", skyhookName, node.GetNode().Name), ".", "-")] = struct{}{}
1375+
shouldExist[generateSafeName(253, skyhookName, node.GetNode().Name, "metadata")] = struct{}{}
13591376
}
13601377

13611378
update := false
@@ -1418,7 +1435,7 @@ func (r *SkyhookReconciler) CreateInterruptPodForPackage(_interrupt *v1alpha1.In
14181435
{
14191436
// node names in different CSPs might include dots which isn't allowed in volume names
14201437
// so we have to replace all dots with dashes
1421-
Name: strings.ReplaceAll(fmt.Sprintf("%s-metadata", nodeName), ".", "-"),
1438+
Name: generateSafeName(63, skyhook.Name, nodeName, "metadata"),
14221439
VolumeSource: corev1.VolumeSource{
14231440
ConfigMap: &corev1.ConfigMapVolumeSource{
14241441
LocalObjectReference: corev1.LocalObjectReference{
@@ -1438,7 +1455,7 @@ func (r *SkyhookReconciler) CreateInterruptPodForPackage(_interrupt *v1alpha1.In
14381455

14391456
return &corev1.Pod{
14401457
ObjectMeta: metav1.ObjectMeta{
1441-
Name: generatePodName(fmt.Sprintf("%s-interrupt-%s", skyhook.Name, _interrupt.Type), nodeName),
1458+
Name: generateSafeName(63, skyhook.Name, "interrupt", string(_interrupt.Type), nodeName),
14421459
Namespace: r.opts.Namespace,
14431460
Labels: map[string]string{
14441461
fmt.Sprintf("%s/name", v1alpha1.METADATA_PREFIX): skyhook.Name,
@@ -1536,25 +1553,12 @@ func getAgentImage(opts SkyhookOperatorOptions, _package *v1alpha1.Package) stri
15361553
return opts.AgentImage
15371554
}
15381555

1539-
// generatePodName generates a pod name that is unique for a given node, skyhook, package, and stage
1540-
// namePrefix is the prefix of the pod name (should be unique)
1541-
func generatePodName(namePrefix, nodeName string) string {
1542-
// max name of pod is 63 characters
1543-
// so we need to truncate the name if it's too long
1544-
1545-
unique := sha256.Sum256([]byte(namePrefix + nodeName))
1546-
uniqueStr := hex.EncodeToString(unique[:])[:8]
1547-
1548-
maxlen := 63 - len(uniqueStr) - 1
1549-
if len(namePrefix) > maxlen {
1550-
namePrefix = namePrefix[:maxlen]
1551-
}
1552-
1553-
return strings.ToLower(fmt.Sprintf("%s-%s", namePrefix, uniqueStr))
1554-
}
1555-
15561556
// CreatePodFromPackage creates a pod spec for a skyhook pod for a given package
15571557
func (r *SkyhookReconciler) CreatePodFromPackage(_package *v1alpha1.Package, skyhook *wrapper.Skyhook, nodeName string, stage v1alpha1.Stage) *corev1.Pod {
1558+
// Generate consistent names that won't exceed k8s limits
1559+
volumeName := generateSafeName(63, "metadata", nodeName)
1560+
configMapName := generateSafeName(253, skyhook.Name, nodeName, "metadata")
1561+
15581562
volumes := []corev1.Volume{
15591563
{
15601564
Name: "root-mount",
@@ -1565,13 +1569,11 @@ func (r *SkyhookReconciler) CreatePodFromPackage(_package *v1alpha1.Package, sky
15651569
},
15661570
},
15671571
{
1568-
// node names in different CSPs might include dots which isn't allowed in volume names
1569-
// so we have to replace all dots with dashes
1570-
Name: strings.ReplaceAll(fmt.Sprintf("%s-metadata", nodeName), ".", "-"),
1572+
Name: volumeName,
15711573
VolumeSource: corev1.VolumeSource{
15721574
ConfigMap: &corev1.ConfigMapVolumeSource{
15731575
LocalObjectReference: corev1.LocalObjectReference{
1574-
Name: strings.ReplaceAll(fmt.Sprintf("%s-%s-metadata", skyhook.Name, nodeName), ".", "-"),
1576+
Name: configMapName,
15751577
},
15761578
},
15771579
},
@@ -1585,9 +1587,7 @@ func (r *SkyhookReconciler) CreatePodFromPackage(_package *v1alpha1.Package, sky
15851587
MountPropagation: ptr(corev1.MountPropagationHostToContainer),
15861588
},
15871589
{
1588-
// node names in different CSPs might include dots which isn't allowed in volume mount names
1589-
// so we have to replace all dots with dashes
1590-
Name: strings.ReplaceAll(fmt.Sprintf("%s-metadata", nodeName), ".", "-"),
1590+
Name: volumeName,
15911591
MountPath: "/skyhook-package/node-metadata",
15921592
},
15931593
}
@@ -1623,7 +1623,7 @@ func (r *SkyhookReconciler) CreatePodFromPackage(_package *v1alpha1.Package, sky
16231623

16241624
pod := &corev1.Pod{
16251625
ObjectMeta: metav1.ObjectMeta{
1626-
Name: generatePodName(fmt.Sprintf("%s-%s-%s-%s", skyhook.Name, _package.Name, _package.Version, stage), nodeName),
1626+
Name: generateSafeName(63, skyhook.Name, _package.Name, _package.Version, string(stage), nodeName),
16271627
Namespace: r.opts.Namespace,
16281628
Labels: map[string]string{
16291629
fmt.Sprintf("%s/name", v1alpha1.METADATA_PREFIX): skyhook.Name,

operator/internal/controller/skyhook_controller_test.go

Lines changed: 135 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ package controller
2323
import (
2424
"context"
2525
"fmt"
26+
"testing"
2627
"time"
2728

2829
"github.com/NVIDIA/skyhook/api/v1alpha1"
@@ -880,34 +881,34 @@ var _ = Describe("skyhook controller tests", func() {
880881

881882
// Test 1: Deterministic behavior (same inputs = same output)
882883
prefix1 := createNamePrefix(skyhook.Name, package1.Name, package1.Version, string(v1alpha1.StageApply))
883-
name1 := generatePodName(prefix1, nodeName)
884-
name2 := generatePodName(prefix1, nodeName)
884+
name1 := generateSafeName(63, prefix1, nodeName)
885+
name2 := generateSafeName(63, prefix1, nodeName)
885886
Expect(name1).To(Equal(name2), "Generated pod names should be deterministic")
886887

887888
// Test 2: Uniqueness with different inputs
888889
// Different stage
889890
prefixApply := createNamePrefix(skyhook.Name, package1.Name, package1.Version, string(v1alpha1.StageApply))
890891
prefixConfig := createNamePrefix(skyhook.Name, package1.Name, package1.Version, string(v1alpha1.StageConfig))
891-
nameApply := generatePodName(prefixApply, nodeName)
892-
nameConfig := generatePodName(prefixConfig, nodeName)
892+
nameApply := generateSafeName(63, prefixApply, nodeName)
893+
nameConfig := generateSafeName(63, prefixConfig, nodeName)
893894
Expect(nameApply).NotTo(Equal(nameConfig), "Different stages should produce different pod names")
894895

895896
// Different package version
896897
prefix2 := createNamePrefix(skyhook.Name, package2.Name, package2.Version, string(v1alpha1.StageApply))
897-
nameVersion1 := generatePodName(prefix1, nodeName)
898-
nameVersion2 := generatePodName(prefix2, nodeName)
898+
nameVersion1 := generateSafeName(63, prefix1, nodeName)
899+
nameVersion2 := generateSafeName(63, prefix2, nodeName)
899900
Expect(nameVersion1).NotTo(Equal(nameVersion2), "Different package versions should produce different pod names")
900901

901902
// Different node
902-
nameNode1 := generatePodName(prefix1, nodeName)
903-
nameNode2 := generatePodName(prefix1, nodeName2)
903+
nameNode1 := generateSafeName(63, prefix1, nodeName)
904+
nameNode2 := generateSafeName(63, prefix1, nodeName2)
904905
Expect(nameNode1).NotTo(Equal(nameNode2), "Different nodes should produce different pod names")
905906

906907
// Test for uninstall pods with timestamp
907908
uninstallPrefix1 := fmt.Sprintf("%s-uninstall-123456789", prefixApply)
908909
uninstallPrefix2 := fmt.Sprintf("%s-uninstall-987654321", prefixApply)
909-
uninstallName1 := generatePodName(uninstallPrefix1, nodeName)
910-
uninstallName2 := generatePodName(uninstallPrefix2, nodeName)
910+
uninstallName1 := generateSafeName(63, uninstallPrefix1, nodeName)
911+
uninstallName2 := generateSafeName(63, uninstallPrefix2, nodeName)
911912
Expect(uninstallName1).NotTo(Equal(uninstallName2), "Uninstall pods with different timestamps should have different names")
912913
Expect(uninstallName1).NotTo(Equal(nameApply), "Uninstall pod name should be different from regular pod name")
913914

@@ -916,7 +917,7 @@ var _ = Describe("skyhook controller tests", func() {
916917
longPackageName := "this-is-a-very-long-package-name-that-also-exceeds-kubernetes-naming-limits"
917918
longPackageVersion := "1.2.3.4.5.6.7.8.9.10"
918919
longPrefix := createNamePrefix(longSkyhookName, longPackageName, longPackageVersion, string(v1alpha1.StageApply))
919-
longName := generatePodName(longPrefix, "node1")
920+
longName := generateSafeName(63, longPrefix, "node1")
920921
Expect(len(longName)).To(BeNumerically("<=", 63), "Pod name should not exceed Kubernetes 63 character limit")
921922
Expect(longName).To(MatchRegexp(`-[0-9a-f]+$`), "Pod name should end with a hash component")
922923
})
@@ -984,4 +985,127 @@ var _ = Describe("skyhook controller tests", func() {
984985
matches = operator.PodMatchesPackage(testPackage, *interruptPod, testSkyhook, testStage)
985986
Expect(matches).To(BeTrue(), "PodMatchesPackage should recognize the interrupt pod it created")
986987
})
988+
989+
It("should generate valid volume names", func() {
990+
tests := []struct {
991+
name string
992+
prefix string
993+
nodeName string
994+
expectedLen int
995+
shouldMatch string
996+
description string
997+
}{
998+
{
999+
name: "short name",
1000+
prefix: "metadata",
1001+
nodeName: "node1",
1002+
expectedLen: 23, // "metadata-node1-" + 8 char hash
1003+
description: "should handle short names",
1004+
},
1005+
{
1006+
name: "very long node name",
1007+
prefix: "metadata",
1008+
nodeName: "very-long-node-name-that-exceeds-kubernetes-limits-and-needs-to-be-truncated-to-something-shorter",
1009+
expectedLen: 63,
1010+
description: "should handle long names by hashing",
1011+
},
1012+
{
1013+
name: "consistent hashing",
1014+
prefix: "metadata",
1015+
nodeName: "node1",
1016+
shouldMatch: generateSafeName(63, "metadata", "node1"),
1017+
description: "should generate consistent names for the same input",
1018+
},
1019+
}
1020+
1021+
for _, tt := range tests {
1022+
result := generateSafeName(63, tt.prefix, tt.nodeName)
1023+
1024+
if tt.expectedLen > 0 {
1025+
Expect(len(result)).To(Equal(tt.expectedLen), tt.description)
1026+
}
1027+
if tt.shouldMatch != "" {
1028+
Expect(result).To(Equal(tt.shouldMatch), tt.description)
1029+
}
1030+
Expect(len(result)).To(BeNumerically("<=", 63), "volume name should never exceed 63 characters")
1031+
Expect(result).To(MatchRegexp(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`), "volume name should match kubernetes naming requirements")
1032+
}
1033+
})
1034+
1035+
It("should generate valid configmap names", func() {
1036+
tests := []struct {
1037+
name string
1038+
skyhookName string
1039+
nodeName string
1040+
expectedLen int
1041+
shouldMatch string
1042+
description string
1043+
}{
1044+
{
1045+
name: "short names",
1046+
skyhookName: "skyhook1",
1047+
nodeName: "node1",
1048+
expectedLen: 32, // "skyhook1-node1-metadata-" + 8 char hash
1049+
description: "should handle short names",
1050+
},
1051+
{
1052+
name: "very long names",
1053+
skyhookName: "very-long-skyhook-name",
1054+
nodeName: "very-long-node-name-that-exceeds-kubernetes-limits-and-needs-to-be-truncated",
1055+
expectedLen: 63,
1056+
description: "should handle long names by truncating and hashing",
1057+
},
1058+
{
1059+
name: "consistent hashing",
1060+
skyhookName: "skyhook1",
1061+
nodeName: "node1",
1062+
shouldMatch: generateSafeName(63, "skyhook1", "node1", "metadata"),
1063+
description: "should generate consistent names for the same input",
1064+
},
1065+
{
1066+
name: "handles dots in names",
1067+
skyhookName: "skyhook.1",
1068+
nodeName: "node.1",
1069+
expectedLen: 34,
1070+
description: "should handle dots in names consistently",
1071+
},
1072+
}
1073+
1074+
for _, tt := range tests {
1075+
result := generateSafeName(63, tt.skyhookName, tt.nodeName, "metadata")
1076+
1077+
if tt.expectedLen > 0 {
1078+
Expect(len(result)).To(Equal(tt.expectedLen), tt.description)
1079+
}
1080+
if tt.shouldMatch != "" {
1081+
Expect(result).To(Equal(tt.shouldMatch), tt.description)
1082+
}
1083+
Expect(len(result)).To(BeNumerically("<=", 63), "configmap name should never exceed 63 characters")
1084+
Expect(result).To(MatchRegexp(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`), "configmap name should match kubernetes naming requirements")
1085+
}
1086+
})
9871087
})
1088+
1089+
func TestGenerateValidPodNames(t *testing.T) {
1090+
g := NewWithT(t)
1091+
1092+
// Test short name
1093+
name := generateSafeName(63, "test", "node1")
1094+
g.Expect(len(name)).To(Equal(19)) // "test-node1-" + 8 char hash
1095+
g.Expect(name).To(MatchRegexp(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`))
1096+
1097+
// Test very long name
1098+
name = generateSafeName(63, "test-very-long-name-that-should-be-truncated", "node1")
1099+
g.Expect(len(name)).To(Equal(59))
1100+
g.Expect(name).To(MatchRegexp(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`))
1101+
1102+
// Test consistent hashing
1103+
name1 := generateSafeName(63, "test", "node1")
1104+
name2 := generateSafeName(63, "test", "node1")
1105+
g.Expect(name1).To(Equal(name2))
1106+
1107+
// Test dots in name
1108+
name = generateSafeName(63, "test.name", "node.1")
1109+
g.Expect(name).To(MatchRegexp(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`))
1110+
g.Expect(len(name)).To(Equal(25)) // "test-name-node-1-" + 8 char hash
1111+
}

0 commit comments

Comments
 (0)