diff --git a/CHANGELOG.md b/CHANGELOG.md index 41fef423d..7e4d8c57e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Added enforcement of the `nvidia` runtime class for GPU pods, with the option to enforce a custom runtime class, or disable enforcement entirely. ### Fixed +- (Openshift only) - High CPU usage for the operator pod due to continuous reconciles - Fixed a bug where the scheduler would not re-try updating podgroup status after failure - Added missing SCC for Openshift installations diff --git a/pkg/operator/operands/binder/binder_test.go b/pkg/operator/operands/binder/binder_test.go index b3c1b458b..2de7bb927 100644 --- a/pkg/operator/operands/binder/binder_test.go +++ b/pkg/operator/operands/binder/binder_test.go @@ -19,6 +19,8 @@ import ( "github.com/NVIDIA/KAI-scheduler/pkg/operator/operands/common/test_utils" appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -93,6 +95,60 @@ var _ = Describe("Binder", func() { Expect(deployment.Labels).To(HaveKeyWithValue("foo", "bar")) Expect(deployment.Spec.Template.Labels).To(HaveKeyWithValue("kai", "scheduler")) }) + + It("sets CDI flag if set in cluser policy", func(ctx context.Context) { + clusterPolicy := &nvidiav1.ClusterPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: nvidiav1.ClusterPolicySpec{ + CDI: nvidiav1.CDIConfigSpec{ + Enabled: ptr.To(true), + Default: ptr.To(true), + }, + }, + } + + Expect(fakeKubeClient.Create(ctx, clusterPolicy)).To(Succeed()) + objects, err := b.DesiredState(ctx, fakeKubeClient, kaiConfig) + Expect(err).To(BeNil()) + + deploymentT := test_utils.FindTypeInObjects[*appsv1.Deployment](objects) + Expect(deploymentT).NotTo(BeNil()) + 
Expect((*deploymentT).Spec.Template.Spec.Containers[0].Args).To(ContainElement("--cdi-enabled=true")) + }) + }) + + Context("Reservation Service Account", func() { + It("will not remove current image pull secrets", func(ctx context.Context) { + kaiConfig.Spec.Global.ImagePullSecrets = []string{"test-secret"} + + reservationSA := &v1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: *kaiConfig.Spec.Binder.ResourceReservation.Namespace, + Name: *kaiConfig.Spec.Binder.ResourceReservation.ServiceAccountName, + }, + ImagePullSecrets: []v1.LocalObjectReference{ + {Name: "existing"}, + }, + } + Expect(fakeKubeClient.Create(ctx, reservationSA)).To(Succeed()) + objects, err := b.DesiredState(ctx, fakeKubeClient, kaiConfig) + Expect(err).To(BeNil()) + + var newReservationSA *v1.ServiceAccount + for _, obj := range objects { + sa, ok := obj.(*v1.ServiceAccount) + if ok && sa.Name == reservationSA.Name { + newReservationSA = sa + } + } + + Expect(newReservationSA).NotTo(BeNil()) + Expect(newReservationSA.ImagePullSecrets).To(HaveLen(2)) + Expect(newReservationSA.ImagePullSecrets).To(ContainElement(v1.LocalObjectReference{Name: "existing"})) + Expect(newReservationSA.ImagePullSecrets).To(ContainElement(v1.LocalObjectReference{Name: "test-secret"})) + }) }) }) }) diff --git a/pkg/operator/operands/binder/resources.go b/pkg/operator/operands/binder/resources.go index 29c7a4c71..ecd95d398 100644 --- a/pkg/operator/operands/binder/resources.go +++ b/pkg/operator/operands/binder/resources.go @@ -140,7 +140,23 @@ func resourceReservationServiceAccount( sa.Name = *kaiConfig.Spec.Binder.ResourceReservation.ServiceAccountName sa.Namespace = *kaiConfig.Spec.Binder.ResourceReservation.Namespace - sa.ImagePullSecrets = kaiConfigUtils.GetGlobalImagePullSecrets(kaiConfig.Spec.Global) + + imagePullSecrets := make(map[string]bool) + for _, secret := range sa.ImagePullSecrets { + imagePullSecrets[secret.Name] = true + } + + for _, secret := range 
kaiConfigUtils.GetGlobalImagePullSecrets(kaiConfig.Spec.Global) { + if !imagePullSecrets[secret.Name] { + imagePullSecrets[secret.Name] = true + } + } + + sa.ImagePullSecrets = make([]v1.LocalObjectReference, 0, len(imagePullSecrets)) + for secretName := range imagePullSecrets { + sa.ImagePullSecrets = append(sa.ImagePullSecrets, v1.LocalObjectReference{Name: secretName}) + } + return []client.Object{sa}, nil }