Skip to content

Commit b416b40

Browse files
committed
Implement E2E tests for custom cluster-domain
Signed-off-by: Yuki Iwai <[email protected]>
1 parent 86733ad commit b416b40

File tree

1 file changed

+65
-6
lines changed

1 file changed

+65
-6
lines changed

test/e2e/mpi_job_test.go

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ var _ = ginkgo.Describe("MPIJob", func() {
168168
expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
169169
})
170170

171-
ginkgo.It("should not be updated when managed externaly, only created", func() {
171+
ginkgo.It("should not be updated when managed externally, only created", func() {
172172
mpiJob.Spec.RunPolicy.ManagedBy = ptr.To(kubeflow.MultiKueueController)
173173
ctx := context.Background()
174174
mpiJob = createJob(ctx, mpiJob)
@@ -352,7 +352,7 @@ var _ = ginkgo.Describe("MPIJob", func() {
352352
// Set up the scheduler-plugins.
353353
setUpSchedulerPlugins()
354354
// Set up the mpi-operator so that the scheduler-plugins is used as gang-scheduler.
355-
setupMPIOperator(ctx, mpiJob, enableGangSchedulingFlag, unschedulableResources)
355+
setupMPIOperator(ctx, mpiJob, unschedulableResources, enableGangSchedulingFlag)
356356
})
357357

358358
ginkgo.AfterEach(func() {
@@ -447,7 +447,7 @@ var _ = ginkgo.Describe("MPIJob", func() {
447447
// Set up the volcano-scheduler.
448448
setupVolcanoScheduler()
449449
// Set up the mpi-operator so that the volcano scheduler is used as gang-scheduler.
450-
setupMPIOperator(ctx, mpiJob, enableGangSchedulingFlag, unschedulableResources)
450+
setupMPIOperator(ctx, mpiJob, unschedulableResources, enableGangSchedulingFlag)
451451
})
452452

453453
ginkgo.AfterEach(func() {
@@ -527,6 +527,61 @@ var _ = ginkgo.Describe("MPIJob", func() {
527527
}, foreverTimeout, waitInterval).Should(gomega.Equal(corev1.ConditionTrue))
528528
})
529529
})
530+
531+
// The custom cluster-domain e2e tests.
532+
ginkgo.Context("with custom cluster-domain", func() {
	const (
		// clusterDomainFlag is the extra mpi-operator argument exercised by this context.
		clusterDomainFlag = "--cluster-domain=cluster.local"
		// allowRunAsRootOpt is appended to the launcher command in BeforeEach
		// and taken off again in AfterEach.
		allowRunAsRootOpt = "--allow-run-as-root"
	)

	var ctx = context.Background()

	// withoutFirst returns items minus the first element equal to target.
	// The result is assembled in a fresh slice, so the caller's backing array
	// is never shifted while it is being ranged over. When target is absent,
	// items is returned unchanged.
	withoutFirst := func(items []string, target string) []string {
		for i, item := range items {
			if item != target {
				continue
			}
			kept := make([]string, 0, len(items)-1)
			kept = append(kept, items[:i]...)
			return append(kept, items[i+1:]...)
		}
		return items
	}

	ginkgo.BeforeEach(func() {
		// Restart the operator with the cluster-domain flag. Passing nil for
		// unschedulableResources leaves the job's SchedulingPolicy untouched.
		setupMPIOperator(ctx, mpiJob, nil, clusterDomainFlag)
		mpiJob.Spec.RunLauncherAsWorker = ptr.To(true)
		launcherContainer := &mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0]
		launcherContainer.Command = append(launcherContainer.Command, allowRunAsRootOpt)
	})

	ginkgo.AfterEach(func() {
		operator, err := k8sClient.AppsV1().Deployments(mpiOperator).Get(ctx, mpiOperator, metav1.GetOptions{})
		// Assert on the error before touching the returned Deployment.
		gomega.Expect(err).Should(gomega.Succeed())
		oldOperator := operator.DeepCopy()

		// Drop the flag that BeforeEach added to the operator arguments.
		operatorContainer := &operator.Spec.Template.Spec.Containers[0]
		operatorContainer.Args = withoutFirst(operatorContainer.Args, clusterDomainFlag)

		// Only update the Deployment (and wait for it to become available
		// again) when removing the flag actually changed something.
		if diff := cmp.Diff(oldOperator, operator); len(diff) != 0 {
			_, err = k8sClient.AppsV1().Deployments(mpiOperator).Update(ctx, operator, metav1.UpdateOptions{})
			gomega.Expect(err).Should(gomega.Succeed())
			gomega.Eventually(func() bool {
				ok, err := ensureDeploymentAvailableReplicas(ctx, mpiOperator, mpiOperator)
				gomega.Expect(err).Should(gomega.Succeed())
				return ok
			}, foreverTimeout, waitInterval).Should(gomega.BeTrue())
		}

		// Restore the previous MPIJob configurations.
		mpiJob.Spec.RunLauncherAsWorker = ptr.To(false)
		launcherContainer := &mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0]
		launcherContainer.Command = withoutFirst(launcherContainer.Command, allowRunAsRootOpt)
	})

	ginkgo.When("running as root", func() {
		ginkgo.It("should succeed", func() {
			// Use a distinct name so the shared mpiJob fixture is not shadowed.
			completedJob := createJobAndWaitForCompletion(mpiJob)
			expectConditionToBeTrue(completedJob, kubeflow.JobSucceeded)
		})
	})
})
530585
})
531586

532587
func resumeJob(ctx context.Context, mpiJob *kubeflow.MPIJob) *kubeflow.MPIJob {
@@ -761,7 +816,7 @@ func cleanUpVolcanoScheduler() {
761816
}
762817

763818
// setupMPIOperator scales the mpi-operator Deployment in to zero replicas and back out to one so that the given manager flags (e.g. the gang-scheduler settings) take effect.
764-
func setupMPIOperator(ctx context.Context, mpiJob *kubeflow.MPIJob, enableGangSchedulingFlag string, unschedulableResources *corev1.ResourceList) {
819+
func setupMPIOperator(ctx context.Context, mpiJob *kubeflow.MPIJob, unschedulableResources *corev1.ResourceList, managerFlags ...string) {
765820
ginkgo.By("Scale-In the deployment to 0")
766821
operator, err := k8sClient.AppsV1().Deployments(mpiOperator).Get(ctx, mpiOperator, metav1.GetOptions{})
767822
gomega.Expect(err).Should(gomega.Succeed())
@@ -778,7 +833,7 @@ func setupMPIOperator(ctx context.Context, mpiJob *kubeflow.MPIJob, enableGangSc
778833
gomega.Eventually(func() error {
779834
updatedOperator, err := k8sClient.AppsV1().Deployments(mpiOperator).Get(ctx, mpiOperator, metav1.GetOptions{})
780835
gomega.Expect(err).Should(gomega.Succeed())
781-
updatedOperator.Spec.Template.Spec.Containers[0].Args = append(updatedOperator.Spec.Template.Spec.Containers[0].Args, enableGangSchedulingFlag)
836+
updatedOperator.Spec.Template.Spec.Containers[0].Args = append(updatedOperator.Spec.Template.Spec.Containers[0].Args, managerFlags...)
782837
updatedOperator.Spec.Replicas = ptr.To[int32](1)
783838
_, err = k8sClient.AppsV1().Deployments(mpiOperator).Update(ctx, updatedOperator, metav1.UpdateOptions{})
784839
return err
@@ -791,5 +846,9 @@ func setupMPIOperator(ctx context.Context, mpiJob *kubeflow.MPIJob, enableGangSc
791846
return isNotZero
792847
}, foreverTimeout, waitInterval).Should(gomega.BeTrue())
793848
createMPIJobWithOpenMPI(mpiJob)
794-
mpiJob.Spec.RunPolicy.SchedulingPolicy = &kubeflow.SchedulingPolicy{MinResources: unschedulableResources}
849+
if unschedulableResources != nil {
850+
mpiJob.Spec.RunPolicy.SchedulingPolicy = &kubeflow.SchedulingPolicy{
851+
MinResources: unschedulableResources,
852+
}
853+
}
795854
}

0 commit comments

Comments
 (0)