@@ -168,7 +168,7 @@ var _ = ginkgo.Describe("MPIJob", func() {
168168 expectConditionToBeTrue (mpiJob , kubeflow .JobSucceeded )
169169 })
170170
171- ginkgo .It ("should not be updated when managed externaly , only created" , func () {
171+ ginkgo .It ("should not be updated when managed externally , only created" , func () {
172172 mpiJob .Spec .RunPolicy .ManagedBy = ptr .To (kubeflow .MultiKueueController )
173173 ctx := context .Background ()
174174 mpiJob = createJob (ctx , mpiJob )
@@ -352,7 +352,7 @@ var _ = ginkgo.Describe("MPIJob", func() {
352352 // Set up the scheduler-plugins.
353353 setUpSchedulerPlugins ()
354354 // Set up the mpi-operator so that the scheduler-plugins is used as gang-scheduler.
355- setupMPIOperator (ctx , mpiJob , enableGangSchedulingFlag , unschedulableResources )
355+ setupMPIOperator (ctx , mpiJob , unschedulableResources , enableGangSchedulingFlag )
356356 })
357357
358358 ginkgo .AfterEach (func () {
@@ -447,7 +447,7 @@ var _ = ginkgo.Describe("MPIJob", func() {
447447 // Set up the volcano-scheduler.
448448 setupVolcanoScheduler ()
449449 // Set up the mpi-operator so that the volcano scheduler is used as gang-scheduler.
450- setupMPIOperator (ctx , mpiJob , enableGangSchedulingFlag , unschedulableResources )
450+ setupMPIOperator (ctx , mpiJob , unschedulableResources , enableGangSchedulingFlag )
451451 })
452452
453453 ginkgo .AfterEach (func () {
@@ -527,6 +527,61 @@ var _ = ginkgo.Describe("MPIJob", func() {
527527 }, foreverTimeout , waitInterval ).Should (gomega .Equal (corev1 .ConditionTrue ))
528528 })
529529 })
530+
531+ // The custom cluster-domain e2e tests.
532+ ginkgo .Context ("with custom cluster-domain" , func () {
533+ const (
534+ clusterDomainFlag = "--cluster-domain=cluster.local"
535+ allowRunAsRootOpt = "--allow-run-as-root"
536+ )
537+
538+ var ctx = context .Background ()
539+
540+ ginkgo .BeforeEach (func () {
541+ setupMPIOperator (ctx , mpiJob , nil , clusterDomainFlag )
542+ mpiJob .Spec .RunLauncherAsWorker = ptr .To (true )
543+ launcherContainer := & mpiJob .Spec .MPIReplicaSpecs [kubeflow .MPIReplicaTypeLauncher ].Template .Spec .Containers [0 ]
544+ launcherContainer .Command = append (launcherContainer .Command , allowRunAsRootOpt )
545+ })
546+
547+ ginkgo .AfterEach (func () {
548+ operator , err := k8sClient .AppsV1 ().Deployments (mpiOperator ).Get (ctx , mpiOperator , metav1.GetOptions {})
549+ oldOperator := operator .DeepCopy ()
550+ gomega .Expect (err ).Should (gomega .Succeed ())
551+ for i , arg := range operator .Spec .Template .Spec .Containers [0 ].Args {
552+ if arg == clusterDomainFlag {
553+ operator .Spec .Template .Spec .Containers [0 ].Args = append (
554+ operator .Spec .Template .Spec .Containers [0 ].Args [:i ], operator .Spec .Template .Spec .Containers [0 ].Args [i + 1 :]... )
555+ break
556+ }
557+ }
558+ if diff := cmp .Diff (oldOperator , operator ); len (diff ) != 0 {
559+ _ , err = k8sClient .AppsV1 ().Deployments (mpiOperator ).Update (ctx , operator , metav1.UpdateOptions {})
560+ gomega .Expect (err ).Should (gomega .Succeed ())
561+ gomega .Eventually (func () bool {
562+ ok , err := ensureDeploymentAvailableReplicas (ctx , mpiOperator , mpiOperator )
563+ gomega .Expect (err ).Should (gomega .Succeed ())
564+ return ok
565+ }, foreverTimeout , waitInterval ).Should (gomega .BeTrue ())
566+ }
567+ // Restore the previous MPIJob configurations.
568+ mpiJob .Spec .RunLauncherAsWorker = ptr .To (false )
569+ for i , arg := range mpiJob .Spec .MPIReplicaSpecs [kubeflow .MPIReplicaTypeLauncher ].Template .Spec .Containers [0 ].Command {
570+ if arg == allowRunAsRootOpt {
571+ mpiJob .Spec .MPIReplicaSpecs [kubeflow .MPIReplicaTypeLauncher ].Template .Spec .Containers [0 ].Command = append (
572+ mpiJob .Spec .MPIReplicaSpecs [kubeflow .MPIReplicaTypeLauncher ].Template .Spec .Containers [0 ].Command [:i ],
573+ mpiJob .Spec .MPIReplicaSpecs [kubeflow .MPIReplicaTypeLauncher ].Template .Spec .Containers [0 ].Command [i + 1 :]... )
574+ }
575+ }
576+ })
577+
578+ ginkgo .When ("running as root" , func () {
579+ ginkgo .It ("should succeed" , func () {
580+ mpiJob := createJobAndWaitForCompletion (mpiJob )
581+ expectConditionToBeTrue (mpiJob , kubeflow .JobSucceeded )
582+ })
583+ })
584+ })
530585})
531586
532587func resumeJob (ctx context.Context , mpiJob * kubeflow.MPIJob ) * kubeflow.MPIJob {
@@ -761,7 +816,7 @@ func cleanUpVolcanoScheduler() {
761816}
762817
763818// setupMPIOperator scales the mpi-operator Deployment down to 0 and back up to 1 so that the given manager flags (e.g. the gang-scheduling flag) take effect.
764- func setupMPIOperator (ctx context.Context , mpiJob * kubeflow.MPIJob , enableGangSchedulingFlag string , unschedulableResources * corev1.ResourceList ) {
819+ func setupMPIOperator (ctx context.Context , mpiJob * kubeflow.MPIJob , unschedulableResources * corev1.ResourceList , managerFlags ... string ) {
765820 ginkgo .By ("Scale-In the deployment to 0" )
766821 operator , err := k8sClient .AppsV1 ().Deployments (mpiOperator ).Get (ctx , mpiOperator , metav1.GetOptions {})
767822 gomega .Expect (err ).Should (gomega .Succeed ())
@@ -778,7 +833,7 @@ func setupMPIOperator(ctx context.Context, mpiJob *kubeflow.MPIJob, enableGangSc
778833 gomega .Eventually (func () error {
779834 updatedOperator , err := k8sClient .AppsV1 ().Deployments (mpiOperator ).Get (ctx , mpiOperator , metav1.GetOptions {})
780835 gomega .Expect (err ).Should (gomega .Succeed ())
781- updatedOperator .Spec .Template .Spec .Containers [0 ].Args = append (updatedOperator .Spec .Template .Spec .Containers [0 ].Args , enableGangSchedulingFlag )
836+ updatedOperator .Spec .Template .Spec .Containers [0 ].Args = append (updatedOperator .Spec .Template .Spec .Containers [0 ].Args , managerFlags ... )
782837 updatedOperator .Spec .Replicas = ptr.To [int32 ](1 )
783838 _ , err = k8sClient .AppsV1 ().Deployments (mpiOperator ).Update (ctx , updatedOperator , metav1.UpdateOptions {})
784839 return err
@@ -791,5 +846,9 @@ func setupMPIOperator(ctx context.Context, mpiJob *kubeflow.MPIJob, enableGangSc
791846 return isNotZero
792847 }, foreverTimeout , waitInterval ).Should (gomega .BeTrue ())
793848 createMPIJobWithOpenMPI (mpiJob )
794- mpiJob .Spec .RunPolicy .SchedulingPolicy = & kubeflow.SchedulingPolicy {MinResources : unschedulableResources }
849+ if unschedulableResources != nil {
850+ mpiJob .Spec .RunPolicy .SchedulingPolicy = & kubeflow.SchedulingPolicy {
851+ MinResources : unschedulableResources ,
852+ }
853+ }
795854}
0 commit comments