Skip to content

Commit fafa719

Browse files
committed
test: add test for custom CR
Signed-off-by: Ajay Mishra <[email protected]>
1 parent de52e86 commit fafa719

File tree

1 file changed

+178
-0
lines changed

1 file changed

+178
-0
lines changed

health-monitors/kubernetes-object-monitor/pkg/controller/reconciler_test.go

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ import (
3030
"github.com/stretchr/testify/assert"
3131
"github.com/stretchr/testify/require"
3232
v1 "k8s.io/api/core/v1"
33+
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
3334
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
35+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3436
"k8s.io/apimachinery/pkg/runtime/schema"
3537
"k8s.io/apimachinery/pkg/types"
3638
ctrl "sigs.k8s.io/controller-runtime"
@@ -241,6 +243,83 @@ func TestReconciler_ErrorCodePropagation(t *testing.T) {
241243
}, time.Second, 50*time.Millisecond)
242244
}
243245

246+
func TestReconciler_CustomResource(t *testing.T) {
247+
crPolicy := config.Policy{
248+
Name: "gpu-job-failed",
249+
Enabled: true,
250+
Resource: config.ResourceSpec{
251+
Group: "batch.nvidia.com",
252+
Version: "v1alpha1",
253+
Kind: "GPUJob",
254+
},
255+
Predicate: config.PredicateSpec{
256+
Expression: `has(resource.status.state) && resource.status.state == "Failed"`,
257+
},
258+
NodeAssociation: &config.AssociationSpec{
259+
Expression: `resource.spec.nodeName`,
260+
},
261+
HealthEvent: config.HealthEventSpec{
262+
ComponentClass: "GPU",
263+
IsFatal: false,
264+
Message: "GPU job failed",
265+
RecommendedAction: "CONTACT_SUPPORT",
266+
ErrorCode: []string{"GPU_JOB_FAILED"},
267+
},
268+
}
269+
270+
setup := setupTestWithCRD(t, []config.Policy{crPolicy}, gpuJobCRD())
271+
nodeName := "gpu-test-node"
272+
jobName := "test-gpu-job"
273+
namespace := "default"
274+
275+
createNode(t, setup, nodeName, v1.ConditionTrue)
276+
277+
gpuJob := &unstructured.Unstructured{
278+
Object: map[string]any{
279+
"apiVersion": "batch.nvidia.com/v1alpha1",
280+
"kind": "GPUJob",
281+
"metadata": map[string]any{
282+
"name": jobName,
283+
"namespace": namespace,
284+
},
285+
"spec": map[string]any{
286+
"nodeName": nodeName,
287+
},
288+
},
289+
}
290+
291+
require.NoError(t, setup.k8sClient.Create(setup.ctx, gpuJob))
292+
293+
require.Eventually(t, func() bool {
294+
err := setup.k8sClient.Get(setup.ctx, types.NamespacedName{
295+
Name: jobName,
296+
Namespace: namespace,
297+
}, gpuJob)
298+
return err == nil
299+
}, time.Second, 50*time.Millisecond)
300+
301+
gpuJob.Object["status"] = map[string]any{
302+
"state": "Failed",
303+
}
304+
require.NoError(t, setup.k8sClient.Status().Update(setup.ctx, gpuJob))
305+
306+
result, err := setup.reconciler.Reconcile(setup.ctx, ctrl.Request{
307+
NamespacedName: types.NamespacedName{Name: jobName, Namespace: namespace},
308+
})
309+
assert.NoError(t, err)
310+
assert.Equal(t, ctrl.Result{}, result)
311+
312+
require.Eventually(t, func() bool {
313+
if len(setup.publisher.publishedEvents) != 1 {
314+
return false
315+
}
316+
event := setup.publisher.publishedEvents[0]
317+
return event.nodeName == nodeName &&
318+
!event.isHealthy &&
319+
event.policy.Name == "gpu-job-failed"
320+
}, time.Second, 50*time.Millisecond)
321+
}
322+
244323
func TestReconciler_ColdStart(t *testing.T) {
245324
tests := []struct {
246325
name string
@@ -401,6 +480,58 @@ func setupTestWithPolicies(t *testing.T, policies []config.Policy) *testSetup {
401480
}
402481
}
403482

483+
func setupTestWithCRD(t *testing.T, policies []config.Policy, crd *apiextensionsv1.CustomResourceDefinition) *testSetup {
484+
t.Helper()
485+
486+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
487+
t.Cleanup(cancel)
488+
489+
testEnv := &envtest.Environment{
490+
CRDs: []*apiextensionsv1.CustomResourceDefinition{crd},
491+
}
492+
cfg, err := testEnv.Start()
493+
require.NoError(t, err)
494+
t.Cleanup(func() {
495+
assert.NoError(t, testEnv.Stop())
496+
})
497+
498+
k8sClient, err := client.New(cfg, client.Options{})
499+
require.NoError(t, err)
500+
501+
mockPub := &mockPublisher{
502+
publishedEvents: []mockPublishedEvent{},
503+
}
504+
505+
celEnvironment, err := celenv.NewEnvironment(k8sClient)
506+
require.NoError(t, err)
507+
508+
evaluator, err := policy.NewEvaluator(celEnvironment, policies)
509+
require.NoError(t, err)
510+
511+
gvk := schema.GroupVersionKind{
512+
Group: crd.Spec.Group,
513+
Version: crd.Spec.Versions[0].Name,
514+
Kind: crd.Spec.Names.Kind,
515+
}
516+
517+
reconciler := controller.NewResourceReconciler(
518+
k8sClient,
519+
evaluator,
520+
mockPub,
521+
policies,
522+
gvk,
523+
)
524+
525+
return &testSetup{
526+
ctx: ctx,
527+
k8sClient: k8sClient,
528+
reconciler: reconciler,
529+
publisher: mockPub,
530+
evaluator: evaluator,
531+
testEnv: testEnv,
532+
}
533+
}
534+
404535
type mockPublishedEvent struct {
405536
ctx context.Context
406537
policy *config.Policy
@@ -515,3 +646,50 @@ func getCounterVecValue(t *testing.T, counterVec *prometheus.CounterVec, labelVa
515646
require.NoError(t, err)
516647
return metric.Counter.GetValue()
517648
}
649+
650+
func gpuJobCRD() *apiextensionsv1.CustomResourceDefinition {
651+
return &apiextensionsv1.CustomResourceDefinition{
652+
ObjectMeta: metav1.ObjectMeta{
653+
Name: "gpujobs.batch.nvidia.com",
654+
},
655+
Spec: apiextensionsv1.CustomResourceDefinitionSpec{
656+
Group: "batch.nvidia.com",
657+
Names: apiextensionsv1.CustomResourceDefinitionNames{
658+
Plural: "gpujobs",
659+
Singular: "gpujob",
660+
Kind: "GPUJob",
661+
ListKind: "GPUJobList",
662+
},
663+
Scope: apiextensionsv1.NamespaceScoped,
664+
Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
665+
{
666+
Name: "v1alpha1",
667+
Served: true,
668+
Storage: true,
669+
Schema: &apiextensionsv1.CustomResourceValidation{
670+
OpenAPIV3Schema: &apiextensionsv1.JSONSchemaProps{
671+
Type: "object",
672+
Properties: map[string]apiextensionsv1.JSONSchemaProps{
673+
"spec": {
674+
Type: "object",
675+
Properties: map[string]apiextensionsv1.JSONSchemaProps{
676+
"nodeName": {Type: "string"},
677+
},
678+
},
679+
"status": {
680+
Type: "object",
681+
Properties: map[string]apiextensionsv1.JSONSchemaProps{
682+
"state": {Type: "string"},
683+
},
684+
},
685+
},
686+
},
687+
},
688+
Subresources: &apiextensionsv1.CustomResourceSubresources{
689+
Status: &apiextensionsv1.CustomResourceSubresourceStatus{},
690+
},
691+
},
692+
},
693+
},
694+
}
695+
}

0 commit comments

Comments
 (0)