Skip to content

Commit df959fe

Browse files
Add E2E tests
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
1 parent 085c179 commit df959fe

File tree

6 files changed

+1049
-0
lines changed

6 files changed

+1049
-0
lines changed

tests/e2e/cleanup_test.go

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
package e2e
2+
3+
import (
4+
"fmt"
5+
"time"
6+
7+
. "github.com/onsi/gomega"
8+
9+
"k8s.io/apimachinery/pkg/api/errors"
10+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11+
"k8s.io/apimachinery/pkg/types"
12+
)
13+
14+
// waitForDeletion polls the provided checkFunc until a NotFound error is returned,
15+
// confirming that the resource is deleted.
16+
func waitForDeletion(resourceName string, checkFunc func() error) error {
17+
timeout := 2 * time.Minute
18+
interval := 5 * time.Second
19+
start := time.Now()
20+
for {
21+
err := checkFunc()
22+
if err != nil && errors.IsNotFound(err) {
23+
return nil
24+
}
25+
if time.Since(start) > timeout {
26+
return fmt.Errorf("timed out waiting for deletion of %s", resourceName)
27+
}
28+
time.Sleep(interval)
29+
}
30+
}
31+
32+
// cleanup aggressively uninstalls deployed Helm releases and deletes resource claim templates.
33+
func cleanup() {
34+
deployed, err := helmClient.ListDeployedReleases()
35+
Expect(err).NotTo(HaveOccurred())
36+
37+
for _, release := range deployed {
38+
switch release.Name {
39+
case helmReleaseName:
40+
err := helmClient.UninstallReleaseByName(helmReleaseName)
41+
Expect(err).NotTo(HaveOccurred())
42+
case "gfd":
43+
// Uninstall GFD if it was enabled.
44+
if EnableGFD {
45+
err := helmClient.UninstallReleaseByName("gfd")
46+
Expect(err).NotTo(HaveOccurred())
47+
}
48+
default:
49+
// Skip other releases.
50+
continue
51+
}
52+
}
53+
54+
// Delete all ResourceClaimTemplates in the test namespace.
55+
resourceClaimTemplates, err := clientSet.ResourceV1beta1().ResourceClaimTemplates(testNamespace.Name).List(ctx, metav1.ListOptions{})
56+
Expect(err).NotTo(HaveOccurred())
57+
58+
for _, tmpl := range resourceClaimTemplates.Items {
59+
// Remove finalizers before deletion.
60+
patchBytes := []byte(`[{"op": "remove", "path": "/metadata/finalizers"}]`)
61+
_, _ = clientSet.ResourceV1beta1().ResourceClaimTemplates(testNamespace.Name).Patch(ctx, tmpl.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
62+
err = clientSet.ResourceV1beta1().ResourceClaimTemplates(testNamespace.Name).Delete(ctx, tmpl.Name, metav1.DeleteOptions{})
63+
Expect(err).NotTo(HaveOccurred())
64+
}
65+
}
66+
67+
// cleanupCRDs deletes specific CRDs used during testing.
68+
func cleanupCRDs() {
69+
crds := []string{
70+
"computedomains.resource.nvidia.com",
71+
}
72+
if EnableGFD {
73+
crds = append(crds, "nodefeatures.nfd.k8s-sigs.io", "nodefeaturerules.nfd.k8s-sigs.io")
74+
}
75+
76+
for _, crd := range crds {
77+
err := extClient.ApiextensionsV1().CustomResourceDefinitions().Delete(ctx, crd, metav1.DeleteOptions{})
78+
Expect(err).NotTo(HaveOccurred())
79+
// Optionally, wait for deletion confirmation:
80+
_ = waitForDeletion(crd, func() error {
81+
_, err := extClient.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, crd, metav1.GetOptions{})
82+
return err
83+
})
84+
}
85+
}
86+
87+
// cleanupNamespaceResources removes all resources in the specified namespace.
88+
func cleanupNamespaceResources(namespace string) {
89+
cleanupFuncs := []func(string) error{
90+
cleanupHelmDeployments,
91+
cleanupResourceClaims,
92+
cleanupResourceClaimTemplates,
93+
cleanupDaemonSets,
94+
cleanupPods,
95+
}
96+
for _, fn := range cleanupFuncs {
97+
Expect(fn(namespace)).NotTo(HaveOccurred())
98+
}
99+
}
100+
101+
// cleanupResourceClaims forcibly removes finalizers and deletes all ResourceClaims.
102+
func cleanupResourceClaims(namespace string) error {
103+
claims, err := clientSet.ResourceV1beta1().ResourceClaims(namespace).List(ctx, metav1.ListOptions{})
104+
if err != nil {
105+
return err
106+
}
107+
patchBytes := []byte(`[{"op": "remove", "path": "/metadata/finalizers"}]`)
108+
zero := int64(0)
109+
policy := metav1.DeletePropagationForeground
110+
deleteOptions := metav1.DeleteOptions{GracePeriodSeconds: &zero, PropagationPolicy: &policy}
111+
for _, rc := range claims.Items {
112+
_, _ = clientSet.ResourceV1beta1().ResourceClaims(namespace).Patch(ctx, rc.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
113+
if err = clientSet.ResourceV1beta1().ResourceClaims(namespace).Delete(ctx, rc.Name, deleteOptions); err != nil {
114+
return err
115+
}
116+
if err = waitForDeletion(rc.Name, func() error {
117+
_, err := clientSet.ResourceV1beta1().ResourceClaims(namespace).Get(ctx, rc.Name, metav1.GetOptions{})
118+
return err
119+
}); err != nil {
120+
return err
121+
}
122+
}
123+
return nil
124+
}
125+
126+
// cleanupResourceClaimTemplates forcibly removes finalizers and deletes all ResourceClaimTemplates.
127+
func cleanupResourceClaimTemplates(namespace string) error {
128+
templates, err := clientSet.ResourceV1beta1().ResourceClaimTemplates(namespace).List(ctx, metav1.ListOptions{})
129+
if err != nil {
130+
return err
131+
}
132+
patchBytes := []byte(`[{"op": "remove", "path": "/metadata/finalizers"}]`)
133+
zero := int64(0)
134+
policy := metav1.DeletePropagationForeground
135+
deleteOptions := metav1.DeleteOptions{GracePeriodSeconds: &zero, PropagationPolicy: &policy}
136+
for _, tmpl := range templates.Items {
137+
_, _ = clientSet.ResourceV1beta1().ResourceClaimTemplates(namespace).Patch(ctx, tmpl.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
138+
if err = clientSet.ResourceV1beta1().ResourceClaimTemplates(namespace).Delete(ctx, tmpl.Name, deleteOptions); err != nil {
139+
return err
140+
}
141+
if err = waitForDeletion(tmpl.Name, func() error {
142+
_, err := clientSet.ResourceV1beta1().ResourceClaimTemplates(namespace).Get(ctx, tmpl.Name, metav1.GetOptions{})
143+
return err
144+
}); err != nil {
145+
return err
146+
}
147+
}
148+
return nil
149+
}
150+
151+
// cleanupDaemonSets forcibly removes finalizers and deletes all DaemonSets in the namespace.
152+
func cleanupDaemonSets(namespace string) error {
153+
dsList, err := clientSet.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{})
154+
if err != nil {
155+
return err
156+
}
157+
patchBytes := []byte(`[{"op": "remove", "path": "/metadata/finalizers"}]`)
158+
zero := int64(0)
159+
policy := metav1.DeletePropagationForeground
160+
deleteOptions := metav1.DeleteOptions{GracePeriodSeconds: &zero, PropagationPolicy: &policy}
161+
for _, ds := range dsList.Items {
162+
_, _ = clientSet.AppsV1().DaemonSets(namespace).Patch(ctx, ds.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
163+
if err = clientSet.AppsV1().DaemonSets(namespace).Delete(ctx, ds.Name, deleteOptions); err != nil {
164+
return err
165+
}
166+
if err = waitForDeletion(ds.Name, func() error {
167+
_, err := clientSet.AppsV1().DaemonSets(namespace).Get(ctx, ds.Name, metav1.GetOptions{})
168+
return err
169+
}); err != nil {
170+
return err
171+
}
172+
}
173+
return nil
174+
}
175+
176+
// cleanupPods forcibly removes finalizers and force-deletes all Pods in the namespace.
177+
func cleanupPods(namespace string) error {
178+
podList, err := clientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{})
179+
if err != nil {
180+
return err
181+
}
182+
patchBytes := []byte(`[{"op": "remove", "path": "/metadata/finalizers"}]`)
183+
zero := int64(0)
184+
deleteOptions := metav1.DeleteOptions{GracePeriodSeconds: &zero}
185+
for _, pod := range podList.Items {
186+
_, _ = clientSet.CoreV1().Pods(namespace).Patch(ctx, pod.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
187+
if err = clientSet.CoreV1().Pods(namespace).Delete(ctx, pod.Name, deleteOptions); err != nil {
188+
return err
189+
}
190+
if err = waitForDeletion(pod.Name, func() error {
191+
_, err := clientSet.CoreV1().Pods(namespace).Get(ctx, pod.Name, metav1.GetOptions{})
192+
return err
193+
}); err != nil {
194+
return err
195+
}
196+
}
197+
return nil
198+
}
199+
200+
// cleanupNodeLabels removes the "resource.nvidia.com/computeDomain" label from all nodes.
201+
func cleanupNodeLabels() error {
202+
nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
203+
if err != nil {
204+
return err
205+
}
206+
for _, node := range nodes.Items {
207+
if _, exists := node.Labels["resource.nvidia.com/computeDomain"]; exists {
208+
delete(node.Labels, "resource.nvidia.com/computeDomain")
209+
_, err := clientSet.CoreV1().Nodes().Update(ctx, &node, metav1.UpdateOptions{})
210+
if err != nil {
211+
return err
212+
}
213+
}
214+
}
215+
return nil
216+
}
217+
218+
// cleanupTestResources deletes the pod, computeDomain, and resource claim template,
219+
// and then removes the "resource.nvidia.com/computeDomain" label from all nodes.
220+
// It returns an error if any step fails.
221+
func cleanupTestResources(namespace, podName, cdName, rctName string) error {
222+
// Delete the Pod with default deletion options.
223+
if err := clientSet.CoreV1().Pods(namespace).Delete(ctx, podName, metav1.DeleteOptions{}); err != nil {
224+
return err
225+
}
226+
227+
// Prepare foreground deletion options with zero grace period.
228+
zero := int64(0)
229+
policy := metav1.DeletePropagationForeground
230+
deleteOptions := metav1.DeleteOptions{
231+
GracePeriodSeconds: &zero,
232+
PropagationPolicy: &policy,
233+
}
234+
235+
// Delete the computeDomain.
236+
if err := resourceClient.ResourceV1beta1().ComputeDomains(namespace).Delete(ctx, cdName, deleteOptions); err != nil {
237+
return err
238+
}
239+
240+
// Delete the resource claim template.
241+
if err := clientSet.ResourceV1beta1().ResourceClaimTemplates(namespace).Delete(ctx, rctName, deleteOptions); err != nil {
242+
return err
243+
}
244+
245+
// Remove the "resource.nvidia.com/computeDomain" label from all nodes.
246+
if err := cleanupNodeLabels(); err != nil {
247+
return err
248+
}
249+
250+
return nil
251+
}
252+
253+
// cleanupHelmDeployments uninstalls all deployed Helm releases in the specified namespace.
254+
func cleanupHelmDeployments(namespace string) error {
255+
releases, err := helmClient.ListDeployedReleases()
256+
if err != nil {
257+
return fmt.Errorf("failed to list deployed releases: %w", err)
258+
}
259+
260+
for _, release := range releases {
261+
// Check if the release is deployed in the target namespace.
262+
// Depending on your helmClient configuration the release might carry the namespace information.
263+
if release.Namespace == namespace {
264+
if err := helmClient.UninstallReleaseByName(release.Name); err != nil {
265+
return fmt.Errorf("failed to uninstall release %q: %w", release.Name, err)
266+
}
267+
}
268+
}
269+
return nil
270+
}

tests/e2e/data/test-pod1.yaml

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Test fixture: a namespace, a GPU claim template, a ComputeDomain, and a pod
# that consumes both a GPU claim and an IMEX channel claim.
# NOTE(review): nesting below was restored from the Kubernetes / NVIDIA DRA
# schemas after indentation was lost — verify against the committed file.

# Namespace that holds every other object in this file.
---
apiVersion: v1
kind: Namespace
metadata:
  name: gpu-test1

# Template for a claim requesting one device of class gpu.nvidia.com.
---
apiVersion: resource.k8s.io/v1beta1
kind: ResourceClaimTemplate
metadata:
  namespace: gpu-test1
  name: single-gpu
spec:
  spec:
    devices:
      requests:
      - name: gpu
        deviceClassName: gpu.nvidia.com

# Single-node ComputeDomain; its channel names the ResourceClaimTemplate
# "imex-channel-0", which the pod below references.
---
apiVersion: resource.nvidia.com/v1beta1
kind: ComputeDomain
metadata:
  namespace: gpu-test1
  name: imex-channel-injection
spec:
  numNodes: 1
  channel:
    resourceClaimTemplate:
      name: imex-channel-0

# Pod that lists its GPUs with nvidia-smi and then sleeps until terminated.
---
apiVersion: v1
kind: Pod
metadata:
  namespace: gpu-test1
  name: pod1
  labels:
    app: pod
spec:
  containers:
  - name: ctr
    image: ubuntu:22.04
    command: ["bash", "-c"]
    args: ["nvidia-smi -L; trap 'exit 0' TERM; sleep 9999 & wait"]
    resources:
      claims:
      - name: gpu
      - name: imex-channel-0
  resourceClaims:
  - name: gpu
    resourceClaimTemplateName: single-gpu
  - name: imex-channel-0
    resourceClaimTemplateName: imex-channel-0
  tolerations:
  - key: "nvidia.com/gpu"
    operator: "Exists"
    effect: "NoSchedule"

0 commit comments

Comments
 (0)