Skip to content

Commit e9f647e

Browse files
committed
tests: cover CD daemon cleanup-on-shutdown
Signed-off-by: Dr. Jan-Philip Gehrcke <[email protected]>
1 parent 980a6a1 commit e9f647e

File tree

2 files changed

+45
-2
lines changed

2 files changed

+45
-2
lines changed

cmd/compute-domain-daemon/computedomain.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,12 +392,12 @@ func (m *ComputeDomainManager) removeNodeFromComputeDomain(ctx context.Context)
392392
newCD.Status.Status = nvapi.ComputeDomainStatusNotReady
393393
}
394394

395+
// Update status and (upon success) store the latest version of the object
396+
// (as returned by the API server) in the mutation cache.
395397
newCD.Status.Nodes = updatedNodes
396398
if _, err := m.config.clientsets.Nvidia.ResourceV1beta1().ComputeDomains(newCD.Namespace).UpdateStatus(ctx, newCD, metav1.UpdateOptions{}); err != nil {
397399
return fmt.Errorf("error removing node from ComputeDomain status: %w", err)
398400
}
399-
400-
// Add the updated ComputeDomain to the mutation cache
401401
m.mutationCache.Mutation(newCD)
402402

403403
klog.Infof("Successfully removed node with IP %s from ComputeDomain %s/%s", m.config.podIP, newCD.Namespace, newCD.Name)

tests/bats/tests.bats

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,49 @@ log_objects() {
143143
kubectl delete -f demo/specs/imex/channel-injection-all.yaml
144144
}
145145

146+
@test "CD daemon shutdown: confirm CD status cleanup" {
147+
log_objects
148+
149+
kubectl apply -f demo/specs/imex/channel-injection.yaml
150+
kubectl wait --for=condition=READY pods imex-channel-injection --timeout=100s
151+
run kubectl logs imex-channel-injection
152+
assert_output --partial "channel0"
153+
154+
local LOGPATH="${BATS_TEST_TMPDIR}/cd-daemon.log"
155+
local PNAME
156+
PNAME=$( \
157+
kubectl get pods -n nvidia-dra-driver-gpu | \
158+
grep imex-channel-injection | \
159+
awk '{print $1}'
160+
)
161+
162+
# Expect `nodes` key to be present in CD status.
163+
run bats_pipe kubectl get computedomain imex-channel-injection -o json \| jq '.status'
164+
assert_output --partial 'nodes'
165+
166+
echo "attach background log follower to daemon pod: $PNAME"
167+
kubectl logs -n nvidia-dra-driver-gpu --follow "$PNAME" > "$LOGPATH" 2>&1 &
168+
kubectl delete pods imex-channel-injection
169+
170+
# Note: the log follower child process terminates when the pod terminates.
171+
kubectl wait --for=delete pods imex-channel-injection --timeout=10s
172+
173+
# Expect `nodes` key to not be present (single-node CD).
174+
run bats_pipe kubectl get computedomain imex-channel-injection -o json \| jq '.status'
175+
refute_output --partial 'nodes'
176+
177+
# Inspect CD daemon log, dump tail for easier debug-on-failure.
178+
cat "$LOGPATH" | tail -n 50
179+
180+
# Explicitly confirm cleanup-on-shutdown behavior by inspecting CD log.
181+
cat "$LOGPATH" | grep -e "Successfully updated node .* status to NotReady"
182+
cat "$LOGPATH" | grep "Successfully removed node" | \
183+
grep "from ComputeDomain default/imex-channel-injection"
184+
185+
# Delete CD.
186+
kubectl delete computedomain imex-channel-injection
187+
}
188+
146189
@test "NodePrepareResources: catch unknown field in opaque cfg in ResourceClaim" {
147190
log_objects
148191

0 commit comments

Comments
 (0)