@@ -6,6 +6,7 @@ setup() {
66 _common_setup
77}
88
9+
910# Currently, the tests defined in this file deliberately depend on each other
1011# and are expected to execute in the order defined. In the future, we want to
1112# build test dependency injection (with fixtures), and work towards clean
@@ -45,6 +46,14 @@ apply_check_delete_workload_imex_chan_inject() {
4546 run kubectl logs imex-channel-injection
4647 assert_output --partial " channel0"
4748 kubectl delete -f demo/specs/imex/channel-injection.yaml
49+
50+ log_objects () {
51+ # Best-effort debugging aid: every `kubectl get` below is followed by `|| true` so it can never fail the caller.
52+ # The listing is meant to be visible when a test fails. Open question: could this be called from setup() instead?
53+ # That only works if output produced by a successful setup() is still shown when the test itself fails -- to be verified.
54+ kubectl get resourceclaims || true
55+ kubectl get computedomain || true
56+ kubectl get pods -o wide || true
4857}
4958
5059# A test that covers local dev tooling, we don't want to
@@ -111,10 +120,12 @@ apply_check_delete_workload_imex_chan_inject() {
111120}
112121
# Lists the current cluster objects via log_objects, then runs apply_check_delete_workload_imex_chan_inject once.
113122@test " IMEX channel injection (single)" {
123+ log_objects
114124 apply_check_delete_workload_imex_chan_inject
115125}
116126
117127@test " IMEX channel injection (all)" {
128+ log_objects
118129 # Example: with TEST_CHART_VERSION="v25.3.2-12390-chart"
119130 # the command below returns 0 (true: the tested version is smaller)
120131 if dpkg --compare-versions " ${TEST_CHART_VERSION# v} " lt " 25.8.0" ; then
@@ -129,6 +140,8 @@ apply_check_delete_workload_imex_chan_inject() {
129140}
130141
131142@test " NodePrepareResources: catch unknown field in opaque cfg in ResourceClaim" {
143+ log_objects
144+
132145 envsubst < tests/bats/specs/rc-opaque-cfg-unknown-field.yaml.tmpl > \
133146 " ${BATS_TEST_TMPDIR} " /rc-opaque-cfg-unknown-field.yaml
134147 cd " ${BATS_TEST_TMPDIR} "
@@ -171,6 +184,8 @@ apply_check_delete_workload_imex_chan_inject() {
171184}
172185
173186@test " nickelpie (NCCL send/recv/broadcast, 2 pods, 2 nodes, small payload)" {
187+ log_objects
188+
174189 # Do not run in checkout dir (to not pollute that).
175190 cd " ${BATS_TEST_TMPDIR} "
176191 git clone https://github.com/jgehrcke/jpsnips-nv
@@ -185,6 +200,8 @@ apply_check_delete_workload_imex_chan_inject() {
185200}
186201
187202@test " nvbandwidth (2 nodes, 2 GPUs each)" {
203+ log_objects
204+
188205 kubectl create -f https://github.com/kubeflow/mpi-operator/releases/download/v0.6.0/mpi-operator.yaml || echo " ignore"
189206 kubectl apply -f demo/specs/imex/nvbandwidth-test-job-1.yaml
190207 # The canonical k8s job interface works even for MPIJob (the MPIJob has an
@@ -197,6 +214,8 @@ apply_check_delete_workload_imex_chan_inject() {
197214}
198215
199216@test " downgrade: current-dev -> last-stable" {
217+ log_objects
218+
200219 # Stage 1: apply workload, but do not delete.
201220 kubectl apply -f demo/specs/imex/channel-injection.yaml
202221 kubectl wait --for=condition=READY pods imex-channel-injection --timeout=60s
@@ -215,6 +234,8 @@ apply_check_delete_workload_imex_chan_inject() {
215234}
216235
217236@test " upgrade: wipe-state, install-last-stable, upgrade-to-current-dev" {
237+ log_objects
238+
218239 # Stage 1: clean slate
219240 helm uninstall " ${TEST_HELM_RELEASE_NAME} " -n nvidia-dra-driver-gpu
220241 kubectl wait --for=delete pods -A -l app.kubernetes.io/name=nvidia-dra-driver-gpu --timeout=10s
0 commit comments