Skip to content

Commit 65cd2c5

Browse files
authored
Merge pull request #677 from jgehrcke/jp/test-abort-on-failure
tests: abort suite on first failure, misc
2 parents b3f4e07 + c40b44b commit 65cd2c5

File tree

5 files changed

+36
-17
lines changed

5 files changed

+36
-17
lines changed

tests/bats/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ ARG BUILDARCH
1212
# Install bats for running cmdline tests.
1313
# This is the image used when invoking `make bats-test`.
1414
RUN git clone https://github.com/bats-core/bats-core.git && cd bats-core && \
15-
git checkout 855844b8344e67d60dc0f43fa39817ed7787f141 && ./install.sh /usr/local
15+
git checkout 658f442f0fcdd6f9e2ea01625999217e8f7bfe7d && ./install.sh /usr/local
1616

1717
RUN mkdir -p /bats-libraries
1818
RUN git clone https://github.com/bats-core/bats-support /bats-libraries/bats-support

tests/bats/Makefile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,9 @@ image:
7777
# cmdline args).
7878
.PHONY: tests
7979
tests: image
80-
mkdir -p tests-out && \
81-
export _RUNDIR=$$(mktemp -p tests-out -d -t bats-tests-$$(date +%s)-XXXXX) && \
80+
export _RUNDIR_PARENT=/tmp/k8s-dra-driver-gpu-tests-out && \
81+
mkdir -p "$${_RUNDIR_PARENT}" && \
82+
export _RUNDIR=$$(mktemp -p $${_RUNDIR_PARENT} -d -t bats-tests-$$(date +%s)-XXXXX) && \
8283
docker run \
8384
--rm \
8485
$(DOCKER_RUN_FLAGS) \
@@ -99,9 +100,10 @@ tests: image
99100
-c "set -ex; cd /cwd; \
100101
echo 'Running k8s cluster cleanup (invasive)... '; \
101102
bash tests/bats/cleanup-from-previous-run.sh 2>&1 | tee -a $${_RUNDIR}/cleanup.outerr; \
102-
TMPDIR=/cwd/$${_RUNDIR} bats \
103+
TMPDIR=$${_RUNDIR} bats \
103104
--print-output-on-failure \
104105
--no-tempdir-cleanup \
105106
--timing \
107+
--abort \
106108
tests/bats/tests.bats \
107109
"

tests/bats/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ Make wise usage of
6767

6868
Misc notes:
6969

70+
* The test suite stops on first failure (using the [new](https://github.com/bats-core/bats-core/issues/209) `--abort` flag for bats).
71+
The tests are not perfectly independent yet, so stopping at the first failure is a sane default.
7072
* Don't skip the section about when [not to use `run`](https://bats-core.readthedocs.io/en/stable/writing-tests.html#when-not-to-use-run).
7173
* Take inspiration from [cri-o tests](https://github.com/cri-o/cri-o/tree/81e69a58c7e6ec8699b3bdd8696b1d0e25e32bfb/test).
72-
* Stop test suite on first failure? No first-class support in bats. See [this](https://github.com/bats-core/bats-core/issues/209) discussion.
7374
* We can and should radically iterate on the test suite's config interface to satisfy our needs.

tests/bats/helpers.sh

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,15 +112,27 @@ get_all_cd_daemon_logs_for_cd_name() {
112112
CD_NAME="$1"
113113
CD_UID=$(kubectl describe computedomains.resource.nvidia.com "${CD_NAME}" | grep UID | awk '{print $2}')
114114
CD_LABEL_KV="resource.nvidia.com/computeDomain=${CD_UID}"
115+
echo "CD daemon logs for CD: $CD_UID"
116+
kubectl logs \
117+
-n nvidia-dra-driver-gpu \
118+
-l "${CD_LABEL_KV}" \
119+
--tail=-1 \
120+
--prefix \
121+
--all-containers
122+
}
123+
124+
show_kubelet_plugin_error_logs() {
125+
echo -e "\nKUBELET PLUGIN ERROR LOGS START"
126+
(
115127
kubectl logs \
116-
-n nvidia-dra-driver-gpu \
117-
-l "${CD_LABEL_KV}" \
118-
--tail=-1 \
119-
--prefix \
120-
--all-containers \
121-
--timestamps
128+
-l nvidia-dra-driver-gpu-component=kubelet-plugin \
129+
-n nvidia-dra-driver-gpu \
130+
--prefix --tail=-1 | grep -E "^(E|W)[0-9]{4}"
131+
) || true
132+
echo -e "KUBELET PLUGIN ERROR LOGS END\n\n"
122133
}
123134

135+
124136
# Intended use case: one pod in Running or ContainerCreating state; then this
125137
# function returns the specific name of that pod. Specifically, ignore pods that
126138
# were just deleted or are terminating (this is important during the small time

tests/bats/tests.bats

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,23 @@ setup_file() {
4242
helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update
4343
}
4444

45+
bats::on_failure() {
46+
echo -e "\n\nFAILURE HOOK START"
47+
log_objects
48+
show_kubelet_plugin_error_logs
49+
get_all_cd_daemon_logs_for_cd_name "imex-channel-injection" || true
50+
echo -e "FAILURE HOOK END\n\n"
51+
}
52+
4553
apply_check_delete_workload_imex_chan_inject() {
4654
kubectl apply -f demo/specs/imex/channel-injection.yaml
4755
kubectl wait --for=condition=READY pods imex-channel-injection --timeout=100s
4856
run kubectl logs imex-channel-injection
49-
kubectl delete -f demo/specs/imex/channel-injection.yaml
50-
# Check output after attempted deletion.
5157
assert_output --partial "channel0"
5258

5359
# Wait for deletion to complete; this is critical before moving on to the next
5460
# test (as long as we don't wipe state entirely between tests).
61+
kubectl delete -f demo/specs/imex/channel-injection.yaml
5562
kubectl wait --for=delete pods imex-channel-injection --timeout=10s
5663
}
5764

@@ -101,12 +108,9 @@ log_objects() {
101108
kubectl get crd computedomains.resource.nvidia.com
102109
}
103110

104-
@test "wait for kubelet plugin pods READY" {
111+
@test "wait for plugin & controller pods READY" {
105112
kubectl wait --for=condition=READY pods -A \
106113
-l nvidia-dra-driver-gpu-component=kubelet-plugin --timeout=10s
107-
}
108-
109-
@test "wait for controller pod READY" {
110114
kubectl wait --for=condition=READY pods -A \
111115
-l nvidia-dra-driver-gpu-component=controller --timeout=10s
112116
}

0 commit comments

Comments
 (0)