diff --git a/Makefile b/Makefile
index de9ca711a..77c08641a 100644
--- a/Makefile
+++ b/Makefile
@@ -221,7 +221,14 @@ PHONY: .shell
 		--user $$(id -u):$$(id -g) \
 		$(BUILDIMAGE)
 
-.PHONY: bats
+.PHONY: bats bats-cd bats-gpu
 bats:
 	make -f tests/bats/Makefile tests
 
+# Run compute domain specific tests
+bats-cd:
+	make -f tests/bats/Makefile tests-cd
+
+# Run GPU plugin specific tests
+bats-gpu:
+	make -f tests/bats/Makefile tests-gpu
diff --git a/tests/bats/Makefile b/tests/bats/Makefile
index 9df6b3bf6..743bb2fc8 100644
--- a/tests/bats/Makefile
+++ b/tests/bats/Makefile
@@ -16,7 +16,6 @@
 include $(CURDIR)/versions.mk
 include $(CURDIR)/common.mk
 
-
 # The to-be-tested Helm chart. Ignored when setting TEST_CHART_LOCAL.
 TEST_CHART_REPO ?= "oci://ghcr.io/nvidia/k8s-dra-driver-gpu"
 TEST_CHART_VERSION ?= "$(VERSION_GHCR_CHART)"
@@ -53,8 +52,10 @@
 TEST_CHART_REPO = "deployments/helm/nvidia-dra-driver-gpu"
 TEST_CHART_VERSION = $(VERSION:v%=%)
 endif
-BATS_IMAGE = batstests:$(GIT_COMMIT_SHORT)
+BATS_IMAGE ?= batstests:$(GIT_COMMIT_SHORT)
+BATS_ARGS ?= --print-output-on-failure --no-tempdir-cleanup --timing --abort
 KUBECONFIG ?= $(HOME)/.kube/config
+RUNDIR_PARENT ?= /tmp/k8s-dra-driver-gpu-tests-out-$(USER)
 
 # Add `docker run` arguments when not running
 # in Github Actions / GitLab CI.
@@ -63,6 +64,42 @@
 ifeq ($(CI),)
 DOCKER_RUN_FLAGS += -it
 endif
+DOCKER_ENVS = \
+	--env KUBECONFIG=$(KUBECONFIG) \
+	--env TEST_CHART_REPO=$(TEST_CHART_REPO) \
+	--env TEST_CHART_VERSION=$(TEST_CHART_VERSION) \
+	--env TEST_CHART_LASTSTABLE_REPO=$(TEST_CHART_LASTSTABLE_REPO) \
+	--env TEST_CHART_LASTSTABLE_VERSION=$(TEST_CHART_LASTSTABLE_VERSION) \
+	--env TEST_CRD_UPGRADE_TARGET_GIT_REF=$(TEST_CRD_UPGRADE_TARGET_GIT_REF) \
+	--env TEST_NVIDIA_DRIVER_ROOT=$(TEST_NVIDIA_DRIVER_ROOT) \
+	--env TEST_EXPECTED_IMAGE_SPEC_SUBSTRING=$(TEST_EXPECTED_IMAGE_SPEC_SUBSTRING)
+
+DOCKER_UID := $(shell id -u)
+DOCKER_GID := $(shell id -g)
+DOCKER_USER := $(DOCKER_UID):$(DOCKER_GID)
+
+DOCKER_MOUNTS = \
+	-v /tmp:/tmp \
+	-v $(CURDIR):/cwd \
+	-v $(dir $(abspath $(KUBECONFIG))):$(dir $(abspath $(KUBECONFIG)))
+
+# Helper to run bats inside container
+# $(1) = whitespace-separated list of test files
+define RUN_BATS
+	@export _RUNDIR_PARENT="$(RUNDIR_PARENT)"; \
+	mkdir -p "$${_RUNDIR_PARENT}"; \
+	export _RUNDIR="$$(mktemp -p "$${_RUNDIR_PARENT}" -d -t bats-tests-$$(date +%s)-XXXXX)"; \
+	echo "Run dir: $${_RUNDIR}"; \
+	docker run --rm $(DOCKER_RUN_FLAGS) $(DOCKER_MOUNTS) $(DOCKER_ENVS) \
+		-u $(DOCKER_USER) --entrypoint /bin/bash $(BATS_IMAGE) \
+		-c "set -ex; cd /cwd; \
+			echo 'Running k8s cluster cleanup (invasive)...'; \
+			bash tests/bats/cleanup-from-previous-run.sh 2>&1 | tee -a $${_RUNDIR}/cleanup.outerr; \
+			set +x; echo '--- STARTING TEST SUITE ---'; set -x; \
+			TMPDIR="$${_RUNDIR}" bats $(BATS_ARGS) $(1) \
+		"
+endef
+
 default: tests
 
 .PHONY: image
@@ -75,43 +112,35 @@ image:
 # suite/file 'setup' in bats, but we'd lose output on success). During dev, you
 # may want to add --show-output-of-passing-tests (and read bats docs for other
 # cmdline args).
-.PHONY: tests
+.PHONY: tests tests-gpu tests-cd
+
+# Run GPU plugin specific tests
+tests-gpu: image
+	$(call RUN_BATS, \
+		tests/bats/test_basics.bats \
+		tests/bats/test_gpu_basic.bats \
+		tests/bats/test_gpu_stress.bats)
+
+# Run Compute Domain specific tests
+tests-cd: image
+	$(call RUN_BATS, \
+		tests/bats/test_basics.bats \
+		tests/bats/test_cd_imex_chan_inject.bats \
+		tests/bats/test_cd_mnnvl_workload.bats \
+		tests/bats/test_cd_misc.bats \
+		tests/bats/test_cd_logging.bats \
+		tests/bats/test_cd_failover.bats \
+		tests/bats/test_cd_updowngrade.bats)
+
+# Run the complete test suite
 tests: image
-	export _RUNDIR_PARENT=/tmp/k8s-dra-driver-gpu-tests-out-$${USER} && \
-	mkdir -p "$${_RUNDIR_PARENT}" && \
-	export _RUNDIR=$$(mktemp -p $${_RUNDIR_PARENT} -d -t bats-tests-$$(date +%s)-XXXXX) && \
-	docker run \
-		--rm \
-		$(DOCKER_RUN_FLAGS) \
-		-v /tmp:/tmp \
-		-v $(CURDIR):/cwd \
-		-v $(HOME)/.kube/:$(HOME)/.kube \
-		--env KUBECONFIG=$(KUBECONFIG) \
-		--env TEST_CHART_REPO=$(TEST_CHART_REPO) \
-		--env TEST_CHART_VERSION=$(TEST_CHART_VERSION) \
-		--env TEST_CHART_LASTSTABLE_REPO=$(TEST_CHART_LASTSTABLE_REPO) \
-		--env TEST_CHART_LASTSTABLE_VERSION=$(TEST_CHART_LASTSTABLE_VERSION) \
-		--env TEST_CRD_UPGRADE_TARGET_GIT_REF=$(TEST_CRD_UPGRADE_TARGET_GIT_REF) \
-		--env TEST_NVIDIA_DRIVER_ROOT=$(TEST_NVIDIA_DRIVER_ROOT) \
-		--env TEST_EXPECTED_IMAGE_SPEC_SUBSTRING=$(TEST_EXPECTED_IMAGE_SPEC_SUBSTRING) \
-		-u $(shell id -u ${USER}):$(shell id -g ${USER}) \
-		--entrypoint "/bin/bash"\
-		$(BATS_IMAGE) \
-		-c "set -ex; cd /cwd; \
-			echo 'Running k8s cluster cleanup (invasive)... '; \
-			bash tests/bats/cleanup-from-previous-run.sh 2>&1 | tee -a $${_RUNDIR}/cleanup.outerr; \
-			set +x; echo '--- STARTING TEST SUITE ---'; set -x; \
-			TMPDIR=$${_RUNDIR} bats \
-				--print-output-on-failure \
-				--no-tempdir-cleanup \
-				--timing \
-				--abort \
-				tests/bats/test_basics.bats \
-				tests/bats/test_gpu_basic.bats \
-				tests/bats/test_cd_imex_chan_inject.bats \
-				tests/bats/test_cd_mnnvl_workload.bats \
-				tests/bats/test_cd_misc.bats \
-				tests/bats/test_cd_logging.bats \
-				tests/bats/test_cd_failover.bats \
-				tests/bats/test_cd_updowngrade.bats \
-			"
+	$(call RUN_BATS, \
+		tests/bats/test_basics.bats \
+		tests/bats/test_gpu_basic.bats \
+		tests/bats/test_cd_imex_chan_inject.bats \
+		tests/bats/test_cd_mnnvl_workload.bats \
+		tests/bats/test_cd_misc.bats \
+		tests/bats/test_cd_logging.bats \
+		tests/bats/test_cd_failover.bats \
+		tests/bats/test_cd_updowngrade.bats \
+		tests/bats/test_gpu_stress.bats)
diff --git a/tests/bats/cleanup-from-previous-run.sh b/tests/bats/cleanup-from-previous-run.sh
index 9e20e8cee..b56b35276 100644
--- a/tests/bats/cleanup-from-previous-run.sh
+++ b/tests/bats/cleanup-from-previous-run.sh
@@ -34,7 +34,7 @@ set -x
 
 # If a previous run leaves e.g. the controller behind in CrashLoopBackOff then
 # the next installation with --wait won't succeed.
-timeout -v 5 helm uninstall nvidia-dra-driver-gpu-batssuite -n nvidia-dra-driver-gpu
+timeout -v 15 helm uninstall nvidia-dra-driver-gpu-batssuite -n nvidia-dra-driver-gpu
 
 # When the CRD has been left behind deleted by a partially performed
 # test then the deletions below cannot succeed. Apply a CRD version that
@@ -62,6 +62,13 @@
 timeout -v 5 kubectl delete pods -l env=batssuite 2> /dev/null
 timeout -v 2 kubectl delete resourceclaim batssuite-rc-bad-opaque-config --force 2> /dev/null
 timeout -v 2 kubectl delete -f demo/specs/imex/simple-mig-test 2> /dev/null
+# Clean up any GPU stress test pods left behind.
+timeout -v 30 kubectl delete pods -l 'env=batssuite,test=stress-shared' 2> /dev/null
+timeout -v 5 kubectl delete -f tests/bats/specs/rc-shared-gpu.yaml 2> /dev/null
+kubectl wait --for=delete pods -l 'env=batssuite,test=stress-shared' \
+    --timeout=60s \
+    || echo "wait-for-delete failed"
+
 # TODO: maybe more brute-forcing/best-effort: it might make sense to submit all
 # workload in this test suite into a special namespace (not `default`), and to
 # then use `kubectl delete pods -n --all`.
@@ -69,8 +76,10 @@ timeout -v 2 kubectl delete -f demo/specs/imex/simple-mig-test 2> /dev/null
 # Delete any previous remainder of `clean-state-dirs-all-nodes.sh` invocation.
 kubectl delete pods privpod-rm-plugindirs 2> /dev/null
 
-timeout -v 5 helm uninstall nvidia-dra-driver-gpu-batssuite -n nvidia-dra-driver-gpu
+# Make sure to wait until the chart is completely removed.
+helm uninstall nvidia-dra-driver-gpu-batssuite --wait -n nvidia-dra-driver-gpu
 
+# Double-check that the pods are deleted.
 kubectl wait \
     --for=delete pods -A \
     -l app.kubernetes.io/name=nvidia-dra-driver-gpu \
diff --git a/tests/bats/specs/pods-shared-gpu.yaml b/tests/bats/specs/pods-shared-gpu.yaml
new file mode 100644
index 000000000..396d61a5c
--- /dev/null
+++ b/tests/bats/specs/pods-shared-gpu.yaml
@@ -0,0 +1,23 @@
+# Pod referencing the shared resource claim from rc-shared-gpu.yaml.
+# The test creates multiple pods from this spec, substituting __INDEX__.
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: stress-pod-__INDEX__
+  labels:
+    env: batssuite
+    test: stress-shared
+spec:
+  restartPolicy: Never
+  containers:
+  - name: ctr
+    image: ubuntu:24.04
+    command: ["bash", "-lc"]
+    args: ["nvidia-smi -L; trap 'exit 0' TERM; sleep 9999 & wait"]
+    resources:
+      claims:
+      - name: gpu
+  resourceClaims:
+  - name: gpu
+    resourceClaimName: rc-shared-gpu
diff --git a/tests/bats/specs/rc-shared-gpu.yaml b/tests/bats/specs/rc-shared-gpu.yaml
new file mode 100644
index 000000000..89036df3d
--- /dev/null
+++ b/tests/bats/specs/rc-shared-gpu.yaml
@@ -0,0 +1,14 @@
+# Shared GPU resource claim
+apiVersion: resource.k8s.io/v1
+kind: ResourceClaim
+metadata:
+  name: rc-shared-gpu
+  labels:
+    env: batssuite
+    test: stress-shared
+spec:
+  devices:
+    requests:
+    - name: gpu
+      exactly:
+        deviceClassName: gpu.nvidia.com
diff --git a/tests/bats/test_gpu_stress.bats b/tests/bats/test_gpu_stress.bats
new file mode 100644
index 000000000..8c87534eb
--- /dev/null
+++ b/tests/bats/test_gpu_stress.bats
@@ -0,0 +1,76 @@
+# shellcheck disable=SC2148
+# shellcheck disable=SC2329
+
+: "${STRESS_PODS_N:=15}"
+: "${STRESS_LOOPS:=5}"
+: "${STRESS_DELAY:=30}"
+
+setup_file () {
+  load 'helpers.sh'
+  _common_setup
+  local _iargs=("--set" "logVerbosity=6")
+  iupgrade_wait "${TEST_CHART_REPO}" "${TEST_CHART_VERSION}" _iargs
+}
+
+setup() {
+  load 'helpers.sh'
+  _common_setup
+  log_objects
+}
+
+bats::on_failure() {
+  echo -e "\n\nFAILURE HOOK START"
+  log_objects
+  show_kubelet_plugin_error_logs
+  echo -e "FAILURE HOOK END\n\n"
+}
+
+# Expand the pod template once per index into a single manifest file.
+_generate_pods_manifest() {
+  local out="$1"
+  local template="tests/bats/specs/pods-shared-gpu.yaml"
+  : > "$out"
+  for i in $(seq 1 "${STRESS_PODS_N}"); do
"${template}" >> "$out" + echo "---" >> "$out" + done +} + +@test "Stress: shared ResourceClaim across ${STRESS_PODS_N} pods x ${STRESS_LOOPS} loops" { + for loop in $(seq 1 "${STRESS_LOOPS}"); do + echo "=== Loop $loop/${STRESS_LOOPS} ===" + + # Apply ResourceClaim + kubectl apply -f tests/bats/specs/rc-shared-gpu.yaml + + # Generate and apply pods spec + manifest="${BATS_TEST_TMPDIR:-/tmp}/pods-shared-${loop}.yaml" + _generate_pods_manifest "$manifest" + kubectl apply -f "$manifest" + + # Wait for ResourceClaim allocation + kubectl wait --for=jsonpath='{.status.allocation}' resourceclaim rc-shared-gpu --timeout=120s + + # Wait for all pods to be Ready + kubectl wait --for=condition=Ready pods -l 'env=batssuite,test=stress-shared' --timeout=180s + + # Verify pod phases + phases=$(kubectl get pods -l 'env=batssuite,test=stress-shared' -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.phase}{"\n"}{end}') + echo "$phases" + echo "$phases" | awk '$2!="Running"{exit 1}' + + # Spot-check GPU allocation logs + run kubectl logs stress-pod-1 + assert_output --partial "UUID: GPU-" + + # Cleanup + kubectl delete pods -l 'env=batssuite,test=stress-shared' --timeout=90s + kubectl delete -f tests/bats/specs/rc-shared-gpu.yaml --timeout=90s + kubectl wait --for=delete pods -l 'env=batssuite,test=stress-shared' --timeout=60s + + if [[ "$loop" -lt "$STRESS_LOOPS" ]]; then + echo "Sleeping ${STRESS_DELAY}s before next loop..." + sleep "${STRESS_DELAY}" + fi + done +}