Skip to content

Commit adf0b8e

Browse files
authored
Merge pull request #600 from jgehrcke/jp/bats-for-local-dev
bats-tests: add TEST_CHART_LOCAL, add tests, adjust readme
2 parents 5104942 + 2c8eafa commit adf0b8e

File tree

5 files changed

+104
-44
lines changed

5 files changed

+104
-44
lines changed

.dockerignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ deployments/container
33
*.tar
44
*.tgz
55
demo
6-
.git
6+
.git
7+
tests-out

tests/bats/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ FROM debian:trixie
33
# GNU parallel: bats may want to use that
44
# gettext-base: provides envsubst, used by nickelpie
55
RUN apt-get update && apt-get install -y -q --no-install-recommends \
6-
parallel git ca-certificates curl make gettext-base && \
6+
parallel git ca-certificates curl make gettext-base jq && \
77
rm -rf /var/lib/apt/lists/*
88

99
# Set by BuildKit, of the form amd64/arm64.

tests/bats/Makefile

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,45 @@
1616
include $(CURDIR)/versions.mk
1717
include $(CURDIR)/common.mk
1818

19-
BATS_IMAGE = batstests:$(GIT_COMMIT_SHORT)
2019

21-
KUBECONFIG ?= $(HOME)/.kube/config
20+
# The to-be-tested Helm chart. Ignored when setting TEST_CHART_LOCAL.
2221
TEST_CHART_REPO ?= "oci://ghcr.io/nvidia/k8s-dra-driver-gpu"
23-
TEST_CHART_VERSION ?= $(VERSION_GHCR_CHART)
22+
TEST_CHART_VERSION ?= "$(VERSION_GHCR_CHART)"
23+
24+
# The baseline Helm chart to test upgrades from and downgrades to.
2425
TEST_CHART_LASTSTABLE_REPO ?= "oci://ghcr.io/nvidia/k8s-dra-driver-gpu"
2526
TEST_CHART_LASTSTABLE_VERSION ?= "25.3.2-2c250af3-chart"
27+
28+
# If not "false": the to-be-tested Helm chart is installed from the local
29+
# filesystem (from `deployments/helm/nvidia-dra-driver-gpu`). Make sure
30+
# (out-of-band) that the container images that the chart refers to are available
31+
# (placed directly on the involved nodes or pullable). This is a convenience
32+
# parameter for setting
33+
#
34+
# TEST_CHART_REPO="deployments/helm/nvidia-dra-driver-gpu/"
35+
# TEST_CHART_VERSION="$(make print-VERSION)" (but w/o v prefix)
36+
#
37+
TEST_CHART_LOCAL ?= "false"
38+
39+
# Consumed in upgrade test via kubectl apply -f <URL>
40+
# (can be a branch, tag, or commit). TODO: parse default
41+
# from `TEST_CHART_VERSION`.
42+
TEST_CRD_UPGRADE_TARGET_GIT_REF ?= "main"
43+
2644
TEST_NVIDIA_DRIVER_ROOT ?= "/run/nvidia/driver"
2745

28-
# Currently consumed in upgrade test via
29-
# kubectl apply -f <URL> (can be a branch, tag, or commit)
30-
TEST_CRD_UPGRADE_TARGET_GIT_REF ?= $(GIT_COMMIT_SHORT)
46+
# TODO: calculate precise, expected container image spec,
47+
# and test for that in test suite, for example:
48+
# `nvcr.io/nvidia/k8s-dra-driver-gpu:v25.8.0-dev`
49+
TEST_EXPECTED_IMAGE_SPEC_SUBSTRING ?= $(VERSION)
50+
51+
# Compare with surrounding whitespace and double quotes removed, so that both
# `false` (as typically passed via environment or command line) and the
# quoted in-file default disable local-chart mode. Any other value enables it.
ifneq ($(subst ",,$(strip $(TEST_CHART_LOCAL))),false)
52+
TEST_CHART_REPO = "deployments/helm/nvidia-dra-driver-gpu/"
53+
TEST_CHART_VERSION = $(VERSION:v%=%)
54+
endif
55+
56+
BATS_IMAGE = batstests:$(GIT_COMMIT_SHORT)
57+
KUBECONFIG ?= $(HOME)/.kube/config
3158

3259
default: tests
3360

@@ -57,6 +84,7 @@ tests: image
5784
--env TEST_CHART_LASTSTABLE_VERSION=$(TEST_CHART_LASTSTABLE_VERSION) \
5885
--env TEST_CRD_UPGRADE_TARGET_GIT_REF=$(TEST_CRD_UPGRADE_TARGET_GIT_REF) \
5986
--env TEST_NVIDIA_DRIVER_ROOT=$(TEST_NVIDIA_DRIVER_ROOT) \
87+
--env TEST_EXPECTED_IMAGE_SPEC_SUBSTRING=$(TEST_EXPECTED_IMAGE_SPEC_SUBSTRING) \
6088
-u $(shell id -u ${USER}):$(shell id -g ${USER}) \
6189
--entrypoint "/bin/bash"\
6290
$(BATS_IMAGE) \

tests/bats/README.md

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,70 +2,72 @@
22

33
## Usage
44

5-
Invoke `make bats` in the root of this repository.
5+
Review the `TEST_*` variables [at around the top of the Makefile](https://github.com/NVIDIA/k8s-dra-driver-gpu/blob/main/tests/bats/Makefile#L22). Most of them can be overridden via environment.
6+
Use this configuration interface to customize your test run.
67

8+
Then invoke `make bats` in the root of the repository.
79

8-
### Test local dev state (artifacts not pushed)
10+
Some examples are shown below.
911

10-
Not yet supported.
11-
Let's change this ASAP.
12+
### Test a specific GHCR chart version
1213

13-
This test suite for now assumes public availability of a Helm chart on GHCR or NGC, pointing to a container image publicly available on GHCR or NGC.
14+
Example:
1415

15-
### Test Helm chart from registery
16+
```console
17+
$ export TEST_CHART_VERSION="25.8.0-dev-b823882b-chart"
18+
$ make bats
19+
...
20+
12 tests, 0 failures in 166 seconds
21+
```
1622

17-
#### Default versions
23+
Note: by default, the test suite assumes availability of a Helm chart on `oci://ghcr.io/nvidia/k8s-dra-driver-gpu`, pointing to a container image also publicly available in that registry.
1824

19-
Say, this is the current local git revision:
2025

21-
```console
22-
$ git rev-parse --short=8 HEAD
23-
e6e1dde4
24-
```
26+
### Test local dev state (artifacts not pushed)
27+
28+
To test the Helm chart currently specified in `deployments/helm/nvidia-dra-driver-gpu` in the local checkout, run
2529

26-
Then the test suite runs with the default configuration, for example:
2730
```console
28-
$ make bats
29-
...
30-
--env TEST_CHART_REPO="oci://ghcr.io/nvidia/k8s-dra-driver-gpu" \
31-
--env TEST_CHART_VERSION=25.8.0-dev-e6e1dde4-chart \
32-
--env TEST_CHART_LASTSTABLE_REPO="oci://ghcr.io/nvidia/k8s-dra-driver-gpu" \
33-
--env TEST_CHART_LASTSTABLE_VERSION="25.3.2-7020737a-chart" \
34-
--env TEST_CRD_UPGRADE_TARGET_GIT_REF=e6e1dde4 \
35-
...
36-
12 tests, 0 failures in 166 seconds
31+
TEST_CHART_LOCAL=1 make bats
3732
```
3833

39-
As you can see, this currently requires a Helm chart corresponding to the local revision to be available on GHCR.
34+
This overrides `TEST_CHART_REPO` and `TEST_CHART_VERSION`.
4035

41-
#### Test specific versions
36+
Make sure (out-of-band) that the container images that the local chart refers to are available to all nodes in the Kubernetes cluster -- placed directly (TODO: how-to) or pullable.
4237

43-
Set the correponding `TEST_*` environment variables before invoking the Makefile target.
38+
### Defaults
4439

45-
For example:
40+
By default, `make bats` tries to install a Helm chart from `oci://ghcr.io/nvidia/k8s-dra-driver-gpu` corresponding to the git revision of the local checkout:
4641

4742
```console
48-
$ export TEST_CHART_VERSION="25.8.0-dev-b823882b-chart"
49-
$ export TEST_CRD_UPGRADE_TARGET_GIT_REF="main"
43+
$ git rev-parse --short=8 HEAD
44+
e6e1dde4
5045
$ make bats
5146
...
52-
12 tests, 0 failures in 166 seconds
47+
--env TEST_CHART_REPO="oci://ghcr.io/nvidia/k8s-dra-driver-gpu" \
48+
--env TEST_CHART_VERSION=25.8.0-dev-e6e1dde4-chart \
49+
--env TEST_CRD_UPGRADE_TARGET_GIT_REF=e6e1dde4 \
50+
...
5351
```
5452

53+
That's CI-oriented.
54+
We may want to change that.
55+
5556

5657
## Development
5758

5859
Bats is a workable solution.
5960
Developing new tests might however probe your patience.
6061
Make wise usage of
6162

63+
* bats' [`run`](https://bats-core.readthedocs.io/en/stable/writing-tests.html#run-test-other-commands) command.
6264
* [skipping tests](https://bats-core.readthedocs.io/en/stable/writing-tests.html#skip-easily-skip-tests)
6365
* [tagging tests with `bats:focus`](https://bats-core.readthedocs.io/en/stable/writing-tests.html#special-tags)
6466
* [CLI args](https://bats-core.readthedocs.io/en/stable/usage.html) such as `--verbose-run`, `--show-output-of-passing-tests`.
6567

68+
Misc notes:
6669

67-
Also, familiarize yourself with bat's [`run`](https://bats-core.readthedocs.io/en/stable/writing-tests.html#run-test-other-commands) command.
68-
69-
Don't skip the section about when [not to use `run`](https://bats-core.readthedocs.io/en/stable/writing-tests.html#when-not-to-use-run).
70-
71-
Take inspiration from [cri-o tests](https://github.com/cri-o/cri-o/tree/81e69a58c7e6ec8699b3bdd8696b1d0e25e32bfb/test).
70+
* Don't skip the section about when [not to use `run`](https://bats-core.readthedocs.io/en/stable/writing-tests.html#when-not-to-use-run).
71+
* Take inspiration from [cri-o tests](https://github.com/cri-o/cri-o/tree/81e69a58c7e6ec8699b3bdd8696b1d0e25e32bfb/test).
72+
* Stop test suite on first failure? No first-class support in bats. See [this](https://github.com/bats-core/bats-core/issues/209) discussion.
73+
* We can and should radically iterate on the test suite's config interface to satisfy our needs.

tests/bats/tests.bats

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ setup_file() {
4848
# Install or upgrade, and wait for pods to be READY.
4949
# 1st arg: helm chart repo
5050
# 2nd arg: helm chart version
51-
# 3rd arg: array with additional args (provide `NOARGS`` if none)
51+
# 3rd arg: array with additional args (provide `NOARGS` if none)
5252
iupgrade_wait() {
5353
# E.g. `nvidia/nvidia-dra-driver-gpu` or
5454
# `oci://ghcr.io/nvidia/k8s-dra-driver-gpu`
@@ -83,7 +83,9 @@ apply_check_delete_workload_imex_chan_inject() {
8383

8484
# A test that covers local dev tooling, we don't want to
8585
# unintentionally change/break these targets.
86-
@test "test VERSION_W_COMMIT, VERSION_GHCR_CHART" {
86+
@test "test VERSION_W_COMMIT, VERSION_GHCR_CHART, VERSION" {
87+
run make print-VERSION
88+
assert_output --regexp '^v[0-9]+\.[0-9]+\.[0-9]+-dev$'
8789
run make print-VERSION_W_COMMIT
8890
assert_output --regexp '^v[0-9]+\.[0-9]+\.[0-9]+-dev-[0-9a-f]{8}$'
8991
run make print-VERSION_GHCR_CHART
@@ -101,6 +103,17 @@ apply_check_delete_workload_imex_chan_inject() {
101103
iupgrade_wait "${TEST_CHART_REPO}" "${TEST_CHART_VERSION}" _iargs
102104
}
103105

106+
@test "helm list: validate output" {
107+
# Sanity check: one chart installed.
108+
# `jq -e` derives the exit status from the last output value (non-zero for
# false/null); without it jq exits 0 regardless and this check cannot fail.
helm list -n nvidia-dra-driver-gpu -o json | jq -e 'length == 1'
109+
110+
# Confirm consistency between the various version-related parameters. Note
111+
# that the --version arg provided to `helm install/upgrade` does not directly
112+
# set app_version; it is just a version constraint. `app_version` tested here
113+
# is AFAIU defined solely by the chart's appVersion YAML spec.
114+
helm list -n nvidia-dra-driver-gpu -o json | jq '.[].app_version' | grep "${TEST_CHART_VERSION}"
115+
}
116+
104117
@test "get crd computedomains.resource.nvidia.com" {
105118
kubectl get crd computedomains.resource.nvidia.com
106119
}
@@ -115,6 +128,22 @@ apply_check_delete_workload_imex_chan_inject() {
115128
-l nvidia-dra-driver-gpu-component=controller --timeout=10s
116129
}
117130

131+
@test "validate CD controller container image spec" {
132+
local ACTUAL_IMAGE_SPEC
133+
ACTUAL_IMAGE_SPEC=$(kubectl get pod \
134+
-n nvidia-dra-driver-gpu \
135+
-l nvidia-dra-driver-gpu-component=controller \
136+
-o json | \
137+
jq -r '.items[].spec.containers[] | select(.name=="compute-domain") | .image')
138+
139+
# Emit once, unfiltered, for debuggability
140+
echo "$ACTUAL_IMAGE_SPEC"
141+
142+
# Confirm substring; TODO: make tighter with precise
143+
# TEST_EXPECTED_IMAGE_SPEC_SUBSTRING
144+
echo "$ACTUAL_IMAGE_SPEC" | grep "${TEST_EXPECTED_IMAGE_SPEC_SUBSTRING}"
145+
}
146+
118147
@test "IMEX channel injection (single)" {
119148
apply_check_delete_workload_imex_chan_inject
120149
}

0 commit comments

Comments
 (0)