Skip to content

Commit d06fa8c

Browse files
authored
Merge pull request #117 from NVIDIA/e2etestandpushimage
enhance e2e test and publish image
2 parents c238934 + e5b4884 commit d06fa8c

File tree

5 files changed

+155
-47
lines changed

5 files changed

+155
-47
lines changed

.github/workflows/precompiled.yaml

Lines changed: 119 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,14 @@ jobs:
2525
outputs:
2626
driver_branch: ${{ steps.extract_driver_branch.outputs.driver_branch }}
2727
kernel_flavors: ${{ steps.extract_driver_branch.outputs.kernel_flavors }}
28+
dist: ${{ steps.extract_driver_branch.outputs.dist }}
2829
steps:
2930
- name: Checkout code
3031
uses: actions/checkout@v4
3132
- name: Read driver versions
3233
id: extract_driver_branch
3334
run: |
34-
# get driver-branch
35+
# get driver_branch
3536
DRIVER_BRANCH=("535" "550")
3637
driver_branch_json=$(printf '%s\n' "${DRIVER_BRANCH[@]}" | jq -R . | jq -cs .)
3738
echo "driver_branch=$driver_branch_json" >> $GITHUB_OUTPUT
@@ -41,13 +42,19 @@ jobs:
4142
kernel_flavors_json=$(printf '%s\n' "${KERNEL_FLAVORS[@]}" | jq -R . | jq -cs .)
4243
echo "kernel_flavors=$kernel_flavors_json" >> $GITHUB_OUTPUT
4344
44-
precompiled-image:
45+
# get ubuntu distributions
46+
DIST=("ubuntu22.04")
47+
dist_json=$(printf '%s\n' "${DIST[@]}" | jq -R . | jq -cs .)
48+
echo "dist=$dist_json" >> $GITHUB_OUTPUT
49+
50+
precompiled-build-image:
4551
needs: set-driver-version-matrix
4652
runs-on: ubuntu-latest
4753
strategy:
4854
matrix:
49-
driver-branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }}
55+
driver_branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }}
5056
flavor: ${{ fromJson(needs.set-driver-version-matrix.outputs.kernel_flavors) }}
57+
dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }}
5158
steps:
5259
- uses: actions/checkout@v4
5360
name: Check out code
@@ -59,7 +66,7 @@ jobs:
5966
REPO_FULL_NAME="${{ github.repository }}"
6067
echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> $GITHUB_ENV
6168
62-
GENERATE_ARTIFACTS="true"
69+
GENERATE_ARTIFACTS="false"
6370
echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
6471
echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
6572
@@ -79,10 +86,10 @@ jobs:
7986
VERSION: ${COMMIT_SHORT_SHA}
8087
BASE_TARGET: jammy
8188
run: |
82-
make DRIVER_BRANCH=${{ matrix.driver-branch }} KERNEL_FLAVOR=${{ matrix.flavor }} build-base-${BASE_TARGET}
89+
make DRIVER_BRANCH=${{ matrix.driver_branch }} KERNEL_FLAVOR=${{ matrix.flavor }} build-base-${BASE_TARGET}
8390
8491
trap "docker rm -f base-${BASE_TARGET}-${{ matrix.flavor }}" EXIT
85-
docker run -d --name base-${BASE_TARGET}-${{ matrix.flavor }} ghcr.io/nvidia/driver:base-${BASE_TARGET}-${{ matrix.flavor }}-${{ matrix.driver-branch }}
92+
docker run -d --name base-${BASE_TARGET}-${{ matrix.flavor }} ghcr.io/nvidia/driver:base-${BASE_TARGET}-${{ matrix.flavor }}-${{ matrix.driver_branch }}
8693
# try 3 times every 10 seconds to get the file, if success exit the loop
8794
for i in {1..3}; do
8895
docker cp base-${BASE_TARGET}-${{ matrix.flavor }}:/var/kernel_version.txt kernel_version.txt && break
@@ -93,19 +100,42 @@ jobs:
93100
IMAGE_NAME: ghcr.io/nvidia/driver
94101
VERSION: ${COMMIT_SHORT_SHA}
95102
PRECOMPILED: "true"
96-
DIST: signed_ubuntu22.04
103+
DIST: signed_${{ matrix.dist }}
97104
run: |
98105
source kernel_version.txt && \
99-
make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver-branch }} build-${DIST}-${DRIVER_VERSION}
106+
make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver_branch }} build-${DIST}-${DRIVER_VERSION}
107+
108+
- name: Save build image as a tar
109+
env:
110+
DIST: ${{ matrix.dist }}
111+
PRIVATE_REGISTRY: "ghcr.io"
112+
run: |
113+
source kernel_version.txt
114+
docker save "${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}" \
115+
-o ./driver-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar
116+
# set env for artifacts upload
117+
echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV
118+
echo "DIST=$DIST" >> $GITHUB_ENV
119+
120+
- name: Upload build image as an artifact
121+
uses: actions/upload-artifact@v4
122+
with:
123+
name: driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}
124+
path: ./driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar
125+
retention-days: 1
100126

101127
determine-e2e-test-matrix:
102128
runs-on: ubuntu-latest
129+
strategy:
130+
matrix:
131+
dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }}
103132
needs:
104-
- precompiled-image
133+
- precompiled-build-image
105134
- set-driver-version-matrix
106135
outputs:
107136
matrix_values_not_empty: ${{ steps.set_kernel_version.outputs.matrix_values_not_empty }}
108137
matrix_values: ${{ steps.set_kernel_version.outputs.matrix_values }}
138+
# `set-driver-version-matrix` is an upstream job listed under `needs`, not a
# step of this job, so its output has to be read through the `needs` context;
# `steps.set-driver-version-matrix` matches no step id and yields an empty value.
dist: ${{ needs.set-driver-version-matrix.outputs.dist }}
109139
steps:
110140
- name: Check out code
111141
uses: actions/checkout@v4
@@ -120,42 +150,27 @@ jobs:
120150
id: set_kernel_version
121151
env:
122152
BASE_TARGET: "jammy"
123-
DIST: "ubuntu22.04"
153+
DIST: ${{ matrix.dist }}
124154
run: |
125155
echo "matrix_values_not_empty=0" >> $GITHUB_OUTPUT
126156
127157
kernel_flavors_json='${{ needs.set-driver-version-matrix.outputs.kernel_flavors }}'
128-
kernel_flavors=$(echo "$kernel_flavors_json" | jq -r '.[]')
158+
KERNEL_FLAVORS=($(echo "$kernel_flavors_json" | jq -r '.[]'))
129159
driver_branch_json='${{ needs.set-driver-version-matrix.outputs.driver_branch }}'
130-
driver_branch=$(echo "$driver_branch_json" | jq -r '.[]')
131-
132-
kernel_versions=()
133-
for kernel_flavor in $kernel_flavors; do
134-
# FIXME -- remove if condition, once azure kernel upgrade starts working
135-
if [[ "$kernel_flavor" == "azure" ]]; then
136-
echo "skipping azure kernel testing"
137-
continue
138-
fi
139-
for DRIVER_BRANCH in $driver_branch; do
140-
source ./tests/scripts/findkernelversion.sh "$BASE_TARGET" "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST"
141-
if [[ "$should_continue" == true ]]; then
142-
echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT
143-
break
144-
fi
145-
done
146-
if [[ "$should_continue" == false ]]; then
147-
echo "Skipping e2e tests for the following driver tag: ${KERNEL_VERSION}-${kernel_flavor}-${DIST}"
148-
else
149-
KERNEL_VERSION=$(echo "$KERNEL_VERSION" | tr -d ' \n')
150-
kernel_versions+=("$KERNEL_VERSION")
151-
echo "Adding the following tag to the e2e test matrix: ${KERNEL_VERSION}-${kernel_flavor}-${DIST}"
152-
fi
153-
done
160+
DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]'))
154161
162+
source ./tests/scripts/ci-precompiled-helpers.sh
163+
KERNEL_VERSIONS=($(get_kernel_versions_to_test $BASE_TARGET KERNEL_FLAVORS[@] DRIVER_BRANCHES[@] $DIST))
164+
if [ -z "$KERNEL_VERSIONS" ]; then
165+
# no new kernel release
166+
echo "Skipping e2e tests"
167+
exit 0
168+
fi
155169
# Convert array to JSON format and assign
156170
echo "[]" > $GITHUB_WORKSPACE/matrix_values.json
157-
printf '%s\n' "${kernel_versions[@]}" | jq -R . | jq -s . > $GITHUB_WORKSPACE/matrix_values.json
171+
printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . > $GITHUB_WORKSPACE/matrix_values.json
158172
echo "matrix_values=$(cat $GITHUB_WORKSPACE/matrix_values.json | jq -c .)" >> $GITHUB_OUTPUT
173+
echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT
159174
160175
e2e-tests-nvidiadriver:
161176
runs-on: ubuntu-latest
@@ -166,9 +181,16 @@ jobs:
166181
strategy:
167182
matrix:
168183
kernel_version: ${{ fromJson(needs.determine-e2e-test-matrix.outputs.matrix_values) }}
184+
dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }}
169185
steps:
170186
- name: Check out code
171187
uses: actions/checkout@v4
188+
- name: Login to GitHub Container Registry
189+
uses: docker/login-action@v3
190+
with:
191+
registry: ghcr.io
192+
username: ${{ github.actor }}
193+
password: ${{ secrets.GITHUB_TOKEN }}
172194
- name: Set up Holodeck
173195
uses: NVIDIA/[email protected]
174196
env:
@@ -195,6 +217,15 @@ jobs:
195217
echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV
196218
KERNEL_VERSION="${{ matrix.kernel_version }}"
197219
echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV
220+
echo "DIST=${{ matrix.dist }}" >> $GITHUB_ENV
221+
# Single-quote the expanded JSON (same form as the kernel_flavors_json and
# driver_branch_json assignments in determine-e2e-test-matrix) so the shell
# keeps the JSON's own double quotes intact for jq.
driver_branch_json='${{ needs.set-driver-version-matrix.outputs.driver_branch }}'
222+
DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]'))
223+
echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV
224+
225+
- name: Install GitHub CLI
226+
run: |
227+
sudo apt-get update
228+
sudo apt-get install -y gh
198229
199230
- name: Upgrade the kernel for Precompiled e2e test
200231
env:
@@ -220,23 +251,29 @@ jobs:
220251
- name: Precompiled e2e test gpu driver validation
221252
env:
222253
TEST_CASE: "./tests/cases/nvidia-driver.sh"
223-
GPU_OPERATOR_OPTIONS: "--set driver.repository=${{ env.PRIVATE_REGISTRY }}/nvidia --set driver.usePrecompiled=true"
254+
GPU_OPERATOR_OPTIONS: "--set driver.repository=${{ env.PRIVATE_REGISTRY }}/nvidia --set driver.usePrecompiled=true \
255+
--set driver.imagePullPolicy=Never"
256+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
224257
run: |
225258
rc=0
226259
# for precompiled driver we are setting driver branch as driver version
227-
driver_versions_json='${{ needs.set-driver-version-matrix.outputs.driver_branch }}'
228-
driver_versions=$(echo "$driver_versions_json" | jq -r '.[]')
229-
for DRIVER_VERSION in $driver_versions; do
260+
DRIVER_BRANCHES=(${{ env.DRIVER_BRANCHES }})
261+
for DRIVER_VERSION in "${DRIVER_BRANCHES[@]}"; do
230262
echo "Running e2e for DRIVER_VERSION=$DRIVER_VERSION"
263+
image="driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}"
264+
echo "Downloading $image in tests directory"
265+
gh run download --name $image --dir ./tests/
231266
status=0
232-
OPERATOR_OPTIONS="${GPU_OPERATOR_OPTIONS} --set driver.version=${DRIVER_VERSION}"
267+
TEST_CASE_ARGS="${GPU_OPERATOR_OPTIONS} --set driver.version=${DRIVER_VERSION}"
233268
# add escape character for space
234-
OPERATOR_OPTIONS=$(printf '%q ' "$OPERATOR_OPTIONS")
235-
./tests/ci-run-e2e.sh "${TEST_CASE}" "${OPERATOR_OPTIONS}" || status=$?
269+
TEST_CASE_ARGS=$(printf '%q ' "$TEST_CASE_ARGS")
270+
IMAGE_PATH="./tests/driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}.tar"
271+
./tests/ci-run-e2e.sh "${TEST_CASE}" "${TEST_CASE_ARGS}" ${IMAGE_PATH} || status=$?
236272
if [ $status -eq 1 ]; then
237273
echo "e2e validation failed for driver version $DRIVER_VERSION with status $status"
238274
rc=$status
239275
fi
276+
rm -f $IMAGE_PATH
240277
done
241278
./tests/scripts/pull.sh /tmp/logs logs
242279
exit $rc
@@ -248,3 +285,42 @@ jobs:
248285
name: nvidiadriver-Precompiled-e2e-test-logs
249286
path: ./logs/
250287
retention-days: 15
288+
289+
# Loads each saved driver image tarball from its workflow artifact and pushes it
# to the registry. Depends on the e2e job, and fans out over every combination
# of driver branch, tested kernel version and distribution.
publish-precompiled-image:
  runs-on: ubuntu-latest
  needs:
    - set-driver-version-matrix
    - determine-e2e-test-matrix
    - e2e-tests-nvidiadriver
  strategy:
    matrix:
      driver_branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }}
      kernel_version: ${{ fromJson(needs.determine-e2e-test-matrix.outputs.matrix_values) }}
      dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }}
  steps:
    - name: Check out code
      uses: actions/checkout@v4
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Set image vars
      run: |
        echo "PRIVATE_REGISTRY=ghcr.io" >> "$GITHUB_ENV"
        echo "DIST=${{ matrix.dist }}" >> "$GITHUB_ENV"

    - name: Download built image artifact
      uses: actions/download-artifact@v4
      with:
        # Artifact name is driver-images-<driver_branch>-<kernel_version>-<dist>.
        name: driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}-${{ env.DIST }}
        path: ./

    - name: Publish image
      env:
        # Build the tag once and hand it to the script as an environment
        # variable instead of expanding expressions inside the shell text.
        # DIST was exported from matrix.dist above, so both spell the same value.
        IMAGE_TAG: ${{ matrix.driver_branch }}-${{ matrix.kernel_version }}-${{ matrix.dist }}
      run: |
        image_path="./driver-images-${IMAGE_TAG}.tar"
        echo "uploading $image_path"
        docker load -i "$image_path"
        docker push "${PRIVATE_REGISTRY}/nvidia/driver:${IMAGE_TAG}"

tests/cases/nvidia-driver.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ fi
88

99
# export gpu-operator options
1010
export TEST_CASE_ARGS="$1"
11+
if [[ $# -eq 2 ]]; then
12+
export IMAGE_PATH="$2"
13+
sudo ctr -n k8s.io images import "$IMAGE_PATH"
14+
fi
1115

1216
SCRIPTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )"/../scripts && pwd )"
1317
source "${SCRIPTS_DIR}"/.definitions.sh

tests/ci-run-e2e.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
set -xe
44

5-
if [[ $# -ne 2 ]]; then
5+
if [[ $# -lt 2 ]]; then
66
echo "TEST_CASE TEST_CASE_ARGS are required"
77
exit 1
88
fi
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Print the kernel versions for which findkernelversion.sh reports
# should_continue=true.
#
# Arguments:
#   $1  BASE_TARGET      forwarded to findkernelversion.sh
#   $2  KERNEL_FLAVORS   array reference written as NAME[@]
#   $3  DRIVER_BRANCHES  array reference written as NAME[@]
#   $4  DIST             forwarded to findkernelversion.sh
#
# Output: the collected kernel versions on stdout, space separated on a single
# line (an empty line when none were collected). Output of findkernelversion.sh
# is redirected to stderr so that stdout carries only the result.
# Returns 1 when not called with exactly four arguments.
get_kernel_versions_to_test() {
    if [[ "$#" -ne 4 ]]; then
        # FUNCNAME names this function; $0 would name the calling script or shell.
        echo " Error:${FUNCNAME[0]} must be called with BASE_TARGET KERNEL_FLAVORS DRIVER_BRANCHES DIST" >&2
        # return rather than exit: this helper is sourced, so exit would end
        # the caller's shell when the function is invoked directly.
        return 1
    fi

    local BASE_TARGET="$1"
    # Copy the referenced arrays into distinctly named locals so the indirect
    # expansion can never resolve to the local that is being declared.
    local -a _kv_flavors=("${!2}")
    local -a _kv_branches=("${!3}")
    local DIST="$4"

    kernel_versions=()
    for kernel_flavor in "${_kv_flavors[@]}"; do
        # FIXME -- remove if condition, once azure kernel upgrade starts working
        if [[ "$kernel_flavor" == "azure" ]]; then
            continue
        fi
        # Start every flavor from a known state; otherwise an empty branch list
        # would reuse should_continue from a previous flavor or the environment.
        should_continue=false
        for DRIVER_BRANCH in "${_kv_branches[@]}"; do
            source ./tests/scripts/findkernelversion.sh "$BASE_TARGET" "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST" >&2
            # One driver branch reporting should_continue=true is enough to
            # select this flavor's kernel version.
            if [[ "$should_continue" == true ]]; then
                break
            fi
        done
        if [[ "$should_continue" == true ]]; then
            # Strip spaces and newlines so the value is a single clean token.
            KERNEL_VERSION=$(echo "$KERNEL_VERSION" | tr -d ' \n')
            kernel_versions+=("$KERNEL_VERSION")
        fi
    done
    echo "${kernel_versions[@]}"
}

tests/scripts/findkernelversion.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export KERNEL_FLAVOR="${2}"
1010
export DRIVER_BRANCH="${3}"
1111
export DIST="${4}"
1212

13-
export REGCTL_VERSION=v0.4.7
13+
export REGCTL_VERSION=v0.7.1
1414
mkdir -p bin
1515
curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
1616
chmod a+x bin/regctl
@@ -22,8 +22,6 @@ export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt)
2222

2323
# calculate driver tag
2424
status=0
25-
echo "regctl tag ls nvcr.io/nvidia/driver | grep "^${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}$""
26-
2725
regctl tag ls nvcr.io/nvidia/driver | grep "^${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}$" || status=$?
2826
if [[ $status -eq 0 ]]; then
2927
export should_continue=false

0 commit comments

Comments
 (0)