Skip to content

Commit a7a73bf

Browse files
committed
Test Not for review holodeck kernel version change
1 parent 5c8db7d commit a7a73bf

File tree

8 files changed

+61
-127
lines changed

8 files changed

+61
-127
lines changed

.github/workflows/image.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@ on:
2121
- opened
2222
- synchronize
2323
branches:
24-
- main
25-
- release-*
24+
- main-no
25+
- release-no
2626
push:
2727
branches:
28-
- main
29-
- release-*
28+
- main-no
29+
- release-no
3030

3131
jobs:
3232
image:

.github/workflows/precompiled.yaml

Lines changed: 45 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,15 @@
1616
name: Precompiled images
1717

1818
on:
19-
schedule:
20-
- cron: '00 09 * * *'
19+
pull_request:
20+
types:
21+
- opened
22+
- synchronize
23+
branches:
24+
- test-holodeck
25+
push:
26+
branches:
27+
- test-holodeck
2128

2229
jobs:
2330
set-driver-version-matrix:
@@ -40,16 +47,19 @@ jobs:
4047
4148
# get kernel flavors
4249
KERNEL_FLAVORS=("aws" "azure" "generic" "nvidia" "oracle")
50+
# KERNEL_FLAVORS=("generic")
4351
kernel_flavors_json=$(printf '%s\n' "${KERNEL_FLAVORS[@]}" | jq -R . | jq -cs .)
4452
echo "kernel_flavors=$kernel_flavors_json" >> $GITHUB_OUTPUT
4553
4654
# get ubuntu distributions
4755
DIST=("ubuntu22.04" "ubuntu24.04")
56+
# DIST=("ubuntu22.04")
4857
dist_json=$(printf '%s\n' "${DIST[@]}" | jq -R . | jq -cs .)
4958
echo "dist=$dist_json" >> $GITHUB_OUTPUT
5059
5160
# LTS_KERNEL setup
52-
LTS_KERNEL=("5.15" "6.8")
61+
# LTS_KERNEL=("5.15" "6.8")
62+
LTS_KERNEL=("6.8")
5363
lts_kernel_json=$(printf '%s\n' "${LTS_KERNEL[@]}" | jq -R . | jq -cs .)
5464
echo "lts_kernel=$lts_kernel_json" >> $GITHUB_OUTPUT
5565
@@ -273,18 +283,18 @@ jobs:
273283
274284
# slack notification for new kernel release before e2e tests starts
275285
# as e2e tests may fail for new kernel release
276-
- name: Slack notification
277-
if: steps.set_kernel_version.outputs.matrix_values_not_empty == '1'
278-
uses: slackapi/[email protected]
279-
with:
280-
token: ${{ secrets.SLACK_BOT_TOKEN }}
281-
method: chat.postMessage
282-
payload: |
283-
{
284-
"channel": "${{ secrets.SLACK_CHANNEL_ID }}",
285-
"text": "${{ secrets.DETECTED_PRECOMPILED_KERNEL_MESSAGE }} ${{ steps.set_kernel_version.outputs.published_kernels }} \n
286-
Details: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
287-
}
286+
# - name: Slack notification
287+
# if: steps.set_kernel_version.outputs.matrix_values_not_empty == '1'
288+
# uses: slackapi/[email protected]
289+
# with:
290+
# token: ${{ secrets.SLACK_BOT_TOKEN }}
291+
# method: chat.postMessage
292+
# payload: |
293+
# {
294+
# "channel": "${{ secrets.SLACK_CHANNEL_ID }}",
295+
# "text": "${{ secrets.DETECTED_PRECOMPILED_KERNEL_MESSAGE }} ${{ steps.set_kernel_version.outputs.published_kernels }} \n
296+
# Details: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
297+
# }
288298

289299
e2e-tests-nvidiadriver:
290300
runs-on: linux-amd64-cpu4
@@ -319,9 +329,12 @@ jobs:
319329
driver_branch_json="${{ needs.set-driver-version-matrix.outputs.driver_branch }}"
320330
DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]'))
321331
echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV
332+
- name: Set kernel version in holodeck_${{ env.DIST }}.yaml
333+
run: |
334+
yq e '.spec.kernel.version = "${{ env.KERNEL_VERSION }}"' -i tests/holodeck_${{ env.DIST }}.yaml
322335
323336
- name: Set up Holodeck
324-
uses: NVIDIA/holodeck@v0.2.6
337+
uses: NVIDIA/holodeck@v0.2.15
325338
env:
326339
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
327340
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -346,27 +359,6 @@ jobs:
346359
sudo apt-get update
347360
sudo apt-get install -y gh
348361
349-
- name: Upgrade the kernel for Precompiled e2e test
350-
env:
351-
UPGRADE_KERNEL_SCRIPT: "./tests/scripts/upgrade-kernel.sh"
352-
run: |
353-
status=0
354-
./tests/ci-remote-exec.sh "${UPGRADE_KERNEL_SCRIPT}" "${KERNEL_VERSION}" || status=$?
355-
# On the target system, all scripts/test-case exit with code 1 for error handling.
356-
# However, since reboot-related disconnections break the SSH connection
357-
# and can cause the entire job to exit, we should ignore all errors except
358-
# exit code 1. During a reboot, exit code 1 will not be thrown, so handling
359-
# other errors as code 1 will ensure proper management of reboot scenarios
360-
if [ $status -eq 1 ]; then
361-
echo "Kernel version $KERNEL_VERSION upgrade failed"
362-
exit 1
363-
fi
364-
./tests/scripts/remote_retry.sh || status=$?
365-
if [ $status -ne 0 ]; then
366-
echo "Failed to connect to remote instance"
367-
exit $status
368-
fi
369-
370362
- name: Precompiled e2e test gpu driver validation
371363
env:
372364
TEST_CASE: "./tests/cases/nvidia-driver.sh"
@@ -455,8 +447,8 @@ jobs:
455447
fi
456448
image_path="./base-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}.tar"
457449
echo "uploading $image_path"
458-
docker load -i $image_path
459-
docker push ${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${KERNEL_FLAVOR}-${{ matrix.driver_branch }}
450+
# docker load -i $image_path
451+
# docker push ${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${KERNEL_FLAVOR}-${{ matrix.driver_branch }}
460452
461453
- name: Download built image artifact
462454
if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }}
@@ -470,17 +462,17 @@ jobs:
470462
run: |
471463
image_path="./driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}.tar"
472464
echo "uploading $image_path"
473-
docker load -i $image_path
474-
docker push ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}
475-
476-
- name: Slack notification
477-
if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }}
478-
uses: slackapi/[email protected]
479-
with:
480-
token: ${{ secrets.SLACK_BOT_TOKEN }}
481-
method: chat.postMessage
482-
payload: |
483-
{
484-
"channel": "${{ secrets.SLACK_CHANNEL_ID }}",
485-
"text": "${{ secrets.PUBLISHED_PRECOMPILED_IMAGE_MESSAGE }} ${{ env.PRIVATE_REGISTRY }}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}"
486-
}
465+
# docker load -i $image_path
466+
# docker push ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}
467+
468+
# - name: Slack notification
469+
# if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }}
470+
# uses: slackapi/[email protected]
471+
# with:
472+
# token: ${{ secrets.SLACK_BOT_TOKEN }}
473+
# method: chat.postMessage
474+
# payload: |
475+
# {
476+
# "channel": "${{ secrets.SLACK_CHANNEL_ID }}",
477+
# "text": "${{ secrets.PUBLISHED_PRECOMPILED_IMAGE_MESSAGE }} ${{ env.PRIVATE_REGISTRY }}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}"
478+
# }

tests/holodeck_ubuntu22.04.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,7 @@ spec:
3030
kubernetes:
3131
install: true
3232
installer: kubeadm
33-
version: v1.28.5
34-
crictlVersion: v1.28.0
33+
version: v1.31.1
34+
crictlVersion: v1.31.1
35+
kernel:
36+
version: ${{ env.KERNEL_VERSION }}

tests/holodeck_ubuntu24.04.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ spec:
2424
image:
2525
architecture: amd64
2626
imageId: ami-0da424eb883458071
27+
containerdConfigPatches:
28+
- |-
29+
[plugins."io.containerd.grpc.v1.cri".registry]
30+
config_path = "/etc/containerd/certs.d"
2731
containerRuntime:
2832
install: true
2933
name: containerd
@@ -33,3 +37,5 @@ spec:
3337
installer: kubeadm
3438
version: v1.30.0
3539
crictlVersion: v1.30.0
40+
kernel:
41+
version: ${{ env.KERNEL_VERSION }}

tests/scripts/findkernelversion.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,4 @@ if [[ $status_nvcr -eq 0 || $status_ghcr -eq 0 ]]; then
4242
else
4343
export should_continue=true
4444
fi
45+
export should_continue=true

tests/scripts/install-operator.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ kubectl create namespace "${TEST_NAMESPACE}"
2222

2323
# Run the helm install command
2424
echo "OPERATOR_OPTIONS: ${OPERATOR_OPTIONS}"
25+
2526
eval ${HELM} install gpu-operator nvidia/gpu-operator \
2627
-n "${TEST_NAMESPACE}" \
2728
"${OPERATOR_OPTIONS}" \

tests/scripts/kernel-upgrade-helper.sh

Lines changed: 0 additions & 54 deletions
This file was deleted.

tests/scripts/upgrade-kernel.sh

Lines changed: 0 additions & 14 deletions
This file was deleted.

0 commit comments

Comments
 (0)