Skip to content

Commit 13fc531

Browse files
committed
Test (not for review): holodeck kernel version change
1 parent 5c8db7d commit 13fc531

File tree

8 files changed

+64
-139
lines changed

8 files changed

+64
-139
lines changed

.github/workflows/image.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,12 @@ on:
2121
- opened
2222
- synchronize
2323
branches:
24-
- main
25-
- release-*
24+
- main-no
25+
- release-no
2626
push:
2727
branches:
28-
- main
29-
- release-*
28+
- main-no
29+
- release-no
3030

3131
jobs:
3232
image:

.github/workflows/precompiled.yaml

Lines changed: 44 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,15 @@
1616
name: Precompiled images
1717

1818
on:
19-
schedule:
20-
- cron: '00 09 * * *'
19+
pull_request:
20+
types:
21+
- opened
22+
- synchronize
23+
branches:
24+
- test-holodeck
25+
push:
26+
branches:
27+
- test-holodeck
2128

2229
jobs:
2330
set-driver-version-matrix:
@@ -40,16 +47,19 @@ jobs:
4047
4148
# get kernel flavors
4249
KERNEL_FLAVORS=("aws" "azure" "generic" "nvidia" "oracle")
50+
# KERNEL_FLAVORS=("oracle")
4351
kernel_flavors_json=$(printf '%s\n' "${KERNEL_FLAVORS[@]}" | jq -R . | jq -cs .)
4452
echo "kernel_flavors=$kernel_flavors_json" >> $GITHUB_OUTPUT
4553
4654
# get ubuntu distributions
4755
DIST=("ubuntu22.04" "ubuntu24.04")
56+
# DIST=("ubuntu24.04")
4857
dist_json=$(printf '%s\n' "${DIST[@]}" | jq -R . | jq -cs .)
4958
echo "dist=$dist_json" >> $GITHUB_OUTPUT
5059
5160
# LTS_KERNEL setup
5261
LTS_KERNEL=("5.15" "6.8")
62+
# LTS_KERNEL=("6.8")
5363
lts_kernel_json=$(printf '%s\n' "${LTS_KERNEL[@]}" | jq -R . | jq -cs .)
5464
echo "lts_kernel=$lts_kernel_json" >> $GITHUB_OUTPUT
5565
@@ -273,18 +283,18 @@ jobs:
273283
274284
# slack notification for new kernel release before e2e tests starts
275285
# as e2e tests may fail for new kernel release
276-
- name: Slack notification
277-
if: steps.set_kernel_version.outputs.matrix_values_not_empty == '1'
278-
uses: slackapi/[email protected]
279-
with:
280-
token: ${{ secrets.SLACK_BOT_TOKEN }}
281-
method: chat.postMessage
282-
payload: |
283-
{
284-
"channel": "${{ secrets.SLACK_CHANNEL_ID }}",
285-
"text": "${{ secrets.DETECTED_PRECOMPILED_KERNEL_MESSAGE }} ${{ steps.set_kernel_version.outputs.published_kernels }} \n
286-
Details: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
287-
}
286+
# - name: Slack notification
287+
# if: steps.set_kernel_version.outputs.matrix_values_not_empty == '1'
288+
# uses: slackapi/[email protected]
289+
# with:
290+
# token: ${{ secrets.SLACK_BOT_TOKEN }}
291+
# method: chat.postMessage
292+
# payload: |
293+
# {
294+
# "channel": "${{ secrets.SLACK_CHANNEL_ID }}",
295+
# "text": "${{ secrets.DETECTED_PRECOMPILED_KERNEL_MESSAGE }} ${{ steps.set_kernel_version.outputs.published_kernels }} \n
296+
# Details: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
297+
# }
288298

289299
e2e-tests-nvidiadriver:
290300
runs-on: linux-amd64-cpu4
@@ -319,9 +329,12 @@ jobs:
319329
driver_branch_json="${{ needs.set-driver-version-matrix.outputs.driver_branch }}"
320330
DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]'))
321331
echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV
332+
- name: Set kernel version in holodeck_${{ env.DIST }}.yaml
333+
run: |
334+
yq e '. + {"kernel": {"version": strenv(KERNEL_VERSION)}}' -i tests/holodeck_${{ env.DIST }}.yaml
322335
323336
- name: Set up Holodeck
324-
uses: NVIDIA/[email protected].6
337+
uses: NVIDIA/[email protected].15
325338
env:
326339
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
327340
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -346,27 +359,6 @@ jobs:
346359
sudo apt-get update
347360
sudo apt-get install -y gh
348361
349-
- name: Upgrade the kernel for Precompiled e2e test
350-
env:
351-
UPGRADE_KERNEL_SCRIPT: "./tests/scripts/upgrade-kernel.sh"
352-
run: |
353-
status=0
354-
./tests/ci-remote-exec.sh "${UPGRADE_KERNEL_SCRIPT}" "${KERNEL_VERSION}" || status=$?
355-
# On the target system, all scripts/test-case exit with code 1 for error handling.
356-
# However, since reboot-related disconnections break the SSH connection
357-
# and can cause the entire job to exit, we should ignore all errors except
358-
# exit code 1. During a reboot, exit code 1 will not be thrown, so handling
359-
# other errors as code 1 will ensure proper management of reboot scenarios
360-
if [ $status -eq 1 ]; then
361-
echo "Kernel version $KERNEL_VERSION upgrade failed"
362-
exit 1
363-
fi
364-
./tests/scripts/remote_retry.sh || status=$?
365-
if [ $status -ne 0 ]; then
366-
echo "Failed to connect to remote instance"
367-
exit $status
368-
fi
369-
370362
- name: Precompiled e2e test gpu driver validation
371363
env:
372364
TEST_CASE: "./tests/cases/nvidia-driver.sh"
@@ -455,8 +447,8 @@ jobs:
455447
fi
456448
image_path="./base-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}.tar"
457449
echo "uploading $image_path"
458-
docker load -i $image_path
459-
docker push ${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${KERNEL_FLAVOR}-${{ matrix.driver_branch }}
450+
# docker load -i $image_path
451+
# docker push ${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${KERNEL_FLAVOR}-${{ matrix.driver_branch }}
460452
461453
- name: Download built image artifact
462454
if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }}
@@ -470,17 +462,17 @@ jobs:
470462
run: |
471463
image_path="./driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}.tar"
472464
echo "uploading $image_path"
473-
docker load -i $image_path
474-
docker push ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}
475-
476-
- name: Slack notification
477-
if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }}
478-
uses: slackapi/[email protected]
479-
with:
480-
token: ${{ secrets.SLACK_BOT_TOKEN }}
481-
method: chat.postMessage
482-
payload: |
483-
{
484-
"channel": "${{ secrets.SLACK_CHANNEL_ID }}",
485-
"text": "${{ secrets.PUBLISHED_PRECOMPILED_IMAGE_MESSAGE }} ${{ env.PRIVATE_REGISTRY }}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}"
486-
}
465+
# docker load -i $image_path
466+
# docker push ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}
467+
468+
# - name: Slack notification
469+
# if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }}
470+
# uses: slackapi/[email protected]
471+
# with:
472+
# token: ${{ secrets.SLACK_BOT_TOKEN }}
473+
# method: chat.postMessage
474+
# payload: |
475+
# {
476+
# "channel": "${{ secrets.SLACK_CHANNEL_ID }}",
477+
# "text": "${{ secrets.PUBLISHED_PRECOMPILED_IMAGE_MESSAGE }} ${{ env.PRIVATE_REGISTRY }}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}"
478+
# }

tests/holodeck_ubuntu22.04.yaml

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,15 @@ spec:
1212
type: g4dn.xlarge
1313
region: us-west-1
1414
ingressIpRanges:
15-
- 18.190.12.32/32
16-
- 3.143.46.93/32
17-
- 52.15.119.136/32
18-
- 35.155.108.162/32
19-
- 35.162.190.51/32
20-
- 54.201.61.24/32
21-
- 52.24.205.48/32
22-
- 44.235.4.62/32
23-
- 44.230.241.223/32
15+
- 18.190.12.32/32
16+
- 3.143.46.93/32
17+
- 52.15.119.136/32
18+
- 35.155.108.162/32
19+
- 35.162.190.51/32
20+
- 54.201.61.24/32
21+
- 52.24.205.48/32
22+
- 44.235.4.62/32
23+
- 44.230.241.223/32
2424
image:
2525
architecture: amd64
2626
imageId: ami-0ce2cb35386fc22e9
@@ -30,5 +30,6 @@ spec:
3030
kubernetes:
3131
install: true
3232
installer: kubeadm
33-
version: v1.28.5
34-
crictlVersion: v1.28.0
33+
version: v1.31.1
34+
crictlVersion: v1.31.1
35+

tests/holodeck_ubuntu24.04.yaml

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,8 @@ spec:
2727
containerRuntime:
2828
install: true
2929
name: containerd
30-
version: 1.7.22
3130
kubernetes:
3231
install: true
3332
installer: kubeadm
34-
version: v1.30.0
35-
crictlVersion: v1.30.0
33+
version: v1.31.1
34+
crictlVersion: v1.31.1

tests/scripts/checks.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ check_pod_ready() {
77
echo "Checking $pod_label pod"
88

99
kubectl get pods -lapp=$pod_label -n ${TEST_NAMESPACE}
10-
10+
1111
echo "Checking $pod_label pod readiness"
1212

1313
if kubectl wait -n ${TEST_NAMESPACE} --for=condition=Ready pod -l app=$pod_label --timeout ${pod_status_time_out}; then

tests/scripts/findkernelversion.sh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,3 +42,4 @@ if [[ $status_nvcr -eq 0 || $status_ghcr -eq 0 ]]; then
4242
else
4343
export should_continue=true
4444
fi
45+
export should_continue=true

tests/scripts/kernel-upgrade-helper.sh

Lines changed: 0 additions & 54 deletions
This file was deleted.

tests/scripts/upgrade-kernel.sh

Lines changed: 0 additions & 14 deletions
This file was deleted.

0 commit comments

Comments
 (0)