Skip to content

Commit 270c7b8

Browse files
authored
feat: add test cases to check vGPU license status (#696)
* feat: add test cases to check vGPU license status
1 parent 978f8b7 commit 270c7b8

28 files changed

+82
-1
lines changed

test/cases/nvidia/unit_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
_ "embed"
88
"fmt"
99
"testing"
10+
"time"
1011

1112
fwext "github.com/aws/aws-k8s-tester/internal/e2e"
1213
"sigs.k8s.io/e2e-framework/klient/wait"
@@ -66,7 +67,8 @@ func TestSingleNodeUnitTest(t *testing.T) {
6667
ObjectMeta: metav1.ObjectMeta{Name: "unit-test-job", Namespace: "default"},
6768
}
6869
err := wait.For(fwext.NewConditionExtension(cfg.Client().Resources()).JobSucceeded(job),
69-
wait.WithContext(ctx))
70+
wait.WithContext(ctx),
71+
wait.WithTimeout(10*time.Minute))
7072
if err != nil {
7173
t.Fatal(err)
7274
}

test/images/nvidia/gpu_unit_tests/tests/common.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,12 @@ generate_data()
5353
eval "$cmd" > $expected
5454
_assert_data "$expected" "$cmd" "$msg"
5555
}
56+
57+
# Report whether the current EC2 instance type is one of the vGPU types.
#
# The instance type is taken from EC2_INSTANCE_TYPE when that variable is set
# and non-empty; otherwise it is obtained by calling get_instance_type.
#
# Returns:
#   0 when the instance type matches g6f.* or gr6f.*
#   1 for any other instance type
is_vgpu()
{
    local instance_type=${EC2_INSTANCE_TYPE:-$(get_instance_type)}
    case "${instance_type}" in
        # Explicit status: a bare `return` would only yield 0 because the
        # preceding `local` assignment happened to succeed.
        g6f.*|gr6f.*) return 0 ;;
        *) return 1 ;; # Not a vGPU instance type
    esac
}

test/images/nvidia/gpu_unit_tests/tests/test_basic.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ test_03_nvbandwidth()
3131

3232
test_04_dcgm_diagnostics()
3333
{
34+
# This test is not applicable for vGPU instance types.
35+
if is_vgpu; then
36+
skip "This test does not apply to vGPU instances (g6f.*, gr6f.*)"
37+
fi
38+
3439
# https://docs.nvidia.com/datacenter/dcgm/latest/user-guide/dcgm-diagnostics.html#run-levels-and-tests
3540
if [[ $EC2_INSTANCE_TYPE == g* ]]; then
3641
# G-series instances don't have NVLink or GPU P2P communication

test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,26 @@ test_nvidia_gpu_unused()
6161
# Check that no GPU reports an active clock throttle reason other than the
# values tolerated by the filter below. Not applicable to vGPU instance types.
test_nvidia_gpu_throttled()
{
    # vGPU instances don't support GPU clock throttling detection, so this
    # test is not applicable to vGPU instance types.
    if is_vgpu; then
        skip "This test does not apply to vGPU instances (g6f.*, gr6f.*)"
    fi

    # https://docs.nvidia.com/deploy/nvml-api/group__nvmlClocksEventReasons.html#group__nvmlClocksEventReasons
    # Tolerated values of clocks_throttle_reasons.active:
    #   0x0000000000000000  nvmlClocksEventReasonNone
    #   0x0000000000000001  nvmlClocksEventReasonGpuIdle
    #   0x0000000000000004  nvmlClocksEventReasonSwPowerCap
    # Lines carrying one of these values are filtered out (-v); when nothing
    # is left grep exits with status 1, which is the status asserted below.
    # `grep -E` is used instead of the obsolescent `egrep`, which prints a
    # warning on GNU grep >= 3.8.
    filter="grep -E -v -e '(0x0000000000000000|0x0000000000000001|0x0000000000000004)'"
    cmd="nvidia-smi --query-gpu index,gpu_bus_id,gpu_uuid,clocks_throttle_reasons.active --format=csv,noheader"
    assert_status_code 1 "$cmd | $filter" "Throttled gpu detected"
}
75+
76+
77+
# Compare the vGPU licensing section of `nvidia-smi -q` (the 'vGPU Software'
# line plus the two lines after it) with the expected content stored in
# $data/nvidia_vgpu_license_status.txt. Only applicable to vGPU instance types.
test_nvidia_vgpu_license_status()
{
    if ! is_vgpu; then
        skip "This test only applies to vGPU instances (g6f.*, gr6f.*)"
    fi

    # The path is quoted so a $data directory containing whitespace or glob
    # characters is still passed as a single argument.
    assert_data "$data/nvidia_vgpu_license_status.txt" \
        "nvidia-smi -q | grep 'vGPU Software' -A 2" \
        "vGPU license status validation failed"
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
name, index, pci.bus_id
2+
NVIDIA L4-6Q, 0, 00000000:31:00.0
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/sys/devices/system/node/node0/cpulist:0-7
2+
/sys/devices/system/node/node0/distance:10
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
name, pci.bus_id, persistence_mode
2+
NVIDIA L4-6Q, 00000000:31:00.0, Enabled
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
GPU0 CPU Affinity NUMA Affinity GPU NUMA ID
2+
GPU0 X 0-7 0 N/A
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
vGPU Software Licensed Product
2+
Product Name : NVIDIA RTX Virtual Workstation
3+
License Status : Licensed (Expiry: N/A)

0 commit comments

Comments
 (0)