Skip to content

Commit 5a17101

Browse files
authored
Merge pull request #1715 from karthikvetrivel/test/transform-manager-functions
tests: Add unit tests for DCGM, VFIO Manager, CC Manager, and vGPU Device Manager transforms
2 parents 5f7a3ab + 249f2d9 commit 5a17101

File tree

1 file changed

+327
-0
lines changed

1 file changed

+327
-0
lines changed

controllers/transforms_test.go

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"k8s.io/apimachinery/pkg/api/resource"
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2929
"k8s.io/apimachinery/pkg/util/intstr"
30+
"k8s.io/utils/ptr"
3031
ctrl "sigs.k8s.io/controller-runtime"
3132
"sigs.k8s.io/controller-runtime/pkg/client"
3233
"sigs.k8s.io/controller-runtime/pkg/client/fake"
@@ -145,6 +146,11 @@ func (d Daemonset) WithRuntimeClassName(name string) Daemonset {
145146
return d
146147
}
147148

149+
func (d Daemonset) WithHostNetwork(enabled bool) Daemonset {
150+
d.Spec.Template.Spec.HostNetwork = enabled
151+
return d
152+
}
153+
148154
// Pod is a Pod wrapper used for testing
149155
type Pod struct {
150156
*corev1.Pod
@@ -844,6 +850,111 @@ func TestTransformDCGMExporter(t *testing.T) {
844850
}
845851
}
846852

853+
func TestTransformDCGM(t *testing.T) {
854+
limits := corev1.ResourceList{
855+
corev1.ResourceCPU: resource.MustParse("100m"),
856+
corev1.ResourceMemory: resource.MustParse("128Mi"),
857+
}
858+
requests := corev1.ResourceList{
859+
corev1.ResourceCPU: resource.MustParse("50m"),
860+
corev1.ResourceMemory: resource.MustParse("64Mi"),
861+
}
862+
863+
testCases := []struct {
864+
description string
865+
daemonset Daemonset
866+
clusterPolicySpec *gpuv1.ClusterPolicySpec
867+
expectedDaemonset Daemonset
868+
}{
869+
{
870+
description: "transform dcgm fully configured",
871+
daemonset: NewDaemonset().
872+
WithContainer(corev1.Container{Name: "dcgm"}).
873+
WithContainer(corev1.Container{Name: "sidecar"}),
874+
clusterPolicySpec: &gpuv1.ClusterPolicySpec{
875+
DCGM: gpuv1.DCGMSpec{
876+
Repository: "nvcr.io/nvidia/cloud-native",
877+
Image: "dcgm",
878+
Version: "v1.0.0",
879+
ImagePullPolicy: "IfNotPresent",
880+
ImagePullSecrets: []string{"pull-secret"},
881+
Resources: &gpuv1.ResourceRequirements{Limits: limits, Requests: requests},
882+
Args: []string{"--foo"},
883+
Env: []gpuv1.EnvVar{{Name: "FOO", Value: "bar"}},
884+
},
885+
},
886+
expectedDaemonset: NewDaemonset().
887+
WithContainer(corev1.Container{
888+
Name: "dcgm",
889+
Image: "nvcr.io/nvidia/cloud-native/dcgm:v1.0.0",
890+
ImagePullPolicy: corev1.PullIfNotPresent,
891+
Args: []string{"--foo"},
892+
Env: []corev1.EnvVar{{Name: "FOO", Value: "bar"}},
893+
Resources: corev1.ResourceRequirements{Limits: limits, Requests: requests},
894+
}).
895+
WithContainer(corev1.Container{
896+
Name: "sidecar",
897+
Resources: corev1.ResourceRequirements{Limits: limits, Requests: requests},
898+
}).
899+
WithPullSecret("pull-secret").
900+
WithRuntimeClassName("nvidia"),
901+
},
902+
{
903+
description: "transform dcgm sets runtime class only when spec empty",
904+
daemonset: NewDaemonset().WithContainer(corev1.Container{Name: "dcgm"}),
905+
clusterPolicySpec: &gpuv1.ClusterPolicySpec{
906+
Operator: gpuv1.OperatorSpec{RuntimeClass: "nvidia"},
907+
DCGM: gpuv1.DCGMSpec{Repository: "nvcr.io/nvidia/cloud-native", Image: "dcgm", Version: "v1.0.0"},
908+
},
909+
expectedDaemonset: NewDaemonset().
910+
WithContainer(corev1.Container{
911+
Name: "dcgm",
912+
Image: "nvcr.io/nvidia/cloud-native/dcgm:v1.0.0",
913+
ImagePullPolicy: corev1.PullIfNotPresent,
914+
}).
915+
WithRuntimeClassName("nvidia"),
916+
},
917+
{
918+
description: "dcgm enabled does not set remote engine env",
919+
daemonset: NewDaemonset().WithContainer(corev1.Container{Name: "dcgm"}),
920+
clusterPolicySpec: &gpuv1.ClusterPolicySpec{
921+
DCGM: gpuv1.DCGMSpec{Enabled: ptr.To(true), Repository: "nvcr.io/nvidia/cloud-native", Image: "dcgm", Version: "v1.0.0"},
922+
},
923+
expectedDaemonset: NewDaemonset().WithContainer(corev1.Container{
924+
Name: "dcgm",
925+
Image: "nvcr.io/nvidia/cloud-native/dcgm:v1.0.0",
926+
ImagePullPolicy: corev1.PullIfNotPresent,
927+
}).WithRuntimeClassName("nvidia"),
928+
},
929+
{
930+
description: "dcgm disabled with localhost env does not change hostNetwork",
931+
daemonset: NewDaemonset().WithContainer(corev1.Container{
932+
Name: "dcgm",
933+
Env: []corev1.EnvVar{{Name: "DCGM_REMOTE_HOSTENGINE_INFO", Value: "localhost:5555"}},
934+
}),
935+
clusterPolicySpec: &gpuv1.ClusterPolicySpec{
936+
DCGM: gpuv1.DCGMSpec{Enabled: ptr.To(false), Repository: "nvcr.io/nvidia/cloud-native", Image: "dcgm", Version: "v1.0.0"},
937+
},
938+
expectedDaemonset: NewDaemonset().
939+
WithContainer(corev1.Container{
940+
Name: "dcgm",
941+
Image: "nvcr.io/nvidia/cloud-native/dcgm:v1.0.0",
942+
ImagePullPolicy: corev1.PullIfNotPresent,
943+
Env: []corev1.EnvVar{{Name: "DCGM_REMOTE_HOSTENGINE_INFO", Value: "localhost:5555"}},
944+
}).
945+
WithRuntimeClassName("nvidia"),
946+
},
947+
}
948+
949+
for _, tc := range testCases {
950+
t.Run(tc.description, func(t *testing.T) {
951+
err := TransformDCGM(tc.daemonset.DaemonSet, tc.clusterPolicySpec, ClusterPolicyController{runtime: gpuv1.Containerd, logger: ctrl.Log.WithName("test")})
952+
require.NoError(t, err)
953+
require.EqualValues(t, tc.expectedDaemonset.DaemonSet, tc.daemonset.DaemonSet)
954+
})
955+
}
956+
}
957+
847958
func TestTransformMigManager(t *testing.T) {
848959
testCases := []struct {
849960
description string
@@ -1009,6 +1120,222 @@ func TestTransformKataManager(t *testing.T) {
10091120
}
10101121
}
10111122

1123+
func TestTransformVFIOManager(t *testing.T) {
1124+
resources := corev1.ResourceRequirements{
1125+
Limits: corev1.ResourceList{
1126+
corev1.ResourceCPU: resource.MustParse("100m"),
1127+
corev1.ResourceMemory: resource.MustParse("128Mi"),
1128+
},
1129+
Requests: corev1.ResourceList{
1130+
corev1.ResourceCPU: resource.MustParse("50m"),
1131+
corev1.ResourceMemory: resource.MustParse("64Mi"),
1132+
},
1133+
}
1134+
secret := "pull-secret"
1135+
mockEnv := []gpuv1.EnvVar{{Name: "foo", Value: "bar"}}
1136+
mockEnvCore := []corev1.EnvVar{{Name: "foo", Value: "bar"}}
1137+
1138+
testCases := []struct {
1139+
description string
1140+
daemonset Daemonset
1141+
clusterPolicySpec *gpuv1.ClusterPolicySpec
1142+
expectedDaemonset Daemonset
1143+
}{
1144+
{
1145+
description: "transform vfio manager",
1146+
daemonset: NewDaemonset().
1147+
WithContainer(corev1.Container{Name: "nvidia-vfio-manager"}).
1148+
WithContainer(corev1.Container{Name: "sidecar"}).
1149+
WithInitContainer(corev1.Container{Name: "k8s-driver-manager"}),
1150+
clusterPolicySpec: &gpuv1.ClusterPolicySpec{
1151+
VFIOManager: gpuv1.VFIOManagerSpec{
1152+
Repository: "nvcr.io/nvidia/cloud-native",
1153+
Image: "vfio-pci-manager",
1154+
Version: "v1.0.0",
1155+
ImagePullPolicy: "IfNotPresent",
1156+
ImagePullSecrets: []string{secret},
1157+
Resources: &gpuv1.ResourceRequirements{Limits: resources.Limits, Requests: resources.Requests},
1158+
Args: []string{"--test-flag"},
1159+
Env: mockEnv,
1160+
DriverManager: gpuv1.DriverManagerSpec{
1161+
Repository: "nvcr.io/nvidia/cloud-native",
1162+
Image: "k8s-driver-manager",
1163+
Version: "v1.0.0",
1164+
ImagePullPolicy: "IfNotPresent",
1165+
Env: mockEnv,
1166+
},
1167+
},
1168+
},
1169+
expectedDaemonset: NewDaemonset().
1170+
WithContainer(corev1.Container{
1171+
Name: "nvidia-vfio-manager",
1172+
Image: "nvcr.io/nvidia/cloud-native/vfio-pci-manager:v1.0.0",
1173+
ImagePullPolicy: corev1.PullIfNotPresent,
1174+
Args: []string{"--test-flag"},
1175+
Env: mockEnvCore,
1176+
Resources: resources,
1177+
}).
1178+
WithContainer(corev1.Container{
1179+
Name: "sidecar",
1180+
Resources: resources,
1181+
}).
1182+
WithInitContainer(corev1.Container{
1183+
Name: "k8s-driver-manager",
1184+
Image: "nvcr.io/nvidia/cloud-native/k8s-driver-manager:v1.0.0",
1185+
ImagePullPolicy: corev1.PullIfNotPresent,
1186+
Env: mockEnvCore,
1187+
}).
1188+
WithPullSecret(secret),
1189+
},
1190+
}
1191+
1192+
for _, tc := range testCases {
1193+
t.Run(tc.description, func(t *testing.T) {
1194+
err := TransformVFIOManager(tc.daemonset.DaemonSet, tc.clusterPolicySpec, ClusterPolicyController{logger: ctrl.Log.WithName("test")})
1195+
require.NoError(t, err)
1196+
require.EqualValues(t, tc.expectedDaemonset, tc.daemonset)
1197+
})
1198+
}
1199+
}
1200+
1201+
func TestTransformCCManager(t *testing.T) {
1202+
resources := corev1.ResourceRequirements{
1203+
Limits: corev1.ResourceList{
1204+
corev1.ResourceCPU: resource.MustParse("100m"),
1205+
corev1.ResourceMemory: resource.MustParse("128Mi"),
1206+
},
1207+
Requests: corev1.ResourceList{
1208+
corev1.ResourceCPU: resource.MustParse("50m"),
1209+
corev1.ResourceMemory: resource.MustParse("64Mi"),
1210+
},
1211+
}
1212+
secret := "pull-secret"
1213+
mockEnv := []gpuv1.EnvVar{{Name: "foo", Value: "bar"}}
1214+
defaultMode := "devtools"
1215+
1216+
testCases := []struct {
1217+
description string
1218+
daemonset Daemonset
1219+
clusterPolicySpec *gpuv1.ClusterPolicySpec
1220+
expectedDaemonset Daemonset
1221+
}{
1222+
{
1223+
description: "transform cc manager",
1224+
daemonset: NewDaemonset().
1225+
WithContainer(corev1.Container{Name: "nvidia-cc-manager"}).
1226+
WithContainer(corev1.Container{Name: "sidecar"}),
1227+
clusterPolicySpec: &gpuv1.ClusterPolicySpec{
1228+
CCManager: gpuv1.CCManagerSpec{
1229+
Repository: "nvcr.io/nvidia/cloud-native",
1230+
Image: "k8s-cc-manager",
1231+
Version: "v1.0.0",
1232+
ImagePullPolicy: "IfNotPresent",
1233+
ImagePullSecrets: []string{secret},
1234+
Resources: &gpuv1.ResourceRequirements{Limits: resources.Limits, Requests: resources.Requests},
1235+
Args: []string{"--test-flag"},
1236+
DefaultMode: defaultMode,
1237+
Env: mockEnv,
1238+
},
1239+
},
1240+
expectedDaemonset: NewDaemonset().
1241+
WithContainer(corev1.Container{
1242+
Name: "nvidia-cc-manager",
1243+
Image: "nvcr.io/nvidia/cloud-native/k8s-cc-manager:v1.0.0",
1244+
ImagePullPolicy: corev1.PullIfNotPresent,
1245+
Args: []string{"--test-flag"},
1246+
Env: []corev1.EnvVar{
1247+
{Name: "DEFAULT_CC_MODE", Value: defaultMode},
1248+
{Name: "foo", Value: "bar"},
1249+
},
1250+
Resources: resources,
1251+
}).
1252+
WithContainer(corev1.Container{
1253+
Name: "sidecar",
1254+
Resources: resources,
1255+
}).
1256+
WithPullSecret(secret),
1257+
},
1258+
}
1259+
1260+
for _, tc := range testCases {
1261+
t.Run(tc.description, func(t *testing.T) {
1262+
err := TransformCCManager(tc.daemonset.DaemonSet, tc.clusterPolicySpec, ClusterPolicyController{logger: ctrl.Log.WithName("test")})
1263+
require.NoError(t, err)
1264+
require.EqualValues(t, tc.expectedDaemonset, tc.daemonset)
1265+
})
1266+
}
1267+
}
1268+
1269+
func TestTransformVGPUDeviceManager(t *testing.T) {
1270+
resources := corev1.ResourceRequirements{
1271+
Limits: corev1.ResourceList{
1272+
corev1.ResourceCPU: resource.MustParse("100m"),
1273+
corev1.ResourceMemory: resource.MustParse("128Mi"),
1274+
},
1275+
Requests: corev1.ResourceList{
1276+
corev1.ResourceCPU: resource.MustParse("50m"),
1277+
corev1.ResourceMemory: resource.MustParse("64Mi"),
1278+
},
1279+
}
1280+
secret := "pull-secret"
1281+
mockEnv := []gpuv1.EnvVar{{Name: "foo", Value: "bar"}}
1282+
1283+
testCases := []struct {
1284+
description string
1285+
daemonset Daemonset
1286+
clusterPolicySpec *gpuv1.ClusterPolicySpec
1287+
expectedDaemonset Daemonset
1288+
}{
1289+
{
1290+
description: "transform vgpu device manager",
1291+
daemonset: NewDaemonset().
1292+
WithContainer(corev1.Container{Name: "nvidia-vgpu-device-manager"}).
1293+
WithContainer(corev1.Container{Name: "sidecar"}),
1294+
clusterPolicySpec: &gpuv1.ClusterPolicySpec{
1295+
VGPUDeviceManager: gpuv1.VGPUDeviceManagerSpec{
1296+
Repository: "nvcr.io/nvidia/cloud-native",
1297+
Image: "vgpu-device-manager",
1298+
Version: "v1.0.0",
1299+
ImagePullPolicy: "IfNotPresent",
1300+
ImagePullSecrets: []string{secret},
1301+
Resources: &gpuv1.ResourceRequirements{Limits: resources.Limits, Requests: resources.Requests},
1302+
Args: []string{"--test-flag"},
1303+
Env: mockEnv,
1304+
Config: &gpuv1.VGPUDevicesConfigSpec{
1305+
Name: "custom-vgpu-config",
1306+
Default: "perf",
1307+
},
1308+
},
1309+
},
1310+
expectedDaemonset: NewDaemonset().
1311+
WithContainer(corev1.Container{
1312+
Name: "nvidia-vgpu-device-manager",
1313+
Image: "nvcr.io/nvidia/cloud-native/vgpu-device-manager:v1.0.0",
1314+
ImagePullPolicy: corev1.PullIfNotPresent,
1315+
Args: []string{"--test-flag"},
1316+
Env: []corev1.EnvVar{
1317+
{Name: "foo", Value: "bar"},
1318+
{Name: "DEFAULT_VGPU_CONFIG", Value: "perf"},
1319+
},
1320+
Resources: resources,
1321+
}).
1322+
WithContainer(corev1.Container{
1323+
Name: "sidecar",
1324+
Resources: resources,
1325+
}).
1326+
WithPullSecret(secret),
1327+
},
1328+
}
1329+
1330+
for _, tc := range testCases {
1331+
t.Run(tc.description, func(t *testing.T) {
1332+
err := TransformVGPUDeviceManager(tc.daemonset.DaemonSet, tc.clusterPolicySpec, ClusterPolicyController{logger: ctrl.Log.WithName("test")})
1333+
require.NoError(t, err)
1334+
require.EqualValues(t, tc.expectedDaemonset, tc.daemonset)
1335+
})
1336+
}
1337+
}
1338+
10121339
func TestTransformValidationInitContainer(t *testing.T) {
10131340
testCases := []struct {
10141341
description string

0 commit comments

Comments
 (0)