@@ -27,6 +27,7 @@ import (
2727 "k8s.io/apimachinery/pkg/api/resource"
2828 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2929 "k8s.io/apimachinery/pkg/util/intstr"
30+ "k8s.io/utils/ptr"
3031 ctrl "sigs.k8s.io/controller-runtime"
3132 "sigs.k8s.io/controller-runtime/pkg/client"
3233 "sigs.k8s.io/controller-runtime/pkg/client/fake"
@@ -145,6 +146,11 @@ func (d Daemonset) WithRuntimeClassName(name string) Daemonset {
145146 return d
146147}
147148
149+ func (d Daemonset ) WithHostNetwork (enabled bool ) Daemonset {
150+ d .Spec .Template .Spec .HostNetwork = enabled
151+ return d
152+ }
153+
148154// Pod is a Pod wrapper used for testing
149155type Pod struct {
150156 * corev1.Pod
@@ -844,6 +850,111 @@ func TestTransformDCGMExporter(t *testing.T) {
844850 }
845851}
846852
853+ func TestTransformDCGM (t * testing.T ) {
854+ limits := corev1.ResourceList {
855+ corev1 .ResourceCPU : resource .MustParse ("100m" ),
856+ corev1 .ResourceMemory : resource .MustParse ("128Mi" ),
857+ }
858+ requests := corev1.ResourceList {
859+ corev1 .ResourceCPU : resource .MustParse ("50m" ),
860+ corev1 .ResourceMemory : resource .MustParse ("64Mi" ),
861+ }
862+
863+ testCases := []struct {
864+ description string
865+ daemonset Daemonset
866+ clusterPolicySpec * gpuv1.ClusterPolicySpec
867+ expectedDaemonset Daemonset
868+ }{
869+ {
870+ description : "transform dcgm fully configured" ,
871+ daemonset : NewDaemonset ().
872+ WithContainer (corev1.Container {Name : "dcgm" }).
873+ WithContainer (corev1.Container {Name : "sidecar" }),
874+ clusterPolicySpec : & gpuv1.ClusterPolicySpec {
875+ DCGM : gpuv1.DCGMSpec {
876+ Repository : "nvcr.io/nvidia/cloud-native" ,
877+ Image : "dcgm" ,
878+ Version : "v1.0.0" ,
879+ ImagePullPolicy : "IfNotPresent" ,
880+ ImagePullSecrets : []string {"pull-secret" },
881+ Resources : & gpuv1.ResourceRequirements {Limits : limits , Requests : requests },
882+ Args : []string {"--foo" },
883+ Env : []gpuv1.EnvVar {{Name : "FOO" , Value : "bar" }},
884+ },
885+ },
886+ expectedDaemonset : NewDaemonset ().
887+ WithContainer (corev1.Container {
888+ Name : "dcgm" ,
889+ Image : "nvcr.io/nvidia/cloud-native/dcgm:v1.0.0" ,
890+ ImagePullPolicy : corev1 .PullIfNotPresent ,
891+ Args : []string {"--foo" },
892+ Env : []corev1.EnvVar {{Name : "FOO" , Value : "bar" }},
893+ Resources : corev1.ResourceRequirements {Limits : limits , Requests : requests },
894+ }).
895+ WithContainer (corev1.Container {
896+ Name : "sidecar" ,
897+ Resources : corev1.ResourceRequirements {Limits : limits , Requests : requests },
898+ }).
899+ WithPullSecret ("pull-secret" ).
900+ WithRuntimeClassName ("nvidia" ),
901+ },
902+ {
903+ description : "transform dcgm sets runtime class only when spec empty" ,
904+ daemonset : NewDaemonset ().WithContainer (corev1.Container {Name : "dcgm" }),
905+ clusterPolicySpec : & gpuv1.ClusterPolicySpec {
906+ Operator : gpuv1.OperatorSpec {RuntimeClass : "nvidia" },
907+ DCGM : gpuv1.DCGMSpec {Repository : "nvcr.io/nvidia/cloud-native" , Image : "dcgm" , Version : "v1.0.0" },
908+ },
909+ expectedDaemonset : NewDaemonset ().
910+ WithContainer (corev1.Container {
911+ Name : "dcgm" ,
912+ Image : "nvcr.io/nvidia/cloud-native/dcgm:v1.0.0" ,
913+ ImagePullPolicy : corev1 .PullIfNotPresent ,
914+ }).
915+ WithRuntimeClassName ("nvidia" ),
916+ },
917+ {
918+ description : "dcgm enabled does not set remote engine env" ,
919+ daemonset : NewDaemonset ().WithContainer (corev1.Container {Name : "dcgm" }),
920+ clusterPolicySpec : & gpuv1.ClusterPolicySpec {
921+ DCGM : gpuv1.DCGMSpec {Enabled : ptr .To (true ), Repository : "nvcr.io/nvidia/cloud-native" , Image : "dcgm" , Version : "v1.0.0" },
922+ },
923+ expectedDaemonset : NewDaemonset ().WithContainer (corev1.Container {
924+ Name : "dcgm" ,
925+ Image : "nvcr.io/nvidia/cloud-native/dcgm:v1.0.0" ,
926+ ImagePullPolicy : corev1 .PullIfNotPresent ,
927+ }).WithRuntimeClassName ("nvidia" ),
928+ },
929+ {
930+ description : "dcgm disabled with localhost env does not change hostNetwork" ,
931+ daemonset : NewDaemonset ().WithContainer (corev1.Container {
932+ Name : "dcgm" ,
933+ Env : []corev1.EnvVar {{Name : "DCGM_REMOTE_HOSTENGINE_INFO" , Value : "localhost:5555" }},
934+ }),
935+ clusterPolicySpec : & gpuv1.ClusterPolicySpec {
936+ DCGM : gpuv1.DCGMSpec {Enabled : ptr .To (false ), Repository : "nvcr.io/nvidia/cloud-native" , Image : "dcgm" , Version : "v1.0.0" },
937+ },
938+ expectedDaemonset : NewDaemonset ().
939+ WithContainer (corev1.Container {
940+ Name : "dcgm" ,
941+ Image : "nvcr.io/nvidia/cloud-native/dcgm:v1.0.0" ,
942+ ImagePullPolicy : corev1 .PullIfNotPresent ,
943+ Env : []corev1.EnvVar {{Name : "DCGM_REMOTE_HOSTENGINE_INFO" , Value : "localhost:5555" }},
944+ }).
945+ WithRuntimeClassName ("nvidia" ),
946+ },
947+ }
948+
949+ for _ , tc := range testCases {
950+ t .Run (tc .description , func (t * testing.T ) {
951+ err := TransformDCGM (tc .daemonset .DaemonSet , tc .clusterPolicySpec , ClusterPolicyController {runtime : gpuv1 .Containerd , logger : ctrl .Log .WithName ("test" )})
952+ require .NoError (t , err )
953+ require .EqualValues (t , tc .expectedDaemonset .DaemonSet , tc .daemonset .DaemonSet )
954+ })
955+ }
956+ }
957+
847958func TestTransformMigManager (t * testing.T ) {
848959 testCases := []struct {
849960 description string
@@ -1009,6 +1120,222 @@ func TestTransformKataManager(t *testing.T) {
10091120 }
10101121}
10111122
1123+ func TestTransformVFIOManager (t * testing.T ) {
1124+ resources := corev1.ResourceRequirements {
1125+ Limits : corev1.ResourceList {
1126+ corev1 .ResourceCPU : resource .MustParse ("100m" ),
1127+ corev1 .ResourceMemory : resource .MustParse ("128Mi" ),
1128+ },
1129+ Requests : corev1.ResourceList {
1130+ corev1 .ResourceCPU : resource .MustParse ("50m" ),
1131+ corev1 .ResourceMemory : resource .MustParse ("64Mi" ),
1132+ },
1133+ }
1134+ secret := "pull-secret"
1135+ mockEnv := []gpuv1.EnvVar {{Name : "foo" , Value : "bar" }}
1136+ mockEnvCore := []corev1.EnvVar {{Name : "foo" , Value : "bar" }}
1137+
1138+ testCases := []struct {
1139+ description string
1140+ daemonset Daemonset
1141+ clusterPolicySpec * gpuv1.ClusterPolicySpec
1142+ expectedDaemonset Daemonset
1143+ }{
1144+ {
1145+ description : "transform vfio manager" ,
1146+ daemonset : NewDaemonset ().
1147+ WithContainer (corev1.Container {Name : "nvidia-vfio-manager" }).
1148+ WithContainer (corev1.Container {Name : "sidecar" }).
1149+ WithInitContainer (corev1.Container {Name : "k8s-driver-manager" }),
1150+ clusterPolicySpec : & gpuv1.ClusterPolicySpec {
1151+ VFIOManager : gpuv1.VFIOManagerSpec {
1152+ Repository : "nvcr.io/nvidia/cloud-native" ,
1153+ Image : "vfio-pci-manager" ,
1154+ Version : "v1.0.0" ,
1155+ ImagePullPolicy : "IfNotPresent" ,
1156+ ImagePullSecrets : []string {secret },
1157+ Resources : & gpuv1.ResourceRequirements {Limits : resources .Limits , Requests : resources .Requests },
1158+ Args : []string {"--test-flag" },
1159+ Env : mockEnv ,
1160+ DriverManager : gpuv1.DriverManagerSpec {
1161+ Repository : "nvcr.io/nvidia/cloud-native" ,
1162+ Image : "k8s-driver-manager" ,
1163+ Version : "v1.0.0" ,
1164+ ImagePullPolicy : "IfNotPresent" ,
1165+ Env : mockEnv ,
1166+ },
1167+ },
1168+ },
1169+ expectedDaemonset : NewDaemonset ().
1170+ WithContainer (corev1.Container {
1171+ Name : "nvidia-vfio-manager" ,
1172+ Image : "nvcr.io/nvidia/cloud-native/vfio-pci-manager:v1.0.0" ,
1173+ ImagePullPolicy : corev1 .PullIfNotPresent ,
1174+ Args : []string {"--test-flag" },
1175+ Env : mockEnvCore ,
1176+ Resources : resources ,
1177+ }).
1178+ WithContainer (corev1.Container {
1179+ Name : "sidecar" ,
1180+ Resources : resources ,
1181+ }).
1182+ WithInitContainer (corev1.Container {
1183+ Name : "k8s-driver-manager" ,
1184+ Image : "nvcr.io/nvidia/cloud-native/k8s-driver-manager:v1.0.0" ,
1185+ ImagePullPolicy : corev1 .PullIfNotPresent ,
1186+ Env : mockEnvCore ,
1187+ }).
1188+ WithPullSecret (secret ),
1189+ },
1190+ }
1191+
1192+ for _ , tc := range testCases {
1193+ t .Run (tc .description , func (t * testing.T ) {
1194+ err := TransformVFIOManager (tc .daemonset .DaemonSet , tc .clusterPolicySpec , ClusterPolicyController {logger : ctrl .Log .WithName ("test" )})
1195+ require .NoError (t , err )
1196+ require .EqualValues (t , tc .expectedDaemonset , tc .daemonset )
1197+ })
1198+ }
1199+ }
1200+
1201+ func TestTransformCCManager (t * testing.T ) {
1202+ resources := corev1.ResourceRequirements {
1203+ Limits : corev1.ResourceList {
1204+ corev1 .ResourceCPU : resource .MustParse ("100m" ),
1205+ corev1 .ResourceMemory : resource .MustParse ("128Mi" ),
1206+ },
1207+ Requests : corev1.ResourceList {
1208+ corev1 .ResourceCPU : resource .MustParse ("50m" ),
1209+ corev1 .ResourceMemory : resource .MustParse ("64Mi" ),
1210+ },
1211+ }
1212+ secret := "pull-secret"
1213+ mockEnv := []gpuv1.EnvVar {{Name : "foo" , Value : "bar" }}
1214+ defaultMode := "devtools"
1215+
1216+ testCases := []struct {
1217+ description string
1218+ daemonset Daemonset
1219+ clusterPolicySpec * gpuv1.ClusterPolicySpec
1220+ expectedDaemonset Daemonset
1221+ }{
1222+ {
1223+ description : "transform cc manager" ,
1224+ daemonset : NewDaemonset ().
1225+ WithContainer (corev1.Container {Name : "nvidia-cc-manager" }).
1226+ WithContainer (corev1.Container {Name : "sidecar" }),
1227+ clusterPolicySpec : & gpuv1.ClusterPolicySpec {
1228+ CCManager : gpuv1.CCManagerSpec {
1229+ Repository : "nvcr.io/nvidia/cloud-native" ,
1230+ Image : "k8s-cc-manager" ,
1231+ Version : "v1.0.0" ,
1232+ ImagePullPolicy : "IfNotPresent" ,
1233+ ImagePullSecrets : []string {secret },
1234+ Resources : & gpuv1.ResourceRequirements {Limits : resources .Limits , Requests : resources .Requests },
1235+ Args : []string {"--test-flag" },
1236+ DefaultMode : defaultMode ,
1237+ Env : mockEnv ,
1238+ },
1239+ },
1240+ expectedDaemonset : NewDaemonset ().
1241+ WithContainer (corev1.Container {
1242+ Name : "nvidia-cc-manager" ,
1243+ Image : "nvcr.io/nvidia/cloud-native/k8s-cc-manager:v1.0.0" ,
1244+ ImagePullPolicy : corev1 .PullIfNotPresent ,
1245+ Args : []string {"--test-flag" },
1246+ Env : []corev1.EnvVar {
1247+ {Name : "DEFAULT_CC_MODE" , Value : defaultMode },
1248+ {Name : "foo" , Value : "bar" },
1249+ },
1250+ Resources : resources ,
1251+ }).
1252+ WithContainer (corev1.Container {
1253+ Name : "sidecar" ,
1254+ Resources : resources ,
1255+ }).
1256+ WithPullSecret (secret ),
1257+ },
1258+ }
1259+
1260+ for _ , tc := range testCases {
1261+ t .Run (tc .description , func (t * testing.T ) {
1262+ err := TransformCCManager (tc .daemonset .DaemonSet , tc .clusterPolicySpec , ClusterPolicyController {logger : ctrl .Log .WithName ("test" )})
1263+ require .NoError (t , err )
1264+ require .EqualValues (t , tc .expectedDaemonset , tc .daemonset )
1265+ })
1266+ }
1267+ }
1268+
1269+ func TestTransformVGPUDeviceManager (t * testing.T ) {
1270+ resources := corev1.ResourceRequirements {
1271+ Limits : corev1.ResourceList {
1272+ corev1 .ResourceCPU : resource .MustParse ("100m" ),
1273+ corev1 .ResourceMemory : resource .MustParse ("128Mi" ),
1274+ },
1275+ Requests : corev1.ResourceList {
1276+ corev1 .ResourceCPU : resource .MustParse ("50m" ),
1277+ corev1 .ResourceMemory : resource .MustParse ("64Mi" ),
1278+ },
1279+ }
1280+ secret := "pull-secret"
1281+ mockEnv := []gpuv1.EnvVar {{Name : "foo" , Value : "bar" }}
1282+
1283+ testCases := []struct {
1284+ description string
1285+ daemonset Daemonset
1286+ clusterPolicySpec * gpuv1.ClusterPolicySpec
1287+ expectedDaemonset Daemonset
1288+ }{
1289+ {
1290+ description : "transform vgpu device manager" ,
1291+ daemonset : NewDaemonset ().
1292+ WithContainer (corev1.Container {Name : "nvidia-vgpu-device-manager" }).
1293+ WithContainer (corev1.Container {Name : "sidecar" }),
1294+ clusterPolicySpec : & gpuv1.ClusterPolicySpec {
1295+ VGPUDeviceManager : gpuv1.VGPUDeviceManagerSpec {
1296+ Repository : "nvcr.io/nvidia/cloud-native" ,
1297+ Image : "vgpu-device-manager" ,
1298+ Version : "v1.0.0" ,
1299+ ImagePullPolicy : "IfNotPresent" ,
1300+ ImagePullSecrets : []string {secret },
1301+ Resources : & gpuv1.ResourceRequirements {Limits : resources .Limits , Requests : resources .Requests },
1302+ Args : []string {"--test-flag" },
1303+ Env : mockEnv ,
1304+ Config : & gpuv1.VGPUDevicesConfigSpec {
1305+ Name : "custom-vgpu-config" ,
1306+ Default : "perf" ,
1307+ },
1308+ },
1309+ },
1310+ expectedDaemonset : NewDaemonset ().
1311+ WithContainer (corev1.Container {
1312+ Name : "nvidia-vgpu-device-manager" ,
1313+ Image : "nvcr.io/nvidia/cloud-native/vgpu-device-manager:v1.0.0" ,
1314+ ImagePullPolicy : corev1 .PullIfNotPresent ,
1315+ Args : []string {"--test-flag" },
1316+ Env : []corev1.EnvVar {
1317+ {Name : "foo" , Value : "bar" },
1318+ {Name : "DEFAULT_VGPU_CONFIG" , Value : "perf" },
1319+ },
1320+ Resources : resources ,
1321+ }).
1322+ WithContainer (corev1.Container {
1323+ Name : "sidecar" ,
1324+ Resources : resources ,
1325+ }).
1326+ WithPullSecret (secret ),
1327+ },
1328+ }
1329+
1330+ for _ , tc := range testCases {
1331+ t .Run (tc .description , func (t * testing.T ) {
1332+ err := TransformVGPUDeviceManager (tc .daemonset .DaemonSet , tc .clusterPolicySpec , ClusterPolicyController {logger : ctrl .Log .WithName ("test" )})
1333+ require .NoError (t , err )
1334+ require .EqualValues (t , tc .expectedDaemonset , tc .daemonset )
1335+ })
1336+ }
1337+ }
1338+
10121339func TestTransformValidationInitContainer (t * testing.T ) {
10131340 testCases := []struct {
10141341 description string
0 commit comments