Skip to content

Commit a4ac556

Browse files
authored
CON-12680 - add amd gpu device (metrics exporter) plugin cluster options (#1742)
* add amd gpu device (metrics exporter) plugin cluster options * add missing flag to update command
1 parent 0476db6 commit a4ac556

File tree

6 files changed

+134
-42
lines changed

6 files changed

+134
-42
lines changed

args.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ const (
149149
ArgClusterAutoscalerExpanders = "expanders"
150150
// ArgEnableRoutingAgent enables the routing-agent cluster plugin.
151151
ArgEnableRoutingAgent = "enable-routing-agent"
152+
// ArgEnableAmdGpuDevicePlugin enables automatic installation of the AMD GPU device plugin.
153+
ArgEnableAmdGpuDevicePlugin = "enable-amd-gpu-device-plugin"
154+
// ArgEnableAmdGpuDeviceMetricsExporterPlugin enables automatic installation of the AMD GPU device metrics exporter plugin.
155+
ArgEnableAmdGpuDeviceMetricsExporterPlugin = "enable-amd-gpu-device-metrics-exporter-plugin"
152156
// ArgSurgeUpgrade is a cluster's surge-upgrade argument.
153157
ArgSurgeUpgrade = "surge-upgrade"
154158
// ArgCommandUpsert is an upsert for a resource to be created or updated argument.

commands/displayers/kubernetes.go

Lines changed: 44 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ func (clusters *KubernetesClusters) Cols() []string {
5252
"Autoscaler.UnneededTime",
5353
"Autoscaler.Expanders",
5454
"RoutingAgent",
55+
"AmdGpuDevicePlugin",
56+
"AmdGpuDeviceMetricsExporterPlugin",
5557
}
5658
}
5759

@@ -68,25 +70,27 @@ func (clusters *KubernetesClusters) ColMap() map[string]string {
6870
}
6971
}
7072
return map[string]string{
71-
"ID": "ID",
72-
"Name": "Name",
73-
"Region": "Region",
74-
"Version": "Version",
75-
"AutoUpgrade": "Auto Upgrade",
76-
"HAControlPlane": "HA Control Plane",
77-
"ClusterSubnet": "Cluster Subnet",
78-
"ServiceSubnet": "Service Subnet",
79-
"IPv4": "IPv4",
80-
"Endpoint": "Endpoint",
81-
"Tags": "Tags",
82-
"Status": "Status",
83-
"Created": "Created At",
84-
"Updated": "Updated At",
85-
"NodePools": "Node Pools",
86-
"Autoscaler.UtilizationThreshold": "Autoscaler Scale Down Utilization",
87-
"Autoscaler.UnneededTime": "Autoscaler Scale Down Unneeded Time",
88-
"Autoscaler.Expanders": "Autoscaler Custom Expanders",
89-
"RoutingAgent": "Routing Agent",
73+
"ID": "ID",
74+
"Name": "Name",
75+
"Region": "Region",
76+
"Version": "Version",
77+
"AutoUpgrade": "Auto Upgrade",
78+
"HAControlPlane": "HA Control Plane",
79+
"ClusterSubnet": "Cluster Subnet",
80+
"ServiceSubnet": "Service Subnet",
81+
"IPv4": "IPv4",
82+
"Endpoint": "Endpoint",
83+
"Tags": "Tags",
84+
"Status": "Status",
85+
"Created": "Created At",
86+
"Updated": "Updated At",
87+
"NodePools": "Node Pools",
88+
"Autoscaler.UtilizationThreshold": "Autoscaler Scale Down Utilization",
89+
"Autoscaler.UnneededTime": "Autoscaler Scale Down Unneeded Time",
90+
"Autoscaler.Expanders": "Autoscaler Custom Expanders",
91+
"RoutingAgent": "Routing Agent",
92+
"AmdGpuDevicePlugin": "AMD GPU Device Plugin",
93+
"AmdGpuDeviceMetricsExporterPlugin": "AMD GPU Device Metrics Exporter Plugin",
9094
}
9195
}
9296

@@ -104,25 +108,27 @@ func (clusters *KubernetesClusters) KV() []map[string]any {
104108
}
105109

106110
o := map[string]any{
107-
"ID": cluster.ID,
108-
"Name": cluster.Name,
109-
"Region": cluster.RegionSlug,
110-
"Version": cluster.VersionSlug,
111-
"AutoUpgrade": cluster.AutoUpgrade,
112-
"HAControlPlane": cluster.HA,
113-
"ClusterSubnet": cluster.ClusterSubnet,
114-
"ServiceSubnet": cluster.ServiceSubnet,
115-
"IPv4": cluster.IPv4,
116-
"Endpoint": cluster.Endpoint,
117-
"Tags": tags,
118-
"Status": cluster.Status.State,
119-
"Created": cluster.CreatedAt,
120-
"Updated": cluster.UpdatedAt,
121-
"NodePools": strings.Join(nodePools, " "),
122-
"Autoscaler.UtilizationThreshold": "",
123-
"Autoscaler.UnneededTime": "",
124-
"Autoscaler.Expanders": "",
125-
"RoutingAgent": cluster.RoutingAgent != nil && *cluster.RoutingAgent.Enabled,
111+
"ID": cluster.ID,
112+
"Name": cluster.Name,
113+
"Region": cluster.RegionSlug,
114+
"Version": cluster.VersionSlug,
115+
"AutoUpgrade": cluster.AutoUpgrade,
116+
"HAControlPlane": cluster.HA,
117+
"ClusterSubnet": cluster.ClusterSubnet,
118+
"ServiceSubnet": cluster.ServiceSubnet,
119+
"IPv4": cluster.IPv4,
120+
"Endpoint": cluster.Endpoint,
121+
"Tags": tags,
122+
"Status": cluster.Status.State,
123+
"Created": cluster.CreatedAt,
124+
"Updated": cluster.UpdatedAt,
125+
"NodePools": strings.Join(nodePools, " "),
126+
"Autoscaler.UtilizationThreshold": "",
127+
"Autoscaler.UnneededTime": "",
128+
"Autoscaler.Expanders": "",
129+
"RoutingAgent": cluster.RoutingAgent != nil && *cluster.RoutingAgent.Enabled,
130+
"AmdGpuDevicePlugin": cluster.AmdGpuDevicePlugin != nil && *cluster.AmdGpuDevicePlugin.Enabled,
131+
"AmdGpuDeviceMetricsExporterPlugin": cluster.AmdGpuDeviceMetricsExporterPlugin != nil && *cluster.AmdGpuDeviceMetricsExporterPlugin.Enabled,
126132
}
127133

128134
if cfg := cluster.ClusterAutoscalerConfiguration; cfg != nil {

commands/kubernetes.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,10 @@ After creating a cluster, a configuration context is added to kubectl and made a
299299
"Customizes expanders used by cluster-autoscaler. The autoscaler will apply each expander from the provided comma-separated list to narrow down the selection of node types created to scale up, until either a single node type is left, or the list of expanders is exhausted. Available expanders: random, least-waste, priority. If this flag is empty, autoscaler will use its default expanders.")
300300
AddBoolFlag(cmdKubeClusterCreate, doctl.ArgEnableRoutingAgent, "", false,
301301
"Creates the cluster with routing-agent enabled. Defaults to false. To enable routing-agent, supply --enable-routing-agent=true.")
302+
AddBoolFlag(cmdKubeClusterCreate, doctl.ArgEnableAmdGpuDevicePlugin, "", false,
303+
"Creates the cluster with amd gpu device plugin installed. Defaults to true for clusters with AMD GPUs and otherwise false. To always enable it, supply --enable-amd-gpu-device-plugin=true.")
304+
AddBoolFlag(cmdKubeClusterCreate, doctl.ArgEnableAmdGpuDeviceMetricsExporterPlugin, "", false,
305+
"Creates the cluster with amd gpu device metrics exporter plugin installed. Defaults to false. To enable it, supply --enable-amd-gpu-device-metrics-exporter-plugin=true.")
302306
AddStringSliceFlag(cmdKubeClusterCreate, doctl.ArgTag, "", nil,
303307
"A comma-separated list of `tags` to apply to the cluster, in addition to the default tags of `k8s` and `k8s:$K8S_CLUSTER_ID`.")
304308
AddStringFlag(cmdKubeClusterCreate, doctl.ArgSizeSlug, "",
@@ -349,6 +353,10 @@ Updates the configuration values for a Kubernetes cluster. The cluster must be r
349353
"Creates the cluster with control plane firewall enabled. Defaults to false. To enable the control plane firewall, supply --enable-control-plane-firewall=true.")
350354
AddBoolFlag(cmdKubeClusterUpdate, doctl.ArgEnableRoutingAgent, "", false,
351355
"Creates the cluster with routing-agent enabled. Defaults to false. To enable routing-agent, supply --enable-routing-agent=true.")
356+
AddBoolFlag(cmdKubeClusterUpdate, doctl.ArgEnableAmdGpuDevicePlugin, "", false,
357+
"Creates the cluster with amd gpu device plugin installed. Defaults to true for clusters with AMD GPUs and otherwise false. To always enable it, supply --enable-amd-gpu-device-plugin=true.")
358+
AddBoolFlag(cmdKubeClusterUpdate, doctl.ArgEnableAmdGpuDeviceMetricsExporterPlugin, "", false,
359+
"Creates the cluster with amd gpu device metrics exporter plugin installed. Defaults to false. To enable it, supply --enable-amd-gpu-device-metrics-exporter-plugin=true.")
352360
AddStringFlag(cmdKubeClusterUpdate, doctl.ArgClusterAutoscalerScaleDownUtilizationThreshold, "", "",
353361
"The threshold value for the cluster autoscaler's scale-down-utilization-threshold. It is the maximum value between the sum of CPU requests and sum of memory requests of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down. To set the scale-down-utilization-threshold to 50%, pass the floating point value 0.5.")
354362
AddStringFlag(cmdKubeClusterUpdate, doctl.ArgClusterAutoscalerScaleDownUnneededTime, "", "",
@@ -1716,6 +1724,32 @@ func buildClusterCreateRequestFromArgs(c *CmdConfig, r *godo.KubernetesClusterCr
17161724
}
17171725
}
17181726

1727+
// We need to differentiate here if the option is set or not, as it defaults to a different value on the server-side
1728+
// depending on whether there are AMD GPU nodes in the cluster or not.
1729+
//
1730+
// If we always sent "false", even when the flag isn't set, we would effectively disable the server-side defaulting.
1731+
if c.Doit.IsSet(doctl.ArgEnableAmdGpuDevicePlugin) {
1732+
enableAmdGpuDevicePlugin, err := c.Doit.GetBoolPtr(c.NS, doctl.ArgEnableAmdGpuDevicePlugin)
1733+
if err != nil {
1734+
return err
1735+
}
1736+
if enableAmdGpuDevicePlugin != nil {
1737+
r.AmdGpuDevicePlugin = &godo.KubernetesAmdGpuDevicePlugin{
1738+
Enabled: enableAmdGpuDevicePlugin,
1739+
}
1740+
}
1741+
}
1742+
1743+
enableAmdGpuDeviceMetricsExporterPlugin, err := c.Doit.GetBoolPtr(c.NS, doctl.ArgEnableAmdGpuDeviceMetricsExporterPlugin)
1744+
if err != nil {
1745+
return err
1746+
}
1747+
if enableAmdGpuDeviceMetricsExporterPlugin != nil {
1748+
r.AmdGpuDeviceMetricsExporterPlugin = &godo.KubernetesAmdGpuDeviceMetricsExporterPlugin{
1749+
Enabled: enableAmdGpuDeviceMetricsExporterPlugin,
1750+
}
1751+
}
1752+
17191753
var clusterAutoscalerConfiguration = &godo.KubernetesClusterAutoscalerConfiguration{}
17201754
thresholdStr, err := c.Doit.GetString(c.NS, doctl.ArgClusterAutoscalerScaleDownUtilizationThreshold)
17211755
if err != nil {
@@ -1873,6 +1907,32 @@ func buildClusterUpdateRequestFromArgs(c *CmdConfig, r *godo.KubernetesClusterUp
18731907
}
18741908
}
18751909

1910+
// We need to differentiate here if the option is set or not, as it defaults to a different value on the server-side
1911+
// depending on whether there are AMD GPU nodes in the cluster or not.
1912+
//
1913+
// If we always sent "false", even when the flag isn't set, we would effectively disable the server-side defaulting.
1914+
if c.Doit.IsSet(doctl.ArgEnableAmdGpuDevicePlugin) {
1915+
enableAmdGpuDevicePlugin, err := c.Doit.GetBoolPtr(c.NS, doctl.ArgEnableAmdGpuDevicePlugin)
1916+
if err != nil {
1917+
return err
1918+
}
1919+
if enableAmdGpuDevicePlugin != nil {
1920+
r.AmdGpuDevicePlugin = &godo.KubernetesAmdGpuDevicePlugin{
1921+
Enabled: enableAmdGpuDevicePlugin,
1922+
}
1923+
}
1924+
}
1925+
1926+
enableAmdGpuDeviceMetricsExporterPlugin, err := c.Doit.GetBoolPtr(c.NS, doctl.ArgEnableAmdGpuDeviceMetricsExporterPlugin)
1927+
if err != nil {
1928+
return err
1929+
}
1930+
if enableAmdGpuDeviceMetricsExporterPlugin != nil {
1931+
r.AmdGpuDeviceMetricsExporterPlugin = &godo.KubernetesAmdGpuDeviceMetricsExporterPlugin{
1932+
Enabled: enableAmdGpuDeviceMetricsExporterPlugin,
1933+
}
1934+
}
1935+
18761936
var clusterAutoscalerConfiguration = &godo.KubernetesClusterAutoscalerConfiguration{}
18771937
thresholdStr, err := c.Doit.GetString(c.NS, doctl.ArgClusterAutoscalerScaleDownUtilizationThreshold)
18781938
if err != nil {

commands/kubernetes_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ var (
4848
RoutingAgent: &godo.KubernetesRoutingAgent{
4949
Enabled: boolPtr(true),
5050
},
51+
AmdGpuDevicePlugin: &godo.KubernetesAmdGpuDevicePlugin{
52+
Enabled: boolPtr(true),
53+
},
54+
AmdGpuDeviceMetricsExporterPlugin: &godo.KubernetesAmdGpuDeviceMetricsExporterPlugin{
55+
Enabled: boolPtr(true),
56+
},
5157
},
5258
}
5359

@@ -529,6 +535,12 @@ func TestKubernetesCreate(t *testing.T) {
529535
RoutingAgent: &godo.KubernetesRoutingAgent{
530536
Enabled: boolPtr(true),
531537
},
538+
AmdGpuDevicePlugin: &godo.KubernetesAmdGpuDevicePlugin{
539+
Enabled: boolPtr(true),
540+
},
541+
AmdGpuDeviceMetricsExporterPlugin: &godo.KubernetesAmdGpuDeviceMetricsExporterPlugin{
542+
Enabled: boolPtr(true),
543+
},
532544
}
533545
tm.kubernetes.EXPECT().Create(&r).Return(&testCluster, nil)
534546

@@ -556,6 +568,8 @@ func TestKubernetesCreate(t *testing.T) {
556568
config.Doit.Set(config.NS, doctl.ArgClusterAutoscalerScaleDownUnneededTime, testCluster.ClusterAutoscalerConfiguration.ScaleDownUnneededTime)
557569

558570
config.Doit.Set(config.NS, doctl.ArgEnableRoutingAgent, testCluster.RoutingAgent.Enabled)
571+
config.Doit.Set(config.NS, doctl.ArgEnableAmdGpuDevicePlugin, testCluster.AmdGpuDevicePlugin.Enabled)
572+
config.Doit.Set(config.NS, doctl.ArgEnableAmdGpuDeviceMetricsExporterPlugin, testCluster.AmdGpuDeviceMetricsExporterPlugin.Enabled)
559573

560574
// Test with no vpc-uuid specified
561575
err := testK8sCmdService().RunKubernetesClusterCreate("c-8", 3)(config)
@@ -617,6 +631,12 @@ func TestKubernetesUpdate(t *testing.T) {
617631
RoutingAgent: &godo.KubernetesRoutingAgent{
618632
Enabled: boolPtr(true),
619633
},
634+
AmdGpuDevicePlugin: &godo.KubernetesAmdGpuDevicePlugin{
635+
Enabled: boolPtr(true),
636+
},
637+
AmdGpuDeviceMetricsExporterPlugin: &godo.KubernetesAmdGpuDeviceMetricsExporterPlugin{
638+
Enabled: boolPtr(true),
639+
},
620640
}
621641
tm.kubernetes.EXPECT().Update(testCluster.ID, &r).Return(&testCluster, nil)
622642

@@ -631,6 +651,8 @@ func TestKubernetesUpdate(t *testing.T) {
631651
config.Doit.Set(config.NS, doctl.ArgClusterAutoscalerScaleDownUtilizationThreshold, testCluster.ClusterAutoscalerConfiguration.ScaleDownUtilizationThreshold)
632652
config.Doit.Set(config.NS, doctl.ArgClusterAutoscalerScaleDownUnneededTime, testCluster.ClusterAutoscalerConfiguration.ScaleDownUnneededTime)
633653
config.Doit.Set(config.NS, doctl.ArgEnableRoutingAgent, testCluster.RoutingAgent.Enabled)
654+
config.Doit.Set(config.NS, doctl.ArgEnableAmdGpuDevicePlugin, testCluster.AmdGpuDevicePlugin.Enabled)
655+
config.Doit.Set(config.NS, doctl.ArgEnableAmdGpuDeviceMetricsExporterPlugin, testCluster.AmdGpuDeviceMetricsExporterPlugin.Enabled)
634656

635657
err := testK8sCmdService().RunKubernetesClusterUpdate(config)
636658
assert.NoError(t, err)

integration/kubernetes_clusters_get_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ var (
117117
`
118118

119119
k8sGetOutput = `
120-
ID Name Region Version Auto Upgrade HA Control Plane Status Endpoint IPv4 Cluster Subnet Service Subnet Tags Created At Updated At Node Pools Autoscaler Scale Down Utilization Autoscaler Scale Down Unneeded Time Autoscaler Custom Expanders Routing Agent
121-
some-cluster-id some-cluster-id nyc3 some-kube-version true false running production 2018-11-15 16:00:11 +0000 UTC 2018-11-15 16:00:11 +0000 UTC frontend-pool 50% 1m30s priority, random false
120+
ID Name Region Version Auto Upgrade HA Control Plane Status Endpoint IPv4 Cluster Subnet Service Subnet Tags Created At Updated At Node Pools Autoscaler Scale Down Utilization Autoscaler Scale Down Unneeded Time Autoscaler Custom Expanders Routing Agent AMD GPU Device Plugin AMD GPU Device Metrics Exporter Plugin
121+
some-cluster-id some-cluster-id nyc3 some-kube-version true false running production 2018-11-15 16:00:11 +0000 UTC 2018-11-15 16:00:11 +0000 UTC frontend-pool 50% 1m30s priority, random false false false
122122
`
123123
)

integration/projects_resources_get_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,8 @@ ID Name Size Region Filesyste
343343
}
344344
`
345345
projectsResourcesGetKubernetesOutput = `
346-
ID Name Region Version Auto Upgrade HA Control Plane Status Endpoint IPv4 Cluster Subnet Service Subnet Tags Created At Updated At Node Pools Autoscaler Scale Down Utilization Autoscaler Scale Down Unneeded Time Autoscaler Custom Expanders Routing Agent
347-
1111 false false provisioning k8s 2021-01-29 16:02:02 +0000 UTC 0001-01-01 00:00:00 +0000 UTC pool-test false
346+
ID Name Region Version Auto Upgrade HA Control Plane Status Endpoint IPv4 Cluster Subnet Service Subnet Tags Created At Updated At Node Pools Autoscaler Scale Down Utilization Autoscaler Scale Down Unneeded Time Autoscaler Custom Expanders Routing Agent AMD GPU Device Plugin AMD GPU Device Metrics Exporter Plugin
347+
1111 false false provisioning k8s 2021-01-29 16:02:02 +0000 UTC 0001-01-01 00:00:00 +0000 UTC pool-test false false false
348348
`
349349

350350
projectsResourcesListKubernetesOutput = `

0 commit comments

Comments
 (0)