Skip to content

Commit 9ea47d0

Browse files
✨ feat: Implement autoscaling from zero by auto-populating AWSMachineTemplate capacity (kubernetes-sigs#5711)
* feat: implement auto-population of AWSMachineTemplate capacity and nodeInfo

  Add AWSMachineTemplateReconciler to automatically populate capacity and node info fields by querying the AWS EC2 API. This completes the autoscaling from zero implementation by ensuring the required metadata is available without manual configuration.

  Changes include:
  - Add NodeInfo struct with Architecture and OperatingSystem fields to AWSMachineTemplate status
  - Implement controller that queries EC2 API for instance type specifications
  - Auto-populate CPU, memory, pods, and ephemeral storage capacity
  - Auto-detect architecture (amd64/arm64) and OS (linux/windows) from AMI
  - Add conversion logic for backward compatibility with v1beta1
  - Enable status subresource on AWSMachineTemplate CRD
  - Add comprehensive unit tests (351 lines) covering various scenarios
  - Add RBAC permissions for controller operations

  The controller automatically populates these fields when an AWSMachineTemplate is created or updated, eliminating the need for manual configuration and enabling Cluster Autoscaler to make informed scaling decisions from zero nodes.

  Related: https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md

  Squashed from 5 commits:
  - 9a92a43 Implement autoscaling from zero by auto-populating AWSMachineTemplate capacity
  - 86fe072 add AWSMachineTemplate NodeInfo
  - ddaf62c Fix review comments
  - 4ea52c8 Fix review comments 2
  - b398ffc Fix review comments 3

* feat(api): add Conditions field and update for CAPI v1.11

  Add Conditions to AWSMachineTemplateStatus and update controller for CAPI v1.11 API changes.

  Squashed from 2 commits:
  - ffdf7db Fix review comments 4
  - 6493363 rebase kubernetes-sigs#5720
1 parent 26c3586 commit 9ea47d0

11 files changed

+1051
-10
lines changed

api/v1beta1/awsmachine_conversion.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,10 @@ func (r *AWSMachineTemplate) ConvertTo(dstRaw conversion.Hub) error {
136136
}
137137
}
138138

139+
// Restore Status fields that don't exist in v1beta1.
140+
dst.Status.NodeInfo = restored.Status.NodeInfo
141+
dst.Status.Conditions = restored.Status.Conditions
142+
139143
return nil
140144
}
141145

api/v1beta1/conversion.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,8 @@ func Convert_v1beta2_AWSMachineStatus_To_v1beta1_AWSMachineStatus(in *v1beta2.AW
108108
// Note: DedicatedHostID is not present in v1beta1, so it will be dropped during conversion
109109
return autoConvert_v1beta2_AWSMachineStatus_To_v1beta1_AWSMachineStatus(in, out, s)
110110
}
111+
112+
func Convert_v1beta2_AWSMachineTemplateStatus_To_v1beta1_AWSMachineTemplateStatus(in *v1beta2.AWSMachineTemplateStatus, out *AWSMachineTemplateStatus, s conversion.Scope) error {
113+
// NodeInfo and Conditions fields are ignored (dropped) as they don't exist in v1beta1
114+
return autoConvert_v1beta2_AWSMachineTemplateStatus_To_v1beta1_AWSMachineTemplateStatus(in, out, s)
115+
}

api/v1beta1/zz_generated.conversion.go

Lines changed: 7 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v1beta2/awsmachinetemplate_types.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,59 @@ import (
2323
clusterv1beta1 "sigs.k8s.io/cluster-api/api/core/v1beta1"
2424
)
2525

26+
// Architecture represents the CPU architecture of the node.
27+
// Its underlying type is a string and its value can be any of amd64, arm64.
28+
type Architecture string
29+
30+
// Architecture constants.
31+
const (
32+
ArchitectureAmd64 Architecture = "amd64"
33+
ArchitectureArm64 Architecture = "arm64"
34+
)
35+
36+
// OperatingSystem represents the operating system of the node.
37+
// Its underlying type is a string and its value can be any of linux, windows.
38+
type OperatingSystem string
39+
40+
// Operating system constants.
41+
const (
42+
// OperatingSystemLinux represents the Linux operating system.
43+
OperatingSystemLinux OperatingSystem = "linux"
44+
// OperatingSystemWindows represents the Windows operating system.
45+
OperatingSystemWindows OperatingSystem = "windows"
46+
)
47+
48+
// NodeInfo contains information about the node's architecture and operating system.
49+
type NodeInfo struct {
50+
// Architecture is the CPU architecture of the node.
51+
// Its underlying type is a string and its value can be any of amd64, arm64.
52+
// +kubebuilder:validation:Enum=amd64;arm64
53+
// +optional
54+
Architecture Architecture `json:"architecture,omitempty"`
55+
// OperatingSystem is the operating system of the node.
56+
// Its underlying type is a string and its value can be any of linux, windows.
57+
// +kubebuilder:validation:Enum=linux;windows
58+
// +optional
59+
OperatingSystem OperatingSystem `json:"operatingSystem,omitempty"`
60+
}
61+
2662
// AWSMachineTemplateStatus defines a status for an AWSMachineTemplate.
2763
type AWSMachineTemplateStatus struct {
2864
// Capacity defines the resource capacity for this machine.
2965
// This value is used for autoscaling from zero operations as defined in:
3066
// https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md
3167
// +optional
3268
Capacity corev1.ResourceList `json:"capacity,omitempty"`
69+
70+
// NodeInfo contains information about the node's architecture and operating system.
71+
// This value is used for autoscaling from zero operations as defined in:
72+
// https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md
73+
// +optional
74+
NodeInfo *NodeInfo `json:"nodeInfo,omitempty"`
75+
76+
// Conditions defines current service state of the AWSMachineTemplate.
77+
// +optional
78+
Conditions clusterv1beta1.Conditions `json:"conditions,omitempty"`
3379
}
3480

3581
// AWSMachineTemplateSpec defines the desired state of AWSMachineTemplate.
@@ -40,6 +86,7 @@ type AWSMachineTemplateSpec struct {
4086
// +kubebuilder:object:root=true
4187
// +kubebuilder:resource:path=awsmachinetemplates,scope=Namespaced,categories=cluster-api,shortName=awsmt
4288
// +kubebuilder:storageversion
89+
// +kubebuilder:subresource:status
4390
// +k8s:defaulter-gen=true
4491

4592
// AWSMachineTemplate is the schema for the Amazon EC2 Machine Templates API.
@@ -71,6 +118,16 @@ type AWSMachineTemplateResource struct {
71118
Spec AWSMachineSpec `json:"spec"`
72119
}
73120

121+
// GetConditions returns the observations of the operational state of the AWSMachineTemplate resource.
122+
func (r *AWSMachineTemplate) GetConditions() clusterv1beta1.Conditions {
123+
return r.Status.Conditions
124+
}
125+
126+
// SetConditions sets the underlying service state of the AWSMachineTemplate to the given clusterv1beta1.Conditions.
127+
func (r *AWSMachineTemplate) SetConditions(conditions clusterv1beta1.Conditions) {
128+
r.Status.Conditions = conditions
129+
}
130+
74131
func init() {
75132
SchemeBuilder.Register(&AWSMachineTemplate{}, &AWSMachineTemplateList{})
76133
}

api/v1beta2/zz_generated.deepcopy.go

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinetemplates.yaml

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,7 +1156,84 @@ spec:
11561156
This value is used for autoscaling from zero operations as defined in:
11571157
https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md
11581158
type: object
1159+
conditions:
1160+
description: Conditions defines current service state of the AWSMachineTemplate.
1161+
items:
1162+
description: Condition defines an observation of a Cluster API resource
1163+
operational state.
1164+
properties:
1165+
lastTransitionTime:
1166+
description: |-
1167+
lastTransitionTime is the last time the condition transitioned from one status to another.
1168+
This should be when the underlying condition changed. If that is not known, then using the time when
1169+
the API field changed is acceptable.
1170+
format: date-time
1171+
type: string
1172+
message:
1173+
description: |-
1174+
message is a human readable message indicating details about the transition.
1175+
This field may be empty.
1176+
maxLength: 10240
1177+
minLength: 1
1178+
type: string
1179+
reason:
1180+
description: |-
1181+
reason is the reason for the condition's last transition in CamelCase.
1182+
The specific API may choose whether or not this field is considered a guaranteed API.
1183+
This field may be empty.
1184+
maxLength: 256
1185+
minLength: 1
1186+
type: string
1187+
severity:
1188+
description: |-
1189+
severity provides an explicit classification of Reason code, so the users or machines can immediately
1190+
understand the current situation and act accordingly.
1191+
The Severity field MUST be set only when Status=False.
1192+
maxLength: 32
1193+
type: string
1194+
status:
1195+
description: status of the condition, one of True, False, Unknown.
1196+
type: string
1197+
type:
1198+
description: |-
1199+
type of condition in CamelCase or in foo.example.com/CamelCase.
1200+
Many .condition.type values are consistent across resources like Available, but because arbitrary conditions
1201+
can be useful (see .node.status.conditions), the ability to deconflict is important.
1202+
maxLength: 256
1203+
minLength: 1
1204+
type: string
1205+
required:
1206+
- lastTransitionTime
1207+
- status
1208+
- type
1209+
type: object
1210+
type: array
1211+
nodeInfo:
1212+
description: |-
1213+
NodeInfo contains information about the node's architecture and operating system.
1214+
This value is used for autoscaling from zero operations as defined in:
1215+
https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md
1216+
properties:
1217+
architecture:
1218+
description: |-
1219+
Architecture is the CPU architecture of the node.
1220+
Its underlying type is a string and its value can be any of amd64, arm64.
1221+
enum:
1222+
- amd64
1223+
- arm64
1224+
type: string
1225+
operatingSystem:
1226+
description: |-
1227+
OperatingSystem is the operating system of the node.
1228+
Its underlying type is a string and its value can be any of linux, windows.
1229+
enum:
1230+
- linux
1231+
- windows
1232+
type: string
1233+
type: object
11591234
type: object
11601235
type: object
11611236
served: true
11621237
storage: true
1238+
subresources:
1239+
status: {}

config/rbac/role.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ rules:
183183
resources:
184184
- awsclusters/status
185185
- awsfargateprofiles/status
186+
- awsmachinetemplates/status
186187
- rosaclusters/status
187188
- rosanetworks/status
188189
- rosaroleconfigs/status

0 commit comments

Comments
 (0)