Skip to content

Commit e4827b7

Browse files
authored
fix: avoid providing truncated AZ filter list (#698)
1 parent ea90c1c commit e4827b7

File tree

2 files changed

+68
-50
lines changed

2 files changed

+68
-50
lines changed

internal/deployers/eksapi/cluster.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ func (m *ClusterManager) getOrCreateCluster(infra *Infrastructure, opts *deploye
4343
ResourcesVpcConfig: &ekstypes.VpcConfigRequest{
4444
EndpointPrivateAccess: aws.Bool(true),
4545
EndpointPublicAccess: aws.Bool(true),
46-
SubnetIds: append(infra.subnetsPublic, infra.subnetsPrivate...),
46+
SubnetIds: infra.subnets(),
4747
},
4848
RoleArn: aws.String(infra.clusterRoleARN),
4949
KubernetesNetworkConfig: &ekstypes.KubernetesNetworkConfigRequest{

internal/deployers/eksapi/infra.go

Lines changed: 67 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
_ "embed"
66
"errors"
77
"fmt"
8-
"math"
98
"path"
109
"slices"
1110
"sort"
@@ -41,9 +40,6 @@ const (
4140
ipamControllerENITagKey = "eks:kubernetes-cni-node-name"
4241
)
4342

44-
// this value is not currently configurable, the infra stack is hardcoded to create 2
45-
const numInfraAZs = 2
46-
4743
// eksEndpointURLTag is the key for an optional tag on the infrastructure CloudFormation stack,
4844
// which indicates which EKS environment is associated with the stack's resources.
4945
// The tag is only added when --endpoint-url is passed to the deployer.
@@ -93,57 +89,33 @@ func (i *Infrastructure) subnets() []string {
9389
}
9490

9591
func (m *InfrastructureManager) createInfrastructureStack(opts *deployerOptions) (*Infrastructure, error) {
96-
// TODO: create a subnet in every AZ
97-
// get two AZs for the subnets
98-
azs, err := m.clients.EC2().DescribeAvailabilityZones(context.TODO(), &ec2.DescribeAvailabilityZonesInput{
99-
Filters: []ec2types.Filter{
100-
{
101-
Name: aws.String("zone-type"),
102-
Values: []string{opts.ZoneType},
103-
},
104-
},
105-
})
106-
if err != nil {
107-
return nil, err
108-
}
109-
110-
var allowedAZs []string
111-
for i := 0; i < numInfraAZs; i++ {
112-
allowedAZs = append(allowedAZs, aws.ToString(azs.AvailabilityZones[i].ZoneName))
113-
}
114-
11592
var subnetAzs []string
11693
if opts.CapacityReservation {
117-
subnetAzs, err = m.getAZsWithCapacity(opts)
94+
azs, err := m.getAZsWithCapacity(opts)
11895
if err != nil {
11996
return nil, err
12097
}
98+
subnetAzs = azs
12199
} else if len(opts.InstanceTypes) > 0 {
122-
azs, err := m.getRankedAZsForInstanceTypes(opts, allowedAZs)
100+
azs, err := m.getRankedAZsForInstanceTypes(opts)
123101
if err != nil {
124102
return nil, err
125103
}
126104
if len(azs) == 0 {
127105
return nil, fmt.Errorf("no AZs support any of the provided instance types (%v)", opts.InstanceTypes)
128106
}
129-
subnetAzs = azs[0:int(math.Min(float64(len(azs)), numInfraAZs))]
130-
} else {
131-
for i := 0; i < numInfraAZs; i++ {
132-
subnetAzs = append(subnetAzs, aws.ToString(azs.AvailabilityZones[i].ZoneName))
133-
}
107+
subnetAzs = azs
134108
}
135-
// make sure we always have the number of AZs used in the infra stack. can end up here if using
136-
// a single capacity reservation or the provided instance types are offered in fewer AZs
137-
for _, az := range azs.AvailabilityZones {
138-
if len(subnetAzs) == numInfraAZs {
139-
break
140-
}
141-
if !slices.Contains(subnetAzs, *az.ZoneName) {
142-
az := *az.ZoneName
143-
klog.Infof("padding infra stack with AZ: %v", az)
144-
subnetAzs = append(subnetAzs, az)
145-
}
109+
110+
// this value is not currently configurable, the infra stack is hardcoded to create 2
111+
// TODO: create a subnet in every AZ. today we need exactly 2 AZs for the subnets.
112+
const numInfraAZs = 2
113+
114+
subnetAzs, err := m.normalizeAZs(opts, subnetAzs, numInfraAZs)
115+
if err != nil {
116+
return nil, err
146117
}
118+
147119
klog.Infof("creating infrastructure stack with AZs: %v", subnetAzs)
148120
input := cloudformation.CreateStackInput{
149121
StackName: aws.String(m.resourceID),
@@ -416,30 +388,76 @@ func (m *InfrastructureManager) getVPCCNINetworkInterfaceIds(vpcId string) ([]st
416388
return enis, nil
417389
}
418390

391+
// normalizeAZs removes availability zones that don't meet launch requirements
392+
// for instances and ensures that the resulting list containers enough AZs to
393+
// satisfy the deployment.
394+
func (m *InfrastructureManager) normalizeAZs(opts *deployerOptions, subnetAZs []string, expectedCount int) ([]string, error) {
395+
azs, err := m.clients.EC2().DescribeAvailabilityZones(context.TODO(), &ec2.DescribeAvailabilityZonesInput{
396+
Filters: []ec2types.Filter{
397+
{
398+
Name: aws.String("zone-type"),
399+
Values: []string{opts.ZoneType},
400+
},
401+
},
402+
})
403+
if err != nil {
404+
return nil, err
405+
}
406+
407+
var supporttedAZs []string
408+
for _, az := range azs.AvailabilityZones {
409+
supporttedAZs = append(supporttedAZs, aws.ToString(az.ZoneName))
410+
}
411+
412+
var filteredAZs []string
413+
for _, az := range subnetAZs {
414+
if slices.Contains(supporttedAZs, az) {
415+
filteredAZs = append(filteredAZs, az)
416+
}
417+
}
418+
419+
// truncate the list if we went over the max
420+
filteredAZs = filteredAZs[:min(len(filteredAZs), expectedCount)]
421+
422+
// pad the availability zones with supported entries if we end up not having
423+
// enough after filtering.
424+
if len(filteredAZs) < expectedCount {
425+
for _, az := range supporttedAZs {
426+
if len(filteredAZs) == expectedCount {
427+
break
428+
}
429+
if !slices.Contains(filteredAZs, az) {
430+
klog.Infof("padding infra stack with AZ: %v", az)
431+
filteredAZs = append(filteredAZs, az)
432+
}
433+
}
434+
}
435+
436+
if len(filteredAZs) != expectedCount {
437+
return nil, fmt.Errorf("failed to provide AZs with expected count %d: %v", expectedCount, filteredAZs)
438+
}
439+
440+
return filteredAZs, nil
441+
}
442+
419443
// getAZsWithInstanceTypes returns the availability zones ordered decreasingly by the number of
420444
// requested instance types they support
421-
func (m *InfrastructureManager) getRankedAZsForInstanceTypes(opts *deployerOptions, allowedAZs []string) ([]string, error) {
445+
func (m *InfrastructureManager) getRankedAZsForInstanceTypes(opts *deployerOptions) ([]string, error) {
422446
offerings, err := m.clients.EC2().DescribeInstanceTypeOfferings(context.TODO(), &ec2.DescribeInstanceTypeOfferingsInput{
423447
LocationType: ec2types.LocationTypeAvailabilityZone,
424448
Filters: []ec2types.Filter{
425449
{
426450
Name: aws.String("instance-type"),
427451
Values: opts.InstanceTypes,
428452
},
429-
{
430-
Name: aws.String("location"),
431-
Values: allowedAZs,
432-
},
433453
},
434454
})
435455
if err != nil {
436456
return nil, fmt.Errorf("failed to describe instance type offerings: %v", err)
437457
}
438458
counts := make(map[string]int)
439459
for _, offering := range offerings.InstanceTypeOfferings {
440-
az := aws.ToString(offering.Location)
441-
count := counts[az]
442-
counts[az] = count + 1
460+
counts[aws.ToString(offering.Location)]++
443461
}
444462
var azs []string
445463
for az := range counts {

0 commit comments

Comments
 (0)