Skip to content

Commit 14ee71a

Browse files
committed
azure: Distribute compute subnets to proper zones.
Distribute the compute subnets across the NAT gateways. The distribution depends on the NAT gateway availability zones and the VM availability zones.
1 parent 8f1c1da commit 14ee71a

File tree

6 files changed

+247
-19
lines changed

6 files changed

+247
-19
lines changed

pkg/asset/installconfig/azure/metadata.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import (
66
"sort"
77
"sync"
88

9+
"sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
10+
911
typesazure "github.com/openshift/installer/pkg/types/azure"
1012
)
1113

@@ -17,7 +19,9 @@ type Metadata struct {
1719
client API
1820
dnsCfg *DNSConfig
1921
availabilityZones []string
22+
vmZones []string
2023
region string
24+
ZonesSubnetMap map[string][]string
2125

2226
// CloudName indicates the Azure cloud environment (e.g. public, gov't).
2327
CloudName typesazure.CloudEnvironment `json:"cloudName,omitempty"`
@@ -122,3 +126,75 @@ func (m *Metadata) AvailabilityZones(ctx context.Context) ([]string, error) {
122126

123127
return m.availabilityZones, nil
124128
}
129+
130+
// VMAvailabilityZones retrieves a list of availability zones for the configured region and instance type.
131+
func (m *Metadata) VMAvailabilityZones(ctx context.Context, instanceType string) ([]string, error) {
132+
m.mutex.Lock()
133+
defer m.mutex.Unlock()
134+
135+
if len(m.vmZones) == 0 {
136+
zones, err := m.client.GetAvailabilityZones(ctx, m.region, instanceType)
137+
if err != nil {
138+
return nil, fmt.Errorf("error retrieving Availability Zones: %w", err)
139+
}
140+
if zones != nil {
141+
sort.Strings(zones)
142+
m.vmZones = zones
143+
}
144+
}
145+
146+
return m.vmZones, nil
147+
}
148+
149+
// GenerateZonesSubnetMap creates a map of all the zones that are supported for nat gateways and vms and
150+
// sets it to the subnets provided. If no subnets are provided, it creates subnets for multi zone
151+
// functionality.
152+
func (m *Metadata) GenerateZonesSubnetMap(subnetSpec []typesazure.SubnetSpec, defaultComputeSubnet string) (map[string][]string, error) {
153+
if m.ZonesSubnetMap == nil {
154+
// Get the availability zones.
155+
if m.availabilityZones == nil {
156+
_, err := m.AvailabilityZones(context.TODO())
157+
if err != nil {
158+
return nil, err
159+
}
160+
}
161+
subnetZones := m.availabilityZones
162+
computeSubnets := []string{}
163+
164+
// Get all the byo subnets or generate subnet per az.
165+
if len(subnetSpec) != 0 {
166+
sort.Slice(subnetSpec, func(i, j int) bool {
167+
return subnetSpec[i].Name < subnetSpec[j].Name
168+
})
169+
for _, subnet := range subnetSpec {
170+
if subnet.Role == v1beta1.SubnetNode {
171+
computeSubnets = append(computeSubnets, subnet.Name)
172+
}
173+
}
174+
} else {
175+
for idx := range subnetZones {
176+
computeName := fmt.Sprintf("%s-%d", defaultComputeSubnet, idx)
177+
if idx == 0 {
178+
computeName = defaultComputeSubnet
179+
}
180+
computeSubnets = append(computeSubnets, computeName)
181+
}
182+
}
183+
184+
// Assign zone to subnets.
185+
subnetMap := map[string][]string{}
186+
zoneIndex := 0
187+
for _, subnet := range computeSubnets {
188+
if _, ok := subnetMap[subnetZones[zoneIndex]]; !ok {
189+
subnetMap[subnetZones[zoneIndex]] = []string{}
190+
}
191+
subnetMap[subnetZones[zoneIndex]] = append(subnetMap[subnetZones[zoneIndex]], subnet)
192+
zoneIndex++
193+
if zoneIndex >= len(subnetZones) {
194+
zoneIndex = 0
195+
}
196+
}
197+
m.ZonesSubnetMap = subnetMap
198+
}
199+
return m.ZonesSubnetMap, nil
200+
}

pkg/asset/machines/azure/machinesets.go

Lines changed: 148 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,24 @@ package azure
22

33
import (
44
"fmt"
5+
"slices"
56
"sort"
67

78
"github.com/pkg/errors"
89
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
910
"k8s.io/apimachinery/pkg/runtime"
11+
"k8s.io/apimachinery/pkg/util/sets"
1012

1113
clusterapi "github.com/openshift/api/machine/v1beta1"
14+
"github.com/openshift/installer/pkg/asset/installconfig"
1215
icazure "github.com/openshift/installer/pkg/asset/installconfig/azure"
1316
"github.com/openshift/installer/pkg/types"
1417
"github.com/openshift/installer/pkg/types/azure"
1518
)
1619

1720
// MachineSets returns a list of machinesets for a machinepool.
18-
func MachineSets(clusterID string, config *types.InstallConfig, pool *types.MachinePool, osImage, role, userDataSecret string, capabilities map[string]string, useImageGallery bool, subnetZones []string, session *icazure.Session) ([]*clusterapi.MachineSet, error) {
21+
func MachineSets(clusterID string, ic *installconfig.InstallConfig, pool *types.MachinePool, osImage, role, userDataSecret string, capabilities map[string]string, useImageGallery bool, subnetZones []string, session *icazure.Session) ([]*clusterapi.MachineSet, error) {
22+
config := ic.Config
1923
if configPlatform := config.Platform.Name(); configPlatform != azure.Name {
2024
return nil, fmt.Errorf("non-azure configuration: %q", configPlatform)
2125
}
@@ -47,6 +51,27 @@ func MachineSets(clusterID string, config *types.InstallConfig, pool *types.Mach
4751
sort.Strings(azs)
4852
subnetIndex := -1
4953
var machinesets []*clusterapi.MachineSet
54+
55+
if config.Azure.OutboundType == azure.NATGatewayMultiZoneOutboundType {
56+
return getMultiZoneMachineSets(multiZoneMachineSetInput{
57+
networkResourceGroup: networkResourceGroup,
58+
virtualNetworkName: virtualNetworkName,
59+
platform: platform,
60+
mpool: mpool,
61+
osImage: osImage,
62+
userDataSecret: userDataSecret,
63+
clusterID: clusterID,
64+
role: role,
65+
capabilities: capabilities,
66+
useImageGallery: useImageGallery,
67+
session: session,
68+
subnetSpec: config.Azure.Subnets,
69+
replicas: total,
70+
ic: ic,
71+
azs: azs,
72+
pool: pool,
73+
})
74+
}
5075
for idx, az := range azs {
5176
replicas := int32(total / numOfAZs)
5277
if int64(idx) < total%numOfAZs {
@@ -102,3 +127,125 @@ func MachineSets(clusterID string, config *types.InstallConfig, pool *types.Mach
102127
}
103128
return machinesets, nil
104129
}
130+
131+
type multiZoneMachineSetInput struct {
132+
networkResourceGroup string
133+
platform *azure.Platform
134+
mpool *azure.MachinePool
135+
osImage string
136+
userDataSecret string
137+
clusterID string
138+
role string
139+
capabilities map[string]string
140+
useImageGallery bool
141+
session *icazure.Session
142+
virtualNetworkName string
143+
subnetSpec []azure.SubnetSpec
144+
replicas int64
145+
ic *installconfig.InstallConfig
146+
azs []string
147+
pool *types.MachinePool
148+
}
149+
150+
func getMultiZoneMachineSets(in multiZoneMachineSetInput) ([]*clusterapi.MachineSet, error) {
151+
// Deep copy metadata map.
152+
zoneSubnetmap := map[string][]string{}
153+
subnetCount := 0
154+
// Filter for the zones the user provided for compute nodes.
155+
for key, value := range in.ic.Azure.ZonesSubnetMap {
156+
if slices.Contains(in.azs, key) {
157+
zoneSubnetmap[key] = sets.NewString(value...).List()
158+
subnetCount += len(value)
159+
}
160+
}
161+
machineSets := []*clusterapi.MachineSet{}
162+
replicasToCreate := int32(in.replicas)
163+
// Calculate the replicas per machine set.
164+
// This just first finds the nearest multiple of subnet count
165+
// then distributes the remainder across the machine sets one by one.
166+
// If there are 3 subnets and 8 replicas, first we would
167+
// set 8/3 = 2 replicas for each subnet (2,2,2) and distribute the
168+
// remaining machines (2) evenly to have (3,3,2).
169+
replicaPerSet := max(replicasToCreate/int32(subnetCount), 1)
170+
remainder := replicasToCreate % int32(subnetCount)
171+
if replicasToCreate < int32(subnetCount) {
172+
remainder = 0
173+
}
174+
numAZUsed := map[string]int{}
175+
for _, az := range in.azs {
176+
numAZUsed[az] = 0
177+
}
178+
179+
// Iterate till we used up all the replicas mentioned.
180+
// Iterate through the zones provided and find a subnet to use.
181+
for replicasToCreate != 0 && len(zoneSubnetmap) != 0 {
182+
for idx, az := range in.azs {
183+
if _, ok := zoneSubnetmap[az]; !ok {
184+
continue
185+
}
186+
subnet := zoneSubnetmap[az][0]
187+
if len(zoneSubnetmap[az]) == 1 {
188+
delete(zoneSubnetmap, az)
189+
} else {
190+
zoneSubnetmap[az] = zoneSubnetmap[az][1:]
191+
}
192+
currentReplica := replicaPerSet
193+
if remainder != 0 {
194+
currentReplica++
195+
remainder--
196+
}
197+
provider, err := provider(in.platform, in.mpool, in.osImage, in.userDataSecret, in.clusterID, in.role, &idx, in.capabilities, in.useImageGallery, in.session, in.networkResourceGroup, in.virtualNetworkName, subnet)
198+
if err != nil {
199+
return nil, errors.Wrap(err, "failed to create provider")
200+
}
201+
name := fmt.Sprintf("%s-%s-%s%s-%d", in.clusterID, in.pool.Name, in.platform.Region, az, numAZUsed[az])
202+
if numAZUsed[az] == 0 {
203+
name = fmt.Sprintf("%s-%s-%s%s", in.clusterID, in.pool.Name, in.platform.Region, az)
204+
}
205+
numAZUsed[az]++
206+
mset := &clusterapi.MachineSet{
207+
TypeMeta: metav1.TypeMeta{
208+
APIVersion: "machine.openshift.io/v1beta1",
209+
Kind: "MachineSet",
210+
},
211+
ObjectMeta: metav1.ObjectMeta{
212+
Namespace: "openshift-machine-api",
213+
Name: name,
214+
Labels: map[string]string{
215+
"machine.openshift.io/cluster-api-cluster": in.clusterID,
216+
"machine.openshift.io/cluster-api-machine-role": in.role,
217+
"machine.openshift.io/cluster-api-machine-type": in.role,
218+
},
219+
},
220+
Spec: clusterapi.MachineSetSpec{
221+
Replicas: &currentReplica,
222+
Selector: metav1.LabelSelector{
223+
MatchLabels: map[string]string{
224+
"machine.openshift.io/cluster-api-machineset": name,
225+
"machine.openshift.io/cluster-api-cluster": in.clusterID,
226+
},
227+
},
228+
Template: clusterapi.MachineTemplateSpec{
229+
ObjectMeta: clusterapi.ObjectMeta{
230+
Labels: map[string]string{
231+
"machine.openshift.io/cluster-api-machineset": name,
232+
"machine.openshift.io/cluster-api-cluster": in.clusterID,
233+
"machine.openshift.io/cluster-api-machine-role": in.role,
234+
"machine.openshift.io/cluster-api-machine-type": in.role,
235+
},
236+
},
237+
Spec: clusterapi.MachineSpec{
238+
ProviderSpec: clusterapi.ProviderSpec{
239+
Value: &runtime.RawExtension{Object: provider},
240+
},
241+
// we don't need to set Versions, because we control those via cluster operators.
242+
},
243+
},
244+
},
245+
}
246+
machineSets = append(machineSets, mset)
247+
replicasToCreate -= currentReplica
248+
}
249+
}
250+
return machineSets, nil
251+
}

pkg/asset/machines/clusterapi.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ func (c *ClusterAPI) Generate(ctx context.Context, dependencies asset.Parents) e
236236
}
237237

238238
if len(mpool.Zones) == 0 {
239-
azs, err := client.GetAvailabilityZones(ctx, ic.Platform.Azure.Region, mpool.InstanceType)
239+
azs, err := installConfig.Azure.VMAvailabilityZones(ctx, mpool.InstanceType)
240240
if err != nil {
241241
return fmt.Errorf("failed to fetch availability zones: %w", err)
242242
}

pkg/asset/machines/master.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ func (m *Master) Generate(ctx context.Context, dependencies asset.Parents) error
370370
}
371371

372372
if len(mpool.Zones) == 0 {
373-
azs, err := client.GetAvailabilityZones(ctx, ic.Platform.Azure.Region, mpool.InstanceType)
373+
azs, err := installConfig.Azure.VMAvailabilityZones(ctx, mpool.InstanceType)
374374
if err != nil {
375375
return errors.Wrap(err, "failed to fetch availability zones")
376376
}

pkg/asset/machines/worker.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ func (w *Worker) Generate(ctx context.Context, dependencies asset.Parents) error
571571
}
572572

573573
if len(mpool.Zones) == 0 {
574-
azs, err := client.GetAvailabilityZones(ctx, ic.Platform.Azure.Region, mpool.InstanceType)
574+
azs, err := installConfig.Azure.VMAvailabilityZones(ctx, mpool.InstanceType)
575575
if err != nil {
576576
return errors.Wrap(err, "failed to fetch availability zones")
577577
}
@@ -588,6 +588,11 @@ func (w *Worker) Generate(ctx context.Context, dependencies asset.Parents) error
588588
if err != nil {
589589
return errors.Wrap(err, "failed to fetch availability zones")
590590
}
591+
computeSubnet := installConfig.Config.Azure.ComputeSubnetName(clusterID.InfraID)
592+
_, err := installConfig.Azure.GenerateZonesSubnetMap(installConfig.Config.Azure.Subnets, computeSubnet)
593+
if err != nil {
594+
return err
595+
}
591596
}
592597

593598
if mpool.OSImage.Publisher != "" {
@@ -610,7 +615,7 @@ func (w *Worker) Generate(ctx context.Context, dependencies asset.Parents) error
610615
}
611616

612617
useImageGallery := ic.Platform.Azure.CloudName != azuretypes.StackCloud
613-
sets, err := azure.MachineSets(clusterID.InfraID, ic, &pool, rhcosImage.Compute, "worker", workerUserDataSecretName, capabilities, useImageGallery, subnetZones, session)
618+
sets, err := azure.MachineSets(clusterID.InfraID, installConfig, &pool, rhcosImage.Compute, "worker", workerUserDataSecretName, capabilities, useImageGallery, subnetZones, session)
614619
if err != nil {
615620
return errors.Wrap(err, "failed to create worker machine objects")
616621
}

0 commit comments

Comments
 (0)