Skip to content

Commit 9f90c82

Browse files
PCP-5484: Replacement CP LXD VM created in a different resource pool during reconciliation (#282) (#288)
* PCP-5484: Replacement CP LXD VM created in a different resource pool during reconciliation * Strict placment for VMs (cherry picked from commit 08dfeb1) Co-authored-by: Amit Sahastrabuddhe <[email protected]>
1 parent fd30718 commit 9f90c82

File tree

2 files changed

+270
-30
lines changed

2 files changed

+270
-30
lines changed

pkg/maas/lxd/host_maas_client.go

Lines changed: 61 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ func UnregisterLXDHostByNameWithMaasClient(apiKey, apiEndpoint, hostName string)
232232
}
233233

234234
// isHostUnderMaintenance checks if a host has maintenance tags
235-
func isHostUnderMaintenance(client maasclient.ClientSetInterface, hostSystemID string, log logr.Logger) bool {
235+
func isHostUnderMaintenance(client machineGetter, hostSystemID string, log logr.Logger) bool {
236236
if hostSystemID == "" {
237237
return false
238238
}
@@ -253,15 +253,31 @@ func isHostUnderMaintenance(client maasclient.ClientSetInterface, hostSystemID s
253253
return false
254254
}
255255

256+
// machineGetter narrows the client interface to only what is needed here
257+
type machineGetter interface {
258+
Machines() maasclient.Machines
259+
}
260+
256261
// SelectLXDHostWithMaasClient selects an LXD host based on availability, AZ, and resource pool
257-
func SelectLXDHostWithMaasClient(client maasclient.ClientSetInterface, hosts []maasclient.VMHost, az, resourcePool string) (maasclient.VMHost, error) {
262+
func SelectLXDHostWithMaasClient(client machineGetter, hosts []maasclient.VMHost, az, resourcePool string) (maasclient.VMHost, error) {
258263
log := textlogger.NewLogger(textlogger.NewConfig())
259264

260265
if len(hosts) == 0 {
261266
return nil, fmt.Errorf("no LXD hosts available")
262267
}
263268

264-
// First, try to find a host in the specified AZ and resource pool
269+
// If either AZ or resource pool is specified, enforce strict placement on the provided constraints.
270+
// By design, AZ and pool are optional; but if specified, they must be honored strictly.
271+
strictConstraints := (az != "" || resourcePool != "")
272+
273+
// Helper to detect Palette-managed hosts by naming convention
274+
isManagedHost := func(h maasclient.VMHost) bool {
275+
return strings.HasPrefix(strings.ToLower(strings.TrimSpace(h.Name())), "lxd-host-")
276+
}
277+
278+
// First pass (strict matching): collect healthy, non-maintenance hosts matching AZ/pool
279+
var strictManaged maasclient.VMHost
280+
var strictAny maasclient.VMHost
265281
for _, host := range hosts {
266282
hostZone := ""
267283
if host.Zone() != nil {
@@ -274,39 +290,54 @@ func SelectLXDHostWithMaasClient(client maasclient.ClientSetInterface, hosts []m
274290
}
275291

276292
if (az == "" || hostZone == az) && (resourcePool == "" || hostPool == resourcePool) {
277-
278-
// Check if the underlying host machine is deployed and powered on
279293
hostSystemID := host.HostSystemID()
280-
if hostSystemID != "" {
281-
// Check actual machine status using MAAS client
282-
ctx := context.Background()
283-
machine, err := client.Machines().Machine(hostSystemID).Get(ctx)
284-
if err != nil {
285-
log.Info("Failed to get machine details", "system-id", hostSystemID, "error", err.Error())
286-
continue
287-
}
288-
289-
powerState := machine.PowerState()
290-
machineState := machine.State()
291-
isHealthy := powerState == "on" && machineState == "Deployed"
292-
293-
// Check if host is under maintenance
294-
isUnderMaintenance := isHostUnderMaintenance(client, hostSystemID, log)
295-
296-
if isUnderMaintenance {
297-
log.Info("Skipping LXD host under maintenance", "host-name", host.Name(), "host-id", hostSystemID)
298-
continue
299-
}
294+
if hostSystemID == "" {
295+
continue
296+
}
297+
// Check host health and maintenance state
298+
ctx := context.Background()
299+
machine, err := client.Machines().Machine(hostSystemID).Get(ctx)
300+
if err != nil {
301+
log.Info("Failed to get machine details", "system-id", hostSystemID, "error", err.Error())
302+
continue
303+
}
304+
powerState := machine.PowerState()
305+
machineState := machine.State()
306+
if powerState != "on" || machineState != "Deployed" {
307+
continue
308+
}
309+
if isHostUnderMaintenance(client, hostSystemID, log) {
310+
log.Info("Skipping LXD host under maintenance", "host-name", host.Name(), "host-id", hostSystemID)
311+
continue
312+
}
300313

301-
if isHealthy {
302-
log.Info("Selected LXD host", "host-name", host.Name(), "host-id", host.SystemID())
303-
return host, nil
304-
}
314+
// Prefer managed host, but allow OOB within constraints
315+
if isManagedHost(host) && strictManaged == nil {
316+
strictManaged = host
305317
}
306-
continue
318+
if strictAny == nil {
319+
strictAny = host
320+
}
321+
}
322+
}
323+
324+
if strictConstraints {
325+
if strictManaged != nil {
326+
log.Info("Selected LXD host (managed, strict)", "host-name", strictManaged.Name(), "host-id", strictManaged.SystemID())
327+
return strictManaged, nil
328+
}
329+
if strictAny != nil {
330+
log.Info("Selected LXD host (strict)", "host-name", strictAny.Name(), "host-id", strictAny.SystemID())
331+
return strictAny, nil
307332
}
333+
log.Info("Strict LXD host placement enforced; no matching host found", "zone", az, "resourcePool", resourcePool)
334+
return nil, fmt.Errorf("no LXD host available matching zone %s and pool %s", az, resourcePool)
308335
}
309336

337+
// If strict constraints are provided (AZ and/or resource pool), do not fall back.
338+
// Return an error so the caller can retry rather than violating placement constraints.
339+
// (Already handled above) If not strict, continue with existing fallback behavior below.
340+
310341
// If no host matches the AZ and resource pool, try to find a host in the specified AZ (without maintenance)
311342
if resourcePool != "" {
312343
for _, host := range hosts {
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
package lxd
2+
3+
import (
4+
"context"
5+
"net"
6+
"testing"
7+
8+
"github.com/golang/mock/gomock"
9+
mock_clientset "github.com/spectrocloud/cluster-api-provider-maas/pkg/maas/client/mock"
10+
"github.com/spectrocloud/cluster-api-provider-maas/pkg/maas/maintenance"
11+
"github.com/spectrocloud/maas-client-go/maasclient"
12+
)
13+
14+
// ---- Fakes for maasclient interfaces ----
15+
16+
type fakeZone struct {
17+
name string
18+
id int
19+
}
20+
21+
func (z *fakeZone) Name() string { return z.name }
22+
func (z *fakeZone) ID() int { return z.id }
23+
func (z *fakeZone) Description() string { return "" }
24+
25+
type fakeResourcePool struct {
26+
name string
27+
id int
28+
}
29+
30+
func (p *fakeResourcePool) Name() string { return p.name }
31+
func (p *fakeResourcePool) ID() int { return p.id }
32+
func (p *fakeResourcePool) Description() string { return "" }
33+
34+
type fakeVMHost struct {
35+
name string
36+
systemID string
37+
hostSystemID string
38+
zone *fakeZone
39+
pool *fakeResourcePool
40+
}
41+
42+
func (h *fakeVMHost) Get(context.Context) (maasclient.VMHost, error) { return h, nil }
43+
func (h *fakeVMHost) Update(context.Context, maasclient.Params) (maasclient.VMHost, error) {
44+
return h, nil
45+
}
46+
func (h *fakeVMHost) Delete(context.Context) error { return nil }
47+
func (h *fakeVMHost) Composer() maasclient.VMComposer { return nil }
48+
func (h *fakeVMHost) Machines() maasclient.VMHostMachines { return nil }
49+
func (h *fakeVMHost) SystemID() string { return h.systemID }
50+
func (h *fakeVMHost) Name() string { return h.name }
51+
func (h *fakeVMHost) Type() string { return "lxd" }
52+
func (h *fakeVMHost) PowerAddress() string { return "" }
53+
func (h *fakeVMHost) HostSystemID() string { return h.hostSystemID }
54+
func (h *fakeVMHost) Zone() maasclient.Zone {
55+
if h.zone == nil {
56+
return nil
57+
}
58+
return h.zone
59+
}
60+
func (h *fakeVMHost) ResourcePool() maasclient.ResourcePool {
61+
if h.pool == nil {
62+
return nil
63+
}
64+
return h.pool
65+
}
66+
func (h *fakeVMHost) TotalCores() int { return 0 }
67+
func (h *fakeVMHost) TotalMemory() int { return 0 }
68+
func (h *fakeVMHost) UsedCores() int { return 0 }
69+
func (h *fakeVMHost) UsedMemory() int { return 0 }
70+
func (h *fakeVMHost) AvailableCores() int { return 0 }
71+
func (h *fakeVMHost) AvailableMemory() int { return 0 }
72+
func (h *fakeVMHost) Capabilities() []string { return nil }
73+
func (h *fakeVMHost) Projects() []string { return nil }
74+
func (h *fakeVMHost) StoragePools() []maasclient.StoragePool { return nil }
75+
76+
type fakeMachine struct {
77+
systemID string
78+
powerState string
79+
state string
80+
tags []string
81+
}
82+
83+
func (m *fakeMachine) Get(ctx context.Context) (maasclient.Machine, error) { return m, nil }
84+
func (m *fakeMachine) Delete(context.Context) error { return nil }
85+
func (m *fakeMachine) Releaser() maasclient.MachineReleaser { return nil }
86+
func (m *fakeMachine) Modifier() maasclient.MachineModifier { return nil }
87+
func (m *fakeMachine) Deployer() maasclient.MachineDeployer { return nil }
88+
func (m *fakeMachine) SystemID() string { return m.systemID }
89+
func (m *fakeMachine) FQDN() string { return "" }
90+
func (m *fakeMachine) Zone() maasclient.Zone { return nil }
91+
func (m *fakeMachine) PowerState() string { return m.powerState }
92+
func (m *fakeMachine) PowerType() string { return "" }
93+
func (m *fakeMachine) Hostname() string { return "" }
94+
func (m *fakeMachine) IPAddresses() []net.IP { return nil }
95+
func (m *fakeMachine) State() string { return m.state }
96+
func (m *fakeMachine) OSSystem() string { return "" }
97+
func (m *fakeMachine) DistroSeries() string { return "" }
98+
func (m *fakeMachine) SwapSize() int { return 0 }
99+
func (m *fakeMachine) PowerManagerOn() maasclient.PowerManagerOn { return nil }
100+
func (m *fakeMachine) BootInterfaceID() string { return "" }
101+
func (m *fakeMachine) TotalStorageGB() float64 { return 0 }
102+
func (m *fakeMachine) GetBootInterfaceType() string { return "" }
103+
func (m *fakeMachine) ResourcePoolName() string { return "" }
104+
func (m *fakeMachine) ZoneName() string { return "" }
105+
func (m *fakeMachine) BootInterfaceName() string { return "" }
106+
func (m *fakeMachine) Tags() []string { return m.tags }
107+
func (m *fakeMachine) Parent() string { return "" }
108+
109+
// ---- Tests ----
110+
111+
type fakeClient struct{ machines maasclient.Machines }
112+
113+
func (f fakeClient) Machines() maasclient.Machines { return f.machines }
114+
115+
func TestSelectLXDHost_StrictPrefersManaged(t *testing.T) {
116+
ctrl := gomock.NewController(t)
117+
defer ctrl.Finish()
118+
119+
machines := mock_clientset.NewMockMachines(ctrl)
120+
client := fakeClient{machines: machines}
121+
122+
// Healthy, non-maintenance backing machines
123+
m1 := &fakeMachine{systemID: "H1", powerState: "on", state: "Deployed", tags: nil}
124+
m2 := &fakeMachine{systemID: "H2", powerState: "on", state: "Deployed", tags: nil}
125+
machines.EXPECT().Machine("H1").Return(m1).AnyTimes()
126+
machines.EXPECT().Machine("H2").Return(m2).AnyTimes()
127+
128+
hosts := []maasclient.VMHost{
129+
&fakeVMHost{name: "lxd-host-a", systemID: "1", hostSystemID: "H1", zone: &fakeZone{name: "z1", id: 1}, pool: &fakeResourcePool{name: "p1", id: 1}},
130+
&fakeVMHost{name: "oob-host-b", systemID: "2", hostSystemID: "H2", zone: &fakeZone{name: "z1", id: 1}, pool: &fakeResourcePool{name: "p1", id: 1}},
131+
}
132+
133+
selected, err := SelectLXDHostWithMaasClient(client, hosts, "z1", "p1")
134+
if err != nil {
135+
t.Fatalf("unexpected error: %v", err)
136+
}
137+
if selected.Name() != "lxd-host-a" {
138+
t.Fatalf("expected managed host to be selected, got %s", selected.Name())
139+
}
140+
}
141+
142+
func TestSelectLXDHost_StrictAllowsOOB(t *testing.T) {
143+
ctrl := gomock.NewController(t)
144+
defer ctrl.Finish()
145+
146+
machines := mock_clientset.NewMockMachines(ctrl)
147+
client := fakeClient{machines: machines}
148+
149+
m := &fakeMachine{systemID: "H3", powerState: "on", state: "Deployed", tags: nil}
150+
machines.EXPECT().Machine("H3").Return(m).AnyTimes()
151+
152+
hosts := []maasclient.VMHost{
153+
&fakeVMHost{name: "external-host", systemID: "3", hostSystemID: "H3", zone: &fakeZone{name: "z1", id: 1}, pool: &fakeResourcePool{name: "p1", id: 1}},
154+
}
155+
156+
selected, err := SelectLXDHostWithMaasClient(client, hosts, "z1", "p1")
157+
if err != nil {
158+
t.Fatalf("unexpected error: %v", err)
159+
}
160+
if selected.Name() != "external-host" {
161+
t.Fatalf("expected OOB host to be selected, got %s", selected.Name())
162+
}
163+
}
164+
165+
func TestSelectLXDHost_StrictNoMatchError(t *testing.T) {
166+
ctrl := gomock.NewController(t)
167+
defer ctrl.Finish()
168+
169+
machines := mock_clientset.NewMockMachines(ctrl)
170+
client := fakeClient{machines: machines}
171+
172+
m := &fakeMachine{systemID: "H4", powerState: "on", state: "Deployed", tags: nil}
173+
machines.EXPECT().Machine("H4").Return(m).AnyTimes()
174+
175+
hosts := []maasclient.VMHost{
176+
&fakeVMHost{name: "lxd-host-x", systemID: "4", hostSystemID: "H4", zone: &fakeZone{name: "z1", id: 1}, pool: &fakeResourcePool{name: "p1", id: 1}},
177+
}
178+
179+
if _, err := SelectLXDHostWithMaasClient(client, hosts, "z2", "p1"); err == nil {
180+
t.Fatalf("expected error when no hosts match strict filters")
181+
}
182+
}
183+
184+
func TestSelectLXDHost_SkipMaintenance(t *testing.T) {
185+
ctrl := gomock.NewController(t)
186+
defer ctrl.Finish()
187+
188+
machines := mock_clientset.NewMockMachines(ctrl)
189+
client := fakeClient{machines: machines}
190+
191+
// First host is managed but under maintenance; second is healthy OOB
192+
m1 := &fakeMachine{systemID: "H5", powerState: "on", state: "Deployed", tags: []string{maintenance.TagHostMaintenance}}
193+
m2 := &fakeMachine{systemID: "H6", powerState: "on", state: "Deployed", tags: nil}
194+
machines.EXPECT().Machine("H5").Return(m1).AnyTimes()
195+
machines.EXPECT().Machine("H6").Return(m2).AnyTimes()
196+
197+
hosts := []maasclient.VMHost{
198+
&fakeVMHost{name: "lxd-host-maint", systemID: "5", hostSystemID: "H5", zone: &fakeZone{name: "z1", id: 1}, pool: &fakeResourcePool{name: "p1", id: 1}},
199+
&fakeVMHost{name: "oob-healthy", systemID: "6", hostSystemID: "H6", zone: &fakeZone{name: "z1", id: 1}, pool: &fakeResourcePool{name: "p1", id: 1}},
200+
}
201+
202+
selected, err := SelectLXDHostWithMaasClient(client, hosts, "z1", "p1")
203+
if err != nil {
204+
t.Fatalf("unexpected error: %v", err)
205+
}
206+
if selected.Name() != "oob-healthy" {
207+
t.Fatalf("expected maintenance host to be skipped, got %s", selected.Name())
208+
}
209+
}

0 commit comments

Comments
 (0)