Skip to content

Commit 9473f87

Browse files
committed
PCP-5152: Multiple node HCP cluster losing interface reference
1 parent 9a599b6 commit 9473f87

File tree

9 files changed

+179
-82
lines changed

9 files changed

+179
-82
lines changed

controllers/lxd_initializer_ds.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"sigs.k8s.io/yaml"
1313

1414
"github.com/spectrocloud/cluster-api-provider-maas/pkg/maas/scope"
15+
"github.com/spectrocloud/cluster-api-provider-maas/pkg/util/trust"
1516

1617
// embed template
1718
_ "embed"
@@ -85,8 +86,12 @@ func (r *MaasClusterReconciler) ensureLXDInitializerDS(ctx context.Context, clus
8586
}
8687

8788
nt := cfg.NICType
89+
if nt == "" {
90+
nt = "bridged"
91+
}
8892
np := cfg.NICParent
89-
tp := "capmaas"
93+
// Deterministic per-cluster trust password derived from cluster UID
94+
tp := trust.DeriveTrustPassword(string(cluster.UID))
9095

9196
rendered := render(map[string]string{
9297
"${STORAGE_BACKEND}": sb,

controllers/maasmachine_controller.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ import (
5050
lxd "github.com/spectrocloud/cluster-api-provider-maas/pkg/maas/lxd"
5151
maasmachine "github.com/spectrocloud/cluster-api-provider-maas/pkg/maas/machine"
5252
"github.com/spectrocloud/cluster-api-provider-maas/pkg/maas/scope"
53+
"github.com/spectrocloud/maas-client-go/maasclient"
5354
)
5455

5556
var ErrRequeueDNS = errors.New("need to requeue DNS")
@@ -227,6 +228,30 @@ func (r *MaasMachineReconciler) reconcileDelete(_ context.Context, machineScope
227228
api := clusterScope.GetMaasClientIdentity()
228229
// choose ExternalIP first, then InternalIP
229230
nodeIP := getNodeIP(m.Addresses)
231+
// For control-plane BM that backs an LXD VM host, force-delete guest VMs to unblock release
232+
if clusterScope.IsLXDHostEnabled() && machineScope.IsControlPlane() {
233+
ctx := context.Background()
234+
client := maasclient.NewAuthenticatedClientSet(api.URL, api.Token)
235+
if hosts, herr := client.VMHosts().List(ctx, nil); herr == nil {
236+
for _, h := range hosts {
237+
if h.HostSystemID() == m.ID {
238+
if guests, gerr := h.Machines().List(ctx); gerr == nil {
239+
for _, g := range guests {
240+
gid := g.SystemID()
241+
if gid == "" {
242+
continue
243+
}
244+
// Fetch details to confirm and delete
245+
if gm, ge := client.Machines().Machine(gid).Get(ctx); ge == nil {
246+
_ = client.Machines().Machine(gm.SystemID()).Delete(ctx)
247+
}
248+
}
249+
}
250+
break
251+
}
252+
}
253+
}
254+
}
230255
if nodeIP != "" {
231256
if uerr := lxd.UnregisterLXDHostWithMaasClient(api.Token, api.URL, nodeIP); uerr != nil {
232257
machineScope.Error(uerr, "failed to unregister LXD VM host prior to release")

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ require (
1111
github.com/onsi/ginkgo v1.16.5
1212
github.com/onsi/gomega v1.36.1
1313
github.com/pkg/errors v0.9.1
14-
github.com/spectrocloud/maas-client-go v0.0.7-beta1
14+
github.com/spectrocloud/maas-client-go v0.0.8-beta1
1515
github.com/spf13/pflag v1.0.5
1616
k8s.io/api v0.31.3
1717
k8s.io/apiextensions-apiserver v0.31.3

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,8 @@ github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU
166166
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
167167
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
168168
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
169-
github.com/spectrocloud/maas-client-go v0.0.7-beta1 h1:2GryA5JSrjlsvzLaCIGyPfxcaSCPrw7fm8ixMf7aRbY=
170-
github.com/spectrocloud/maas-client-go v0.0.7-beta1/go.mod h1:CaqAAlh6/xfzc/cDpU8eMG0wqnwx1ODSyXcH86uV7Ww=
169+
github.com/spectrocloud/maas-client-go v0.0.8-beta1 h1:PCY6M3M9uXZG8dzoe0jNcMnh4nOhJuZBF2C3vsUXp9A=
170+
github.com/spectrocloud/maas-client-go v0.0.8-beta1/go.mod h1:CaqAAlh6/xfzc/cDpU8eMG0wqnwx1ODSyXcH86uV7Ww=
171171
github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
172172
github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
173173
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=

lxd-initializer/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ go 1.24.5
44

55
require (
66
github.com/canonical/lxd v0.0.0-20250730070707-c4a122e242bb
7-
github.com/spectrocloud/maas-client-go v0.0.6-beta1
7+
github.com/spectrocloud/maas-client-go v0.0.8-beta1
88
k8s.io/apimachinery v0.31.3
99
k8s.io/client-go v0.31.3
1010
)

lxd-initializer/go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA=
103103
github.com/rs/cors v1.11.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU=
104104
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
105105
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
106-
github.com/spectrocloud/maas-client-go v0.0.6-beta1 h1:sajM2xeYEQNe/3ObyIkTxJJEsy2OM9w4loYsmDCzqio=
107-
github.com/spectrocloud/maas-client-go v0.0.6-beta1/go.mod h1:CaqAAlh6/xfzc/cDpU8eMG0wqnwx1ODSyXcH86uV7Ww=
106+
github.com/spectrocloud/maas-client-go v0.0.8-beta1 h1:PCY6M3M9uXZG8dzoe0jNcMnh4nOhJuZBF2C3vsUXp9A=
107+
github.com/spectrocloud/maas-client-go v0.0.8-beta1/go.mod h1:CaqAAlh6/xfzc/cDpU8eMG0wqnwx1ODSyXcH86uV7Ww=
108108
github.com/spf13/pflag v1.0.7 h1:vN6T9TfwStFPFM5XzjsvmzZkLuaLX+HS+0SeFLRgU6M=
109109
github.com/spf13/pflag v1.0.7/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
110110
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=

lxd-initializer/lxd-initializer.go

Lines changed: 131 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,89 @@ func getMachineInfoFromMaas(nodeName, maasAPIKey, maasEndpoint string) (zone, re
175175
return zone, resourcePool, bootInterface, nil
176176
}
177177

178+
// registerWithMAAS registers the node as an LXD VM host in MAAS (idempotent)
179+
func registerWithMAAS(maasEndpoint, maasAPIKey, systemID, nodeIP, trustPassword, zone, resourcePool, hostName string) error {
180+
if maasEndpoint == "" || maasAPIKey == "" {
181+
return fmt.Errorf("MAAS credentials unavailable")
182+
}
183+
ctx := context.Background()
184+
client := maasclient.NewAuthenticatedClientSet(maasEndpoint, maasAPIKey)
185+
186+
// Guard 1: Verify the MAAS machine identified by systemID owns nodeIP (or derive power IP)
187+
m, err := client.Machines().Machine(systemID).Get(ctx)
188+
if err != nil {
189+
return fmt.Errorf("get machine %s: %w", systemID, err)
190+
}
191+
hostIP := nodeIP
192+
owns := false
193+
for _, ip := range m.IPAddresses() {
194+
if ip.String() == nodeIP {
195+
owns = true
196+
break
197+
}
198+
}
199+
if !owns {
200+
ips := m.IPAddresses()
201+
if len(ips) == 0 {
202+
return fmt.Errorf("ownership check failed: system-id %s has no IPs; cannot register", systemID)
203+
}
204+
hostIP = ips[0].String()
205+
}
206+
207+
// Idempotency/conflict checks via API for speed
208+
hosts, err := client.VMHosts().List(ctx, nil)
209+
if err != nil {
210+
return fmt.Errorf("list vm hosts: %w", err)
211+
}
212+
wantHost := fmt.Sprintf("https://%s:8443", hostIP)
213+
for _, h := range hosts {
214+
if h.PowerAddress() == wantHost && h.HostSystemID() != "" && h.HostSystemID() != systemID {
215+
return fmt.Errorf("conflict: existing VM host %s uses %s mapped to %s", h.Name(), wantHost, h.HostSystemID())
216+
}
217+
if h.Name() == hostName || h.PowerAddress() == wantHost {
218+
log.Printf("MAAS VM host already present: name=%s power_address=%s", h.Name(), h.PowerAddress())
219+
return nil
220+
}
221+
}
222+
223+
// Prefer MAAS CLI for creation to match manual success path
224+
if _, err := exec.LookPath("maas"); err == nil {
225+
profile := "ds"
226+
// Non-interactive login (idempotent)
227+
_ = runCmd("maas", []string{"login", profile, maasEndpoint, maasAPIKey})
228+
args := []string{profile, "vm-hosts", "create", "type=lxd", fmt.Sprintf("power_address=%s", wantHost), fmt.Sprintf("password=%s", trustPassword), fmt.Sprintf("name=%s", hostName)}
229+
// Do not pass zone/pool on create
230+
if err := runCmd("maas", args); err != nil {
231+
return fmt.Errorf("maas cli create failed: %w", err)
232+
}
233+
log.Printf("MAAS VM host registered via CLI: %s (%s)", hostName, wantHost)
234+
return nil
235+
}
236+
237+
// Fallback: minimal API create
238+
params := maasclient.ParamsBuilder().
239+
Set("type", "lxd").
240+
Set("power_address", wantHost).
241+
Set("name", hostName)
242+
if trustPassword != "" {
243+
params.Set("password", trustPassword)
244+
}
245+
if _, err := client.VMHosts().Create(ctx, params); err != nil {
246+
return fmt.Errorf("create vm host: %w", err)
247+
}
248+
log.Printf("MAAS VM host registered via API: %s (%s)", hostName, wantHost)
249+
return nil
250+
}
251+
252+
// runCmd runs bin with args, waits for it to finish, and discards its output
// on success.
//
// On failure the returned error wraps the underlying os/exec error, so callers
// can match it with errors.Is / errors.As (for example exec.ErrNotFound or
// *exec.ExitError). The message also carries the command's combined
// stdout/stderr when there is any.
//
// NOTE: the message includes args verbatim. Callers that pass secrets on the
// command line must scrub the error before logging it.
func runCmd(bin string, args []string) error {
	out, err := exec.Command(bin, args...).CombinedOutput()
	if err == nil {
		return nil
	}
	if len(out) == 0 {
		// The command produced no output (e.g. it never started), so the
		// wrapped error is the only diagnostic available.
		return fmt.Errorf("%s %v: %w", bin, args, err)
	}
	return fmt.Errorf("%s %v: %w: %s", bin, args, err, string(out))
}
260+
178261
func main() {
179262
log.Println("Starting LXD initializer")
180263

@@ -218,7 +301,13 @@ func main() {
218301
maasAPIKey := *maasAPIKeyFlag
219302
maasEndpoint := *maasEndpointFlag
220303

221-
// If flags are not provided, try to read from the Kubernetes secret
304+
// If flags are not provided, use env (set by DS rendering) or try to read from the Kubernetes secret
305+
if maasEndpoint == "" {
306+
maasEndpoint = os.Getenv("MAAS_ENDPOINT")
307+
}
308+
if maasAPIKey == "" {
309+
maasAPIKey = os.Getenv("MAAS_API_KEY")
310+
}
222311
if maasAPIKey == "" || maasEndpoint == "" {
223312
if secretEndpoint, secretAPIKey, err := getMaasCredentialsFromSecret(); err == nil {
224313
if maasEndpoint == "" {
@@ -245,7 +334,16 @@ func main() {
245334
storageSize = "50"
246335
}
247336

248-
nicType := "macvlan"
337+
// Determine NIC type and parent
338+
// NIC_TYPE env supports values like "bridge"/"bridged" or "macvlan". Default to bridge.
339+
nicTypeEnv := os.Getenv("NIC_TYPE")
340+
if nicTypeEnv == "" {
341+
nicTypeEnv = "bridge"
342+
}
343+
nicMode := strings.ToLower(nicTypeEnv)
344+
if nicMode == "bridged" {
345+
nicMode = "bridge"
346+
}
249347

250348
networkBridge := *networkBridgeFlag
251349
if networkBridge == "" {
@@ -264,9 +362,19 @@ func main() {
264362
log.Printf("Resource pool retrieved from MAAS: %s", resourcePool)
265363
log.Printf("Boot interface retrieved from MAAS: %s", bootInterfaceName)
266364

267-
nicParent := bootInterfaceName
365+
nicParent := os.Getenv("NIC_PARENT")
366+
if nicParent == "" {
367+
nicParent = bootInterfaceName
368+
}
268369

269-
log.Printf("Using NIC type=%s parent=%s", nicType, nicParent)
370+
// Log final NIC config
371+
log.Printf("Using NIC mode=%s (device nictype=%s) parent=%s", nicMode, func() string {
372+
if nicMode == "bridge" {
373+
return "bridged"
374+
} else {
375+
return nicMode
376+
}
377+
}(), nicParent)
270378

271379
skipNetworkUpdate := *skipNetworkUpdateFlag
272380
if !skipNetworkUpdate {
@@ -290,7 +398,7 @@ func main() {
290398
// Perform actions based on the specified action
291399
if actionStr == "init" || actionStr == "both" {
292400
// Initialize LXD
293-
if err := initializeLXD(storageBackend, storageSize, networkBridge, skipNetworkUpdate, trustPassword, nicType, nicParent); err != nil {
401+
if err := initializeLXD(storageBackend, storageSize, networkBridge, skipNetworkUpdate, trustPassword, nicMode, nicParent); err != nil {
294402
log.Fatalf("Failed to initialize LXD: %v", err)
295403
}
296404

@@ -305,6 +413,18 @@ func main() {
305413
}
306414
}
307415

416+
if actionStr == "register" || actionStr == "both" {
417+
// Build a stable host name using MAAS system-id
418+
systemID, sErr := extractSystemIDFromNodeName(nodeName)
419+
if sErr != nil {
420+
log.Fatalf("Failed to extract system ID from node name: %v", sErr)
421+
}
422+
hostName := fmt.Sprintf("lxd-host-%s", systemID)
423+
if err := registerWithMAAS(maasEndpoint, maasAPIKey, systemID, nodeIP, trustPassword, zone, resourcePool, hostName); err != nil {
424+
log.Fatalf("Failed to register LXD host in MAAS: %v", err)
425+
}
426+
}
427+
308428
// If running as a standalone binary, exit after completing the actions
309429
if actionStr == "once" {
310430
log.Println("Actions completed successfully")
@@ -602,6 +722,11 @@ func ensureMAASProfile(c lxdclient.InstanceServer, nicType, nicParent, pool stri
602722
return nil // already present
603723
}
604724
}
725+
// Map network mode to device nictype expected by LXD
726+
deviceNictype := nicType
727+
if deviceNictype == "bridge" {
728+
deviceNictype = "bridged"
729+
}
605730
profile := api.ProfilesPost{
606731
Name: profileName,
607732
ProfilePut: api.ProfilePut{
@@ -614,7 +739,7 @@ func ensureMAASProfile(c lxdclient.InstanceServer, nicType, nicParent, pool stri
614739
},
615740
"eth0": {
616741
"type": "nic",
617-
"nictype": nicType,
742+
"nictype": deviceNictype,
618743
"parent": nicParent,
619744
"name": "eth0",
620745
},

pkg/maas/lxd/host_maas_client.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
// HostConfig contains the configuration for setting up an LXD host
2929
type HostConfig struct {
3030
NodeIP string
31+
HostName string
3132
MaasAPIKey string
3233
MaasAPIEndpoint string
3334
StorageBackend string
@@ -142,10 +143,14 @@ func registerWithMaasClient(client maasclient.ClientSetInterface, config HostCon
142143
ctx := context.Background()
143144

144145
// Create registration parameters
146+
name := config.HostName
147+
if name == "" {
148+
name = fmt.Sprintf("lxd-host-%s", config.NodeIP)
149+
}
145150
params := maasclient.ParamsBuilder().
146151
Set("type", "lxd").
147152
Set("power_address", fmt.Sprintf("https://%s:8443", config.NodeIP)).
148-
Set("name", fmt.Sprintf("lxd-host-%s", config.NodeIP))
153+
Set("name", name)
149154

150155
if config.Zone != "" {
151156
// Pass the zone name directly. MAAS API expects the zone name, not ID.
@@ -162,7 +167,7 @@ func registerWithMaasClient(client maasclient.ClientSetInterface, config HostCon
162167
}
163168

164169
log := textlogger.NewLogger(textlogger.NewConfig())
165-
log.Info("register params", "zone", params.Values().Get("zone"), "pool", params.Values().Get("pool"))
170+
log.Info("register params", "zone", params.Values().Get("zone"), "pool", params.Values().Get("pool"), "name", name)
166171

167172
// Register the host with MAAS
168173
_, err := client.VMHosts().Create(ctx, params)

0 commit comments

Comments
 (0)