diff --git a/controllers/lxd_initializer_ds.go b/controllers/lxd_initializer_ds.go index ede37f8..ea0c20a 100644 --- a/controllers/lxd_initializer_ds.go +++ b/controllers/lxd_initializer_ds.go @@ -15,8 +15,8 @@ import ( "sigs.k8s.io/yaml" "github.com/spectrocloud/cluster-api-provider-maas/pkg/maas/scope" - "github.com/spectrocloud/cluster-api-provider-maas/pkg/util/trust" "github.com/spectrocloud/cluster-api-provider-maas/pkg/util" + "github.com/spectrocloud/cluster-api-provider-maas/pkg/util/trust" // embed template _ "embed" @@ -43,9 +43,9 @@ func (r *MaasClusterReconciler) ensureLXDInitializerDS(ctx context.Context, clus dsNamespace := cluster.Namespace // Always operate on the TARGET cluster client - remoteClient, err := clusterScope.GetWorkloadClusterClient(ctx) + remoteClient, err := r.getTargetClient(ctx, clusterScope) if err != nil { - return fmt.Errorf("failed to get target cluster client: %v", err) + return err } // If feature is off or cluster is being deleted, we're done @@ -54,65 +54,150 @@ func (r *MaasClusterReconciler) ensureLXDInitializerDS(ctx context.Context, clus } // Gate: ensure pivot completed. Require mgmt namespace to have clusterEnv=target - mgmtNS := &corev1.Namespace{} - if err := r.Client.Get(ctx, client.ObjectKey{Name: dsNamespace}, mgmtNS); err != nil { - // Namespace not readable yet on management cluster; skip for now + isTarget, clusterEnv := r.namespaceIsTarget(ctx, dsNamespace) + if !isTarget { + r.Log.Info("Namespace not marked as target; deferring LXD initializer", "namespace", dsNamespace, "clusterEnv", clusterEnv) return nil } - if v := strings.TrimSpace(mgmtNS.Annotations["clusterEnv"]); v != "target" { - r.Log.Info("Namespace not marked as target; deferring LXD initializer", "namespace", dsNamespace, "clusterEnv", v) + + // Gate: derive desired CP count from MaasCloudConfig; fallback to KCP + desiredCP, readyByKCP := r.computeDesiredControlPlane(ctx, dsNamespace, cluster.Name) + + if ok := r.enoughCPNodesReady(ctx, remoteClient, desiredCP, readyByKCP); !ok { return nil } - // Gate: derive desired CP count from MaasCloudConfig; fallback to KCP + if err := r.deleteExistingInitializerDS(ctx, remoteClient, dsNamespace); err != nil { + return err + } + + // Ensure RBAC resources are created on target cluster + if err := r.ensureLXDInitializerRBACOnTarget(ctx, remoteClient, dsNamespace); err != nil { + return fmt.Errorf("failed to ensure LXD initializer RBAC: %v", err) + } + + if done, err := r.maybeShortCircuitDelete(ctx, remoteClient, dsNamespace, desiredCP, dsName); err != nil { + return err + } else if done { + return nil + } + + ds, err := r.renderDaemonSetForCluster(clusterScope, dsName, dsNamespace) + if err != nil { + return err + } + + // Do not set owner refs across clusters; just create/patch on target cluster + _, err = controllerutil.CreateOrPatch(ctx, remoteClient, ds, func() error { return nil }) + return err +} + +// ensureLXDInitializerRBACOnTarget creates the RBAC resources for lxd-initializer on the target cluster +func (r *MaasClusterReconciler) ensureLXDInitializerRBACOnTarget(ctx context.Context, remoteClient client.Client, namespace string) error { + // Parse RBAC template into separate resources + rbacYaml := strings.ReplaceAll(lxdInitRBACTemplate, "namespace: default", fmt.Sprintf("namespace: %s", namespace)) + + // Split the YAML into separate documents + docs := strings.Split(rbacYaml, "---") + + for _, doc := range docs { + doc = strings.TrimSpace(doc) + if doc == "" { + continue + } + + // Parse as unstructured object to handle different resource types + obj := &unstructured.Unstructured{} + if err := yaml.Unmarshal([]byte(doc), obj); err != nil { + return fmt.Errorf("failed to unmarshal RBAC resource: %v", err) + } + + // Set namespace for namespaced resources + if obj.GetKind() == "ServiceAccount" { + obj.SetNamespace(namespace) + } + + // Create or update the resource on target cluster + _, err := controllerutil.CreateOrPatch(ctx, remoteClient, obj, func() error { return nil }) + if err != nil { + return fmt.Errorf("failed to create/patch %s %s: %v", obj.GetKind(), obj.GetName(), err) + } + } + + return nil +} + +// getTargetClient returns the workload cluster client or a wrapped error +func (r *MaasClusterReconciler) getTargetClient(ctx context.Context, clusterScope *scope.ClusterScope) (client.Client, error) { + remoteClient, err := clusterScope.GetWorkloadClusterClient(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get target cluster client: %v", err) + } + return remoteClient, nil +} + +// namespaceIsTarget checks if the management namespace is annotated as clusterEnv=target +func (r *MaasClusterReconciler) namespaceIsTarget(ctx context.Context, namespace string) (bool, string) { + mgmtNS := &corev1.Namespace{} + if err := r.Client.Get(ctx, client.ObjectKey{Name: namespace}, mgmtNS); err != nil { + return false, "" + } + if mgmtNS.Annotations == nil { + return false, "" + } + v := strings.TrimSpace(mgmtNS.Annotations["clusterEnv"]) + return v == "target", v +} + +// computeDesiredControlPlane determines desired control-plane replicas and ready count +func (r *MaasClusterReconciler) computeDesiredControlPlane(ctx context.Context, namespace, clusterName string) (int32, int32) { desiredCP := int32(1) readyByKCP := int32(0) // Prefer MaasCloudConfig.spec.machinePoolConfig[].size where isControlPlane=true (sum) - { - mccList := &unstructured.UnstructuredList{} - mccList.SetGroupVersionKind(schema.GroupVersionKind{Group: "cluster.spectrocloud.com", Version: "v1alpha1", Kind: "MaasCloudConfigList"}) - if err := r.Client.List(ctx, mccList, client.InNamespace(dsNamespace)); err == nil { - var sum int64 - for _, it := range mccList.Items { - owned := false - for _, or := range it.GetOwnerReferences() { - if or.Name == cluster.Name { - owned = true - break - } - } - if !owned && !strings.HasSuffix(it.GetName(), "-maas-config") { - continue - } - pools, found, _ := unstructured.NestedSlice(it.Object, "spec", "machinePoolConfig") - if !found { - continue + mccList := &unstructured.UnstructuredList{} + mccList.SetGroupVersionKind(schema.GroupVersionKind{Group: "cluster.spectrocloud.com", Version: "v1alpha1", Kind: "MaasCloudConfigList"}) + if err := r.Client.List(ctx, mccList, client.InNamespace(namespace)); err == nil { + var sum int64 + for _, it := range mccList.Items { + owned := false + for _, or := range it.GetOwnerReferences() { + if or.Name == clusterName { + owned = true + break } - for _, p := range pools { - if mp, ok := p.(map[string]interface{}); ok { - isCP, _, _ := unstructured.NestedBool(mp, "isControlPlane") - if !isCP { - continue - } - if v, foundSz, _ := unstructured.NestedInt64(mp, "size"); foundSz && v > 0 { - sum += v - } + } + if !owned && !strings.HasSuffix(it.GetName(), "-maas-config") { + continue + } + pools, found, _ := unstructured.NestedSlice(it.Object, "spec", "machinePoolConfig") + if !found { + continue + } + for _, p := range pools { + if mp, ok := p.(map[string]interface{}); ok { + isCP, _, _ := unstructured.NestedBool(mp, "isControlPlane") + if !isCP { + continue + } + if v, foundSz, _ := unstructured.NestedInt64(mp, "size"); foundSz && v > 0 { + sum += v } } - if sum > 0 { - desiredCP = util.SafeInt64ToInt32(sum) - break - } + } + if sum > 0 { + desiredCP = util.SafeInt64ToInt32(sum) + break } } } + // Fallback: use KCP if MCC not found if desiredCP == 1 { kcpList := &unstructured.UnstructuredList{} kcpList.SetGroupVersionKind(schema.GroupVersionKind{Group: "controlplane.cluster.x-k8s.io", Version: "v1beta1", Kind: "KubeadmControlPlaneList"}) - if err := r.Client.List(ctx, kcpList, client.InNamespace(dsNamespace), client.MatchingLabels{ - "cluster.x-k8s.io/cluster-name": cluster.Name, + if err := r.Client.List(ctx, kcpList, client.InNamespace(namespace), client.MatchingLabels{ + "cluster.x-k8s.io/cluster-name": clusterName, }); err == nil { if len(kcpList.Items) > 0 { item := kcpList.Items[0] @@ -126,6 +211,11 @@ func (r *MaasClusterReconciler) ensureLXDInitializerDS(ctx context.Context, clus } } + return desiredCP, readyByKCP +} + +// enoughCPNodesReady checks the target cluster for Ready control-plane nodes +func (r *MaasClusterReconciler) enoughCPNodesReady(ctx context.Context, remoteClient client.Client, desiredCP, readyByKCP int32) bool { nodeList := &corev1.NodeList{} cpSelector := labels.SelectorFromSet(labels.Set{ "node-role.kubernetes.io/control-plane": "", @@ -140,63 +230,64 @@ func (r *MaasClusterReconciler) ensureLXDInitializerDS(ctx context.Context, clus } } } - // Proceed when CP nodes are present and Ready, regardless of KCP readyReplicas if int64(len(nodeList.Items)) < int64(desiredCP) || int64(ready) < int64(desiredCP) { r.Log.Info("Not enough control-plane nodes present/ready yet; skipping DS for now", "desiredCP", desiredCP, "readyByKCP", readyByKCP, "nodeList", len(nodeList.Items), "ready", ready) - // Not enough control-plane nodes present/ready yet; skip DS for now - return nil + return false } } + return true +} - // Clean up any existing DaemonSets in this namespace (old naming/labels) +// deleteExistingInitializerDS removes any DaemonSets with old labeling in the namespace +func (r *MaasClusterReconciler) deleteExistingInitializerDS(ctx context.Context, remoteClient client.Client, namespace string) error { dsList := &appsv1.DaemonSetList{} - if err := remoteClient.List(ctx, dsList, client.InNamespace(dsNamespace), client.MatchingLabels{ + if err := remoteClient.List(ctx, dsList, client.InNamespace(namespace), client.MatchingLabels{ "app": "lxd-initializer", }); err != nil { return fmt.Errorf("failed to list DaemonSets: %v", err) } - // Delete all existing LXD initializer DaemonSets for _, ds := range dsList.Items { if err := remoteClient.Delete(ctx, &ds); err != nil { return fmt.Errorf("failed to delete DaemonSet %s: %v", ds.Name, err) } } + return nil +} - // Ensure RBAC resources are created on target cluster - if err := r.ensureLXDInitializerRBACOnTarget(ctx, remoteClient, dsNamespace); err != nil { - return fmt.Errorf("failed to ensure LXD initializer RBAC: %v", err) - } - - // Short-circuit deletion: if all control-plane nodes are labeled initialized, delete DS +// maybeShortCircuitDelete deletes the DS if all CP nodes are already initialized +func (r *MaasClusterReconciler) maybeShortCircuitDelete(ctx context.Context, remoteClient client.Client, namespace string, desiredCP int32, dsName string) (bool, error) { shortCircuitNodes := &corev1.NodeList{} shortCircuitSelector := labels.SelectorFromSet(labels.Set{ "node-role.kubernetes.io/control-plane": "", }) - if err := remoteClient.List(ctx, shortCircuitNodes, &client.ListOptions{LabelSelector: shortCircuitSelector}); err == nil && len(shortCircuitNodes.Items) > 0 { - // Count how many CP nodes are initialized - initCount := 0 - for _, n := range shortCircuitNodes.Items { - if n.Labels != nil && n.Labels["lxdhost.cluster.com/initialized"] == "true" { - initCount++ - } + if err := remoteClient.List(ctx, shortCircuitNodes, &client.ListOptions{LabelSelector: shortCircuitSelector}); err != nil || len(shortCircuitNodes.Items) == 0 { + return false, nil + } + + initCount := 0 + for _, n := range shortCircuitNodes.Items { + if n.Labels != nil && n.Labels["lxdhost.cluster.com/initialized"] == "true" { + initCount++ } - // Delete DS only when we see at least desiredCP control-plane nodes - // AND desiredCP of them are initialized - if int64(len(shortCircuitNodes.Items)) >= int64(desiredCP) && int64(initCount) >= int64(desiredCP) { - // Delete existing DSs and return - shortCircuitDSList := &appsv1.DaemonSetList{} - if err := remoteClient.List(ctx, shortCircuitDSList, client.InNamespace(dsNamespace), client.MatchingLabels{"app": dsName}); err == nil { - for _, ds := range shortCircuitDSList.Items { - _ = remoteClient.Delete(ctx, &ds) - } + } + if int64(len(shortCircuitNodes.Items)) >= int64(desiredCP) && int64(initCount) >= int64(desiredCP) { + shortCircuitDSList := &appsv1.DaemonSetList{} + if err := remoteClient.List(ctx, shortCircuitDSList, client.InNamespace(namespace), client.MatchingLabels{"app": dsName}); err == nil { + for _, ds := range shortCircuitDSList.Items { + _ = remoteClient.Delete(ctx, &ds) } - return nil } + return true, nil } + return false, nil +} - // pull LXD config +// renderDaemonSetForCluster renders the DS YAML from template using cluster config and returns a DaemonSet object +func (r *MaasClusterReconciler) renderDaemonSetForCluster(clusterScope *scope.ClusterScope, dsName, namespace string) (*appsv1.DaemonSet, error) { + cluster := clusterScope.MaasCluster cfg := clusterScope.GetLXDConfig() + sb := cfg.StorageBackend if sb == "" { sb = "zfs" @@ -233,7 +324,7 @@ func (r *MaasClusterReconciler) ensureLXDInitializerDS(ctx context.Context, clus ds := &appsv1.DaemonSet{} if err := yaml.Unmarshal([]byte(dsYaml), ds); err != nil { - return err + return nil, err } // ensure names/labels are cluster-specific without touching the image name @@ -244,44 +335,7 @@ func (r *MaasClusterReconciler) ensureLXDInitializerDS(ctx context.Context, clus ds.Labels["app"] = dsName ds.Spec.Selector.MatchLabels["app"] = dsName ds.Spec.Template.Labels["app"] = dsName - ds.Namespace = dsNamespace - - // Do not set owner refs across clusters; just create/patch on target cluster - _, err = controllerutil.CreateOrPatch(ctx, remoteClient, ds, func() error { return nil }) - return err -} - -// ensureLXDInitializerRBACOnTarget creates the RBAC resources for lxd-initializer on the target cluster -func (r *MaasClusterReconciler) ensureLXDInitializerRBACOnTarget(ctx context.Context, remoteClient client.Client, namespace string) error { - // Parse RBAC template into separate resources - rbacYaml := strings.ReplaceAll(lxdInitRBACTemplate, "namespace: default", fmt.Sprintf("namespace: %s", namespace)) - - // Split the YAML into separate documents - docs := strings.Split(rbacYaml, "---") - - for _, doc := range docs { - doc = strings.TrimSpace(doc) - if doc == "" { - continue - } - - // Parse as unstructured object to handle different resource types - obj := &unstructured.Unstructured{} - if err := yaml.Unmarshal([]byte(doc), obj); err != nil { - return fmt.Errorf("failed to unmarshal RBAC resource: %v", err) - } - - // Set namespace for namespaced resources - if obj.GetKind() == "ServiceAccount" { - obj.SetNamespace(namespace) - } + ds.Namespace = namespace - // Create or update the resource on target cluster - _, err := controllerutil.CreateOrPatch(ctx, remoteClient, obj, func() error { return nil }) - if err != nil { - return fmt.Errorf("failed to create/patch %s %s: %v", obj.GetKind(), obj.GetName(), err) - } - } - - return nil + return ds, nil } diff --git a/controllers/maasmachine_controller.go b/controllers/maasmachine_controller.go index e4536ff..6cf020c 100644 --- a/controllers/maasmachine_controller.go +++ b/controllers/maasmachine_controller.go @@ -184,12 +184,10 @@ func (r *MaasMachineReconciler) reconcileDelete(_ context.Context, machineScope if m == nil { // Gate finalizer removal to avoid races during early delete phases. - if !maasMachine.DeletionTimestamp.IsZero() { + if gate, _ := r.shouldGateFinalizerRemoval(maasMachine); gate { deletionAge := time.Since(maasMachine.DeletionTimestamp.Time) - if deletionAge < 2*time.Minute { - machineScope.Info("Not removing finalizer yet; waiting for deletion age threshold", "age", deletionAge.String()) - return ctrl.Result{RequeueAfter: 30 * time.Second}, nil - } + machineScope.Info("Not removing finalizer yet; waiting for deletion age threshold", "age", deletionAge.String()) + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } machineScope.V(2).Info("Unable to locate MaaS instance by ID or tags", "system-id", machineScope.GetInstanceID()) r.Recorder.Eventf(maasMachine, corev1.EventTypeWarning, "NoMachineFound", "Unable to find matching MaaS machine") @@ -210,18 +208,62 @@ func (r *MaasMachineReconciler) reconcileDelete(_ context.Context, machineScope // If LXD host feature is enabled and this is a control-plane node, proactively // attempt to unregister the VM host before releasing the machine. This avoids // MAAS 400 errors requiring VM host removal. + // unregister the VM host before releasing the machine so upgrades/deletes pass cleanly. if clusterScope.IsLXDHostEnabled() && machineScope.IsControlPlane() { - api := clusterScope.GetMaasClientIdentity() - nodeIP := getNodeIP(m.Addresses) - if nodeIP != "" { - if uerr := lxd.UnregisterLXDHostWithMaasClient(api.Token, api.URL, nodeIP); uerr != nil { - machineScope.Error(uerr, "best-effort unregister of LXD VM host before release failed", "nodeIP", nodeIP) - } else { - machineScope.Info("Best-effort unregistered LXD VM host before release", "nodeIP", nodeIP) - } - } + r.bestEffortUnregisterLXDHost(clusterScope, machineScope, m) + } + + if err := r.tryReleaseWithVMHostHandling(context.Background(), machineScope, clusterScope, machineSvc, m); err != nil { + return ctrl.Result{}, err } + // If this is an LXD VM, delete it after successful release + r.maybeDeleteDynamicLXDVM(clusterScope, machineScope, m.ID) + + // Finalize deletion: mark conditions, event, and remove finalizer + r.finalizeMachineDeletion(machineScope, maasMachine, m.ID) + + //// v1alpah3 MAASMachine finalizer + //// Machine is deleted so remove the finalizer. + //controllerutil.RemoveFinalizer(maasMachine, infrav1alpha3.MachineFinalizer) + + return reconcile.Result{}, nil +} + +// shouldGateFinalizerRemoval returns true if finalizer removal should be delayed to avoid races during early delete +func (r *MaasMachineReconciler) shouldGateFinalizerRemoval(maasMachine *infrav1beta1.MaasMachine) (bool, time.Duration) { + if maasMachine == nil || maasMachine.DeletionTimestamp.IsZero() { + return false, 0 + } + deletionAge := time.Since(maasMachine.DeletionTimestamp.Time) + if deletionAge < 2*time.Minute { + return true, 30 * time.Second + } + return false, 0 +} + +// bestEffortUnregisterLXDHost unregisters the LXD VM host if possible, logging any errors +func (r *MaasMachineReconciler) bestEffortUnregisterLXDHost(clusterScope *scope.ClusterScope, machineScope *scope.MachineScope, m *infrav1beta1.Machine) { + api := clusterScope.GetMaasClientIdentity() + hostName := canonicalLXDHostName(m.Hostname) + if hostName == "lxd-host-" { + return + } + if uerr := lxd.UnregisterLXDHostByNameWithMaasClient(api.Token, api.URL, hostName); uerr != nil { + machineScope.Error(uerr, "best-effort unregister of LXD VM host before release failed", "hostName", hostName) + return + } + machineScope.Info("Best-effort unregistered LXD VM host before release", "hostName", hostName) +} + +// canonicalLXDHostName returns lxd-host- or an empty suffix if hostname is empty +func canonicalLXDHostName(hostname string) string { + name := strings.ToLower(strings.TrimSpace(hostname)) + return fmt.Sprintf("lxd-host-%s", name) +} + +// tryReleaseWithVMHostHandling releases the machine, handling VM host dependencies when required +func (r *MaasMachineReconciler) tryReleaseWithVMHostHandling(ctx context.Context, machineScope *scope.MachineScope, clusterScope *scope.ClusterScope, machineSvc *maasmachine.Service, m *infrav1beta1.Machine) error { if err := machineSvc.ReleaseMachine(m.ID); err != nil { // If MAAS requires VM host removal first, attempt best-effort unregister and retry once if isVMHostRemovalRequiredError(err) { @@ -229,77 +271,78 @@ func (r *MaasMachineReconciler) reconcileDelete(_ context.Context, machineScope // For control-plane BM that backs an LXD VM host, force-delete guest VMs to unblock release if clusterScope.IsLXDHostEnabled() && machineScope.IsControlPlane() { - ctx := context.Background() - client := maasclient.NewAuthenticatedClientSet(api.URL, api.Token) - if hosts, herr := client.VMHosts().List(ctx, nil); herr == nil { - for _, h := range hosts { - if h.HostSystemID() == m.ID { - if guests, gerr := h.Machines().List(ctx); gerr == nil { - for _, g := range guests { - gid := g.SystemID() - if gid == "" { - continue - } - // Fetch details to confirm and delete - if gm, ge := client.Machines().Machine(gid).Get(ctx); ge == nil { - if derr := client.Machines().Machine(gm.SystemID()).Delete(ctx); derr != nil { - machineScope.Error(derr, "failed to delete guest VM during host release cleanup", "guestSystemID", gm.SystemID()) - } - } - } + r.forceDeleteGuestVMsIfControlPlane(ctx, clusterScope, machineScope, m.ID) + } + + // Unregister by canonical name lxd-host- + hostName := canonicalLXDHostName(m.Hostname) + if hostName == "lxd-host-" { + return err + } + if uerr := lxd.UnregisterLXDHostByNameWithMaasClient(api.Token, api.URL, hostName); uerr != nil { + machineScope.Error(uerr, "failed to unregister LXD VM host prior to release") + return err + } + machineScope.Info("Unregistered LXD VM host prior to release", "hostName", hostName) + // retry release + if rerr := machineSvc.ReleaseMachine(m.ID); rerr != nil { + machineScope.Error(rerr, "failed to release machine after unregistering VM host") + return rerr + } + return nil + } + machineScope.Error(err, "failed to release machine") + return err + } + return nil +} + +// forceDeleteGuestVMsIfControlPlane removes guest VMs from a VM host backed by this control-plane machine +func (r *MaasMachineReconciler) forceDeleteGuestVMsIfControlPlane(ctx context.Context, clusterScope *scope.ClusterScope, machineScope *scope.MachineScope, hostSystemID string) { + api := clusterScope.GetMaasClientIdentity() + client := maasclient.NewAuthenticatedClientSet(api.URL, api.Token) + if hosts, herr := client.VMHosts().List(ctx, nil); herr == nil { + for _, h := range hosts { + if h.HostSystemID() == hostSystemID { + if guests, gerr := h.Machines().List(ctx); gerr == nil { + for _, g := range guests { + gid := g.SystemID() + if gid == "" { + continue + } + // Fetch details to confirm and delete + if gm, ge := client.Machines().Machine(gid).Get(ctx); ge == nil { + if derr := client.Machines().Machine(gm.SystemID()).Delete(ctx); derr != nil { + machineScope.Error(derr, "failed to delete guest VM during host release cleanup", "guestSystemID", gm.SystemID()) } - break } } } + break } - - // choose ExternalIP first, then InternalIP - nodeIP := getNodeIP(m.Addresses) - if nodeIP != "" { - if uerr := lxd.UnregisterLXDHostWithMaasClient(api.Token, api.URL, nodeIP); uerr != nil { - machineScope.Error(uerr, "failed to unregister LXD VM host prior to release") - return ctrl.Result{}, err - } - machineScope.Info("Unregistered LXD VM host prior to release", "nodeIP", nodeIP) - // retry release - if rerr := machineSvc.ReleaseMachine(m.ID); rerr != nil { - machineScope.Error(rerr, "failed to release machine after unregistering VM host") - return ctrl.Result{}, rerr - } - } else { - machineScope.Error(err, "failed to release machine and no node IP for VM host unregister") - return ctrl.Result{}, err - } - } else { - machineScope.Error(err, "failed to release machine") - return ctrl.Result{}, err } } +} - // If this is an LXD VM, delete it after successful release - if machineScope.GetDynamicLXD() { - machineScope.Info("Deleting LXD VM after release", "system-id", m.ID) - api := clusterScope.GetMaasClientIdentity() - if uerr := lxd.DeleteLXDVMWithMaasClient(api.Token, api.URL, m.ID); uerr != nil { - machineScope.Error(uerr, "failed to delete LXD VM after release", "system-id", m.ID) - // Continue with cleanup despite deletion failure - } else { - machineScope.Info("Successfully deleted LXD VM after release", "system-id", m.ID) - } +// maybeDeleteDynamicLXDVM deletes the LXD VM if the MaasMachine was dynamically created +func (r *MaasMachineReconciler) maybeDeleteDynamicLXDVM(clusterScope *scope.ClusterScope, machineScope *scope.MachineScope, systemID string) { + if !machineScope.GetDynamicLXD() { + return + } + machineScope.Info("Deleting LXD VM after release", "system-id", systemID) + api := clusterScope.GetMaasClientIdentity() + if uerr := lxd.DeleteLXDVMWithMaasClient(api.Token, api.URL, systemID); uerr != nil { + machineScope.Error(uerr, "failed to delete LXD VM after release", "system-id", systemID) + return } + machineScope.Info("Successfully deleted LXD VM after release", "system-id", systemID) +} +// finalizeMachineDeletion marks conditions, records event, and removes the finalizer +func (r *MaasMachineReconciler) finalizeMachineDeletion(machineScope *scope.MachineScope, maasMachine *infrav1beta1.MaasMachine, systemID string) { conditions.MarkFalse(machineScope.MaasMachine, infrav1beta1.MachineDeployedCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "") - r.Recorder.Eventf(machineScope.MaasMachine, corev1.EventTypeNormal, "SuccessfulRelease", "Released instance %q", m.ID) - - // Machine is deleted so remove the finalizer. + r.Recorder.Eventf(machineScope.MaasMachine, corev1.EventTypeNormal, "SuccessfulRelease", "Released instance %q", systemID) controllerutil.RemoveFinalizer(maasMachine, infrav1beta1.MachineFinalizer) - - //// v1alpah3 MAASMachine finalizer - //// Machine is deleted so remove the finalizer. - //controllerutil.RemoveFinalizer(maasMachine, infrav1alpha3.MachineFinalizer) - - return reconcile.Result{}, nil } // findInstance queries the EC2 apis and retrieves the instance if it exists, returns nil otherwise. diff --git a/controllers/templates/lxd_initializer_ds.yaml b/controllers/templates/lxd_initializer_ds.yaml index 2b76dc7..38176c9 100644 --- a/controllers/templates/lxd_initializer_ds.yaml +++ b/controllers/templates/lxd_initializer_ds.yaml @@ -73,7 +73,7 @@ spec: mountPropagation: HostToContainer containers: - name: lxd-initializer - image: us-east1-docker.pkg.dev/spectro-images/dev/amit/cluster-api/lxd-initializer:v0.6.1-spectro-4.0.0-dev-16102025-01 + image: us-east1-docker.pkg.dev/spectro-images/dev/amit/cluster-api/lxd-initializer:v0.6.1-spectro-4.0.0-dev-17102025-01 imagePullPolicy: Always securityContext: privileged: true diff --git a/lxd-initializer/lxd-initializer-daemonset.yaml b/lxd-initializer/lxd-initializer-daemonset.yaml index 0d4895f..496fdd7 100644 --- a/lxd-initializer/lxd-initializer-daemonset.yaml +++ b/lxd-initializer/lxd-initializer-daemonset.yaml @@ -20,7 +20,7 @@ spec: hostPID: true containers: - name: lxd-initializer - image: us-east1-docker.pkg.dev/spectro-images/dev/amit/cluster-api/lxd-initializer:v0.6.1-spectro-4.0.0-dev-16102025-01 + image: us-east1-docker.pkg.dev/spectro-images/dev/amit/cluster-api/lxd-initializer:v0.6.1-spectro-4.0.0-dev-17102025-01 imagePullPolicy: Always securityContext: privileged: true diff --git a/lxd-initializer/lxd-initializer.go b/lxd-initializer/lxd-initializer.go index 15adadd..b45a499 100644 --- a/lxd-initializer/lxd-initializer.go +++ b/lxd-initializer/lxd-initializer.go @@ -468,15 +468,6 @@ func main() { nicMode = "bridge" } - // // Determine host short name for unique bridge suffix - // hostShort := nodeName - // if hostShort == "" { - // if osHN, _ := os.Hostname(); osHN != "" { - // hostShort = osHN - // } - // } - // hostToken := normalizeName(hostShort) - networkBridge := *networkBridgeFlag if networkBridge == "" { networkBridge = os.Getenv("NETWORK_BRIDGE") @@ -484,11 +475,6 @@ func main() { networkBridge = "br0" // Default to br0 } } - // // Unique bridge per host to avoid cross-host name collisions - // uniqueBridge := networkBridge - // if hostToken != "" { - // uniqueBridge = fmt.Sprintf("%s-%s", networkBridge, hostToken) - // } // Auto-detect zone, resource pool, and boot interface name from MAAS zone, resourcePool, bootInterfaceName, err := getMachineInfoFromMaas(nodeName, maasAPIKey, maasEndpoint) @@ -645,12 +631,16 @@ func main() { } if actionStr == "register" || actionStr == "both" { - // Build a stable host name using MAAS system-id + // Single naming convention: lxd-host- systemID, sErr := extractSystemIDFromNodeName(nodeName) if sErr != nil { log.Fatalf("Failed to extract system ID from node name: %v", sErr) } - hostName := fmt.Sprintf("lxd-host-%s", systemID) + hostToken := normalizeName(nodeName) + if hostToken == "" { + hostToken = "node" + } + hostName := fmt.Sprintf("lxd-host-%s", hostToken) if err := registerWithMAAS(maasEndpoint, maasAPIKey, systemID, nodeIP, trustPassword, project, zone, resourcePool, hostName); err != nil { log.Fatalf("Failed to register LXD host in MAAS: %v", err) } @@ -662,17 +652,6 @@ func main() { return } - // // Keep the container running to maintain the DaemonSet if in daemon mode - // log.Println("LXD initialization completed successfully") - // log.Println("Starting periodic trust-password maintainer") - // go func() { - // for { - // if err := setTrustPassword(trustPassword); err != nil { - // log.Printf("periodic trust password set failed: %v", err) - // } - // time.Sleep(15 * time.Minute) - // } - // }() log.Println("Entering sleep loop to keep the container running") for { time.Sleep(3600 * time.Second) diff --git a/pkg/maas/lxd/host_maas_client.go b/pkg/maas/lxd/host_maas_client.go index 4bec89e..919a361 100644 --- a/pkg/maas/lxd/host_maas_client.go +++ b/pkg/maas/lxd/host_maas_client.go @@ -71,8 +71,13 @@ func SetupLXDHostWithMaasClient(config HostConfig) error { // Create MAAS client client := maasclient.NewAuthenticatedClientSet(config.MaasAPIEndpoint, config.MaasAPIKey) - // Check if the host is already registered with MAAS - isRegistered, err := isHostRegisteredWithMaasClient(client, config.NodeIP) + // Check if the host is already registered with MAAS (by systemID, desired name, or power address) + hn := strings.TrimSpace(config.HostName) + desiredName := fmt.Sprintf("lxd-host-%s", hn) + if hn == "" { + desiredName = fmt.Sprintf("lxd-host-%s", config.NodeIP) + } + isRegistered, err := isHostRegisteredWithMaasClientAdvanced(client, "", desiredName, config.NodeIP) if err != nil { return fmt.Errorf("failed to check if host is registered: %w", err) } @@ -115,7 +120,8 @@ func normalizeHost(s string) string { } // isHostRegisteredWithMaasClient checks if a host is already registered with MAAS as a VM host -func isHostRegisteredWithMaasClient(client maasclient.ClientSetInterface, nodeIP string) (bool, error) { +// isHostRegisteredWithMaasClientAdvanced returns true if a VM host exists matching systemID, desired name, or power address host +func isHostRegisteredWithMaasClientAdvanced(client maasclient.ClientSetInterface, systemID, desiredName, nodeIP string) (bool, error) { ctx := context.Background() vmHosts, err := client.VMHosts().List(ctx, nil) @@ -123,15 +129,20 @@ func isHostRegisteredWithMaasClient(client maasclient.ClientSetInterface, nodeIP return false, fmt.Errorf("failed to get VM hosts: %w", err) } - wantName := fmt.Sprintf("lxd-host-%s", nodeIP) - wantHost := normalizeHost(nodeIP) + wantName := strings.TrimSpace(desiredName) + wantHost := normalizeHost(strings.TrimSpace(nodeIP)) for _, host := range vmHosts { - // Compare by name (our deterministic naming) or by normalized power address host component. - if host.Name() == wantName { + // 1) Prefer exact hostSystemID match when provided + if strings.TrimSpace(systemID) != "" && host.HostSystemID() == strings.TrimSpace(systemID) { + return true, nil + } + // 2) Compare by desired name + if wantName != "" && host.Name() == wantName { return true, nil } - if normalizeHost(host.PowerAddress()) == wantHost { + // 3) Legacy power address host match + if wantHost != "" && normalizeHost(host.PowerAddress()) == wantHost { return true, nil } } @@ -143,8 +154,10 @@ func registerWithMaasClient(client maasclient.ClientSetInterface, config HostCon ctx := context.Background() // Create registration parameters - name := config.HostName - if name == "" { + name := strings.TrimSpace(config.HostName) + if name != "" { + name = fmt.Sprintf("lxd-host-%s", name) + } else { name = fmt.Sprintf("lxd-host-%s", config.NodeIP) } params := maasclient.ParamsBuilder(). @@ -193,8 +206,8 @@ func GetAvailableLXDHostsWithMaasClient(apiKey, apiEndpoint string) ([]maasclien return vmHosts, nil } -// UnregisterLXDHostWithMaasClient removes a VM host registration from MAAS by matching name or power address -func UnregisterLXDHostWithMaasClient(apiKey, apiEndpoint, nodeIP string) error { +// UnregisterLXDHostByNameWithMaasClient removes a VM host registration from MAAS by matching the exact host name +func UnregisterLXDHostByNameWithMaasClient(apiKey, apiEndpoint, hostName string) error { client := maasclient.NewAuthenticatedClientSet(apiEndpoint, apiKey) ctx := context.Background() @@ -203,21 +216,16 @@ func UnregisterLXDHostWithMaasClient(apiKey, apiEndpoint, nodeIP string) error { return fmt.Errorf("failed to get VM hosts: %w", err) } - wantName := fmt.Sprintf("lxd-host-%s", nodeIP) - wantHost := normalizeHost(nodeIP) - for _, host := range vmHosts { - if host.Name() == wantName || normalizeHost(host.PowerAddress()) == wantHost { - // Found the host; delete it by system ID as required by the client + if host.Name() == hostName { if derr := client.VMHosts().VMHost(host.SystemID()).Delete(ctx); derr != nil { return fmt.Errorf("failed to delete VM host %s (id=%s): %w", host.Name(), host.SystemID(), derr) } log := textlogger.NewLogger(textlogger.NewConfig()) - log.Info("Successfully unregistered LXD host", "node", nodeIP, "id", host.SystemID(), "name", host.Name()) + log.Info("Successfully unregistered LXD host", "name", hostName, "id", host.SystemID()) return nil } } - // Not found -> nothing to do return nil }