Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ endif
# Image URL to use all building/pushing image targets
IMAGE_NAME := cluster-api-provider-maas-controller
REGISTRY ?= "us-east1-docker.pkg.dev/spectro-images/dev/${USER}/cluster-api"
SPECTRO_VERSION ?= 4.0.0-dev-29082025
SPECTRO_VERSION ?= 4.8.3-dev-12112025
IMG_TAG ?= v0.6.1-spectro-${SPECTRO_VERSION}
CONTROLLER_IMG ?= ${REGISTRY}/${IMAGE_NAME}

Expand Down
64 changes: 52 additions & 12 deletions controllers/lxd_initializer_ds.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,13 @@ func (r *MaasClusterReconciler) ensureLXDInitializerDS(ctx context.Context, clus
if !r.anyNodeNeedsInitialization(ctx, remoteClient) {
r.Log.Info("All nodes already labeled initialized; considering DS cleanup", "namespace", dsNamespace, "ds", dsName)
if done, err := r.maybeShortCircuitDelete(ctx, remoteClient, dsNamespace, desiredCP, dsName); err != nil {
r.Log.Error(err, "failed to maybe short circuit delete", "namespace", dsNamespace, "ds", dsName)
return err
} else if done {
r.Log.Info("deleted existing initializer DS - all nodes are ready and initialized", "namespace", dsNamespace, "ds", dsName)
return nil
}
r.Log.Info("no nodes need initialization; skipping DS creation", "namespace", dsNamespace, "ds", dsName)
return nil
}

Expand All @@ -80,28 +83,68 @@ func (r *MaasClusterReconciler) ensureLXDInitializerDS(ctx context.Context, clus
// }

if err := r.deleteExistingInitializerDS(ctx, remoteClient, dsNamespace); err != nil {
r.Log.Error(err, "failed to delete existing initializer DS", "namespace", dsNamespace, "ds", dsName)
return err
}

// Ensure RBAC resources are created on the target cluster
if err := r.ensureLXDInitializerRBACOnTarget(ctx, remoteClient, dsNamespace); err != nil {
r.Log.Error(err, "failed to ensure LXD initializer RBAC", "namespace", dsNamespace, "ds", dsName)
return fmt.Errorf("failed to ensure LXD initializer RBAC: %v", err)
}

if done, err := r.maybeShortCircuitDelete(ctx, remoteClient, dsNamespace, desiredCP, dsName); err != nil {
r.Log.Error(err, "failed to maybe short circuit delete", "namespace", dsNamespace, "ds", dsName)
return err
} else if done {
r.Log.Info("deleted existing initializer DS - all nodes are ready and initialized", "namespace", dsNamespace, "ds", dsName)
return nil
}

ds, err := r.renderDaemonSetForCluster(clusterScope, dsName, dsNamespace)
if err != nil {
r.Log.Error(err, "failed to render DaemonSet for cluster", "namespace", dsNamespace, "ds", dsName)
return err
}

// Do not set owner refs across clusters; just create/patch on target cluster
_, err = controllerutil.CreateOrPatch(ctx, remoteClient, ds, func() error { return nil })
return err
// Do not set owner refs across clusters; just create/patch on target cluster.
// Mutate existing DaemonSet so changes to template/spec take effect on reconcile.
current := &appsv1.DaemonSet{}
current.Name = dsName
current.Namespace = dsNamespace

_, err = controllerutil.CreateOrPatch(ctx, remoteClient, current, func() error {
// Preserve immutable selector if already present; align labels.
current.Labels = ds.Labels
current.Annotations = ds.Annotations

// Update pod template and mutable spec fields
current.Spec.Template = ds.Spec.Template
current.Spec.UpdateStrategy = ds.Spec.UpdateStrategy
current.Spec.MinReadySeconds = ds.Spec.MinReadySeconds
current.Spec.RevisionHistoryLimit = ds.Spec.RevisionHistoryLimit

// Initialize selector if missing (only valid on create)
if current.Spec.Selector == nil || len(current.Spec.Selector.MatchLabels) == 0 {
current.Spec.Selector = ds.Spec.Selector
}
// Ensure template labels include selector labels
if current.Spec.Selector != nil && len(current.Spec.Selector.MatchLabels) > 0 {
if current.Spec.Template.Labels == nil {
current.Spec.Template.Labels = map[string]string{}
}
for k, v := range current.Spec.Selector.MatchLabels {
current.Spec.Template.Labels[k] = v
}
}
return nil
})
if err != nil {
r.Log.Error(err, "failed to create/patch DaemonSet", "namespace", dsNamespace, "ds", dsName)
return err
}
r.Log.Info("created/patched DaemonSet", "namespace", dsNamespace, "ds", dsName)
return nil
}

// ensureLXDInitializerRBACOnTarget creates the RBAC resources for lxd-initializer on the target cluster
Expand Down Expand Up @@ -295,21 +338,18 @@ func (r *MaasClusterReconciler) maybeShortCircuitDelete(ctx context.Context, rem
}
}

// Only delete if:
// 1. We have exactly desiredCP nodes (not more, which would indicate maintenance/new nodes)
// 2. All nodes are Ready
// 3. All nodes are initialized
if int64(len(shortCircuitNodes.Items)) == int64(desiredCP) &&
int64(readyCount) == int64(desiredCP) &&
int64(initCount) >= int64(desiredCP) {
// Delete initializer DS only when ALL nodes (control-plane + worker) are initialized.
// This matches the new requirement to register both CP and worker nodes.
totalNodes := len(shortCircuitNodes.Items)
if totalNodes > 0 && initCount == totalNodes {
shortCircuitDSList := &appsv1.DaemonSetList{}
if err := remoteClient.List(ctx, shortCircuitDSList, client.InNamespace(namespace), client.MatchingLabels{"app": dsName}); err == nil {
for _, ds := range shortCircuitDSList.Items {
_ = remoteClient.Delete(ctx, &ds)
}
}
r.Log.Info("Deleted LXD initializer DaemonSet - all nodes are ready and initialized",
"desiredCP", desiredCP, "totalNodes", len(shortCircuitNodes.Items), "readyNodes", readyCount, "initializedNodes", initCount)
r.Log.Info("Deleted LXD initializer DaemonSet - all nodes initialized",
"desiredCP", desiredCP, "totalNodes", totalNodes, "readyNodes", readyCount, "initializedNodes", initCount)
return true, nil
}
return false, nil
Expand Down
25 changes: 11 additions & 14 deletions controllers/templates/lxd_initializer_ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,27 +33,24 @@ spec:
- -c
- |
set -ex
if ! command -v lxd >/dev/null 2>&1; then
echo "LXD not present, installing via snap";
apt-get update;
apt-get install -y snapd;
systemctl enable --now snapd.socket;
snap install lxd --channel=5.0/stable;
# Check on the HOST, not in the container
if ! nsenter -t 1 -m -p -- bash -lc 'command -v lxd >/dev/null 2>&1'; then
echo "LXD not present on host, installing snapd and LXD on host";
nsenter -t 1 -m -p -- bash -lc 'export DEBIAN_FRONTEND=noninteractive; apt-get update && apt-get install -y snapd'
# Enable and start snapd on the host
nsenter -t 1 -m -p -- systemctl enable --now snapd.socket
# Install LXD via snap on the host
nsenter -t 1 -m -p -- snap install lxd --channel=5.0/stable
fi
echo "Ensuring LXD daemon is running on host";
if nsenter -t 1 -m -p -- systemctl is-active --quiet snap.lxd.daemon; then
echo "LXD daemon already active";
else
echo "Starting LXD daemon via host systemd";
nsenter -t 1 -m -p -- systemctl start snap.lxd.daemon || snap start --enable lxd.daemon;
fi
# Start/enable via snap (avoid systemd invocation from the pod)
nsenter -t 1 -m -p -- snap start --enable lxd.daemon || true
# Wait for LXD to report readiness (up to 5 minutes)
echo "Waiting for LXD to become ready on host (timeout 5 min)…"
if ! nsenter -t 1 -m -p -- /snap/bin/lxd waitready --timeout 300 ; then
echo "LXD did not become ready after 5 minutes"; exit 1;
fi
echo "Host LXD is ready";

securityContext:
privileged: true
volumeMounts:
Expand All @@ -71,7 +68,7 @@ spec:
mountPropagation: HostToContainer
containers:
- name: lxd-initializer
image: us-east1-docker.pkg.dev/spectro-images/dev/jayeshsrivastava/cluster-api/lxd-initializer:v0.6.1-spectro-4.8.2
image: us-east1-docker.pkg.dev/spectro-images/dev/amit/cluster-api/lxd-initializer:v0.6.1-spectro-4.8.3
imagePullPolicy: Always
securityContext:
privileged: true
Expand Down
17 changes: 6 additions & 11 deletions lxd-initializer/lxd-initializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -644,11 +644,11 @@ func logLXDDiagnostics() {
}

// host daemon status
out, err := exec.Command("nsenter", "-t", "1", "-m", "-p", "--", "systemctl", "status", "snap.lxd.daemon").CombinedOutput()
out, err := exec.Command("nsenter", "-t", "1", "-m", "-p", "--", "snap", "services", "lxd").CombinedOutput()
if err == nil {
log.Printf("systemctl status snap.lxd.daemon:\n%s", string(out))
log.Printf("snap services lxd:\n%s", string(out))
} else {
log.Printf("nsenter systemctl status failed: %v", err)
log.Printf("nsenter snap services failed: %v", err)
}

// process list
Expand Down Expand Up @@ -974,16 +974,11 @@ func configureLXDNetwork(trustPassword, hostIP string) error {
return err
}
}
// Restart LXD to apply changes
cmd = exec.Command("systemctl", "restart", "snap.lxd.daemon")
// Restart LXD to apply changes (avoid systemd; use snap)
cmd = exec.Command("nsenter", "-t", "1", "-m", "-p", "--", "snap", "restart", "lxd")
output, err = cmd.CombinedOutput()
if err != nil {
log.Printf("systemctl restart inside container failed (%v), trying nsenter fallback", err)
cmd = exec.Command("nsenter", "-t", "1", "-m", "-p", "--", "systemctl", "restart", "snap.lxd.daemon")
output, err = cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("failed to restart LXD (fallback): %s: %w", string(output), err)
}
return fmt.Errorf("failed to restart LXD via snap: %s: %w", string(output), err)
}

log.Printf("LXD configured to listen on %s", address)
Expand Down