@@ -20,9 +20,11 @@ import (
2020 "context"
2121 "encoding/json"
2222 "fmt"
23+ "strings"
2324
2425 "github.com/go-logr/logr"
2526 appsv1 "k8s.io/api/apps/v1"
27+ corev1 "k8s.io/api/core/v1"
2628 apierrors "k8s.io/apimachinery/pkg/api/errors"
2729 "k8s.io/apimachinery/pkg/api/meta"
2830 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -402,7 +404,7 @@ func (s *stateSkel) getSyncState(ctx context.Context, objs []*unstructured.Unstr
402404
403405 // Object exists, check for Kind specific readiness
404406 if found .GetKind () == "DaemonSet" {
405- if ready , err := s .isDaemonSetReady (found , reqLogger ); err != nil || ! ready {
407+ if ready , err := s .isDaemonSetReady (ctx , found , reqLogger ); err != nil || ! ready {
406408 reqLogger .V (consts .LogLevelInfo ).Info ("Object is not ready" , "Kind:" , obj .GetKind (), "Name" , obj .GetName ())
407409 return SyncStateNotReady , err
408410 }
@@ -413,7 +415,7 @@ func (s *stateSkel) getSyncState(ctx context.Context, objs []*unstructured.Unstr
413415}
414416
415417// isDaemonSetReady checks if daemonset is ready
416- func (s * stateSkel ) isDaemonSetReady (uds * unstructured.Unstructured , reqLogger logr.Logger ) (bool , error ) {
418+ func (s * stateSkel ) isDaemonSetReady (ctx context. Context , uds * unstructured.Unstructured , reqLogger logr.Logger ) (bool , error ) {
417419 buf , err := uds .MarshalJSON ()
418420 if err != nil {
419421 return false , fmt .Errorf ("failed to marshall unstructured daemonset object: %w" , err )
@@ -433,14 +435,33 @@ func (s *stateSkel) isDaemonSetReady(uds *unstructured.Unstructured, reqLogger l
433435 "UpdatedPodsScheduled" , ds .Status .UpdatedNumberScheduled ,
434436 "PodsReady:" , ds .Status .NumberReady ,
435437 "Conditions:" , ds .Status .Conditions )
436- // Note(adrianc): We check for DesiredNumberScheduled!=0 as we expect to have at least one node that would need
437- // to have DaemonSet Pods deployed onto it. DesiredNumberScheduled == 0 then indicates that this field was not yet
438- // updated by the DaemonSet controller
439- // TODO: Check if we can use another field maybe to indicate it was processed by the DaemonSet controller.
440- if ds .Status .DesiredNumberScheduled != 0 && ds .Status .DesiredNumberScheduled == ds .Status .NumberAvailable &&
441- ds .Status .UpdatedNumberScheduled == ds .Status .NumberAvailable {
438+
439+ if ds .Status .DesiredNumberScheduled == 0 {
440+ reqLogger .V (consts .LogLevelDebug ).Info ("DesiredNumberScheduled is 0, DaemonSet not ready" )
441+ return false , nil
442+ }
443+
444+ // Check basic availability
445+ if ds .Status .NumberUnavailable != 0 {
446+ reqLogger .V (consts .LogLevelInfo ).Info ("DaemonSet has unavailable pods" )
447+ return false , nil
448+ }
449+
450+ if ds .Status .DesiredNumberScheduled != ds .Status .NumberAvailable {
451+ reqLogger .V (consts .LogLevelInfo ).Info ("Not all desired pods are available" )
452+ return false , nil
453+ }
454+
455+ // For OnDelete strategy, use revision hash checking with node placement verification
456+ if ds .Spec .UpdateStrategy .Type == appsv1 .OnDeleteDaemonSetStrategyType {
457+ return s .isDaemonSetReadyOnDelete (ctx , ds , reqLogger )
458+ }
459+
460+ // For RollingUpdate strategy, check UpdatedNumberScheduled
461+ if ds .Status .UpdatedNumberScheduled == ds .Status .NumberAvailable {
442462 return true , nil
443463 }
464+
444465 return false , nil
445466}
446467
@@ -454,3 +475,125 @@ func (s *stateSkel) checkAttributesExist(attrs nodeinfo.NodeAttributes, attrType
454475 }
455476 return nil
456477}
478+
479+ // isDaemonSetReadyOnDelete checks if a DaemonSet with OnDelete update strategy is ready
480+ // by verifying pod revision hashes and node placement
481+ func (s * stateSkel ) isDaemonSetReadyOnDelete (ctx context.Context , ds * appsv1.DaemonSet , reqLogger logr.Logger ) (bool , error ) {
482+ ownedPods , err := s .getOwnedPods (ctx , ds )
483+ if err != nil {
484+ reqLogger .Error (err , "Failed to list pods for DaemonSet" )
485+ return false , err
486+ }
487+ if len (ownedPods ) == 0 {
488+ return false , nil
489+ }
490+
491+ if ! s .arePodsHealthy (ownedPods ) {
492+ return false , nil
493+ }
494+
495+ dsRevisionHash , err := s .getLatestRevisionHash (ctx , ds )
496+ if err != nil {
497+ // If revision hash unavailable, fall back to health-only check
498+ reqLogger .V (consts .LogLevelWarning ).Info ("Could not get revision hash, using health-only check" , "error" , err )
499+ return true , nil
500+ }
501+
502+ // Check if all pods have the latest revision
503+ for _ , pod := range ownedPods {
504+ if hash , ok := pod .Labels ["controller-revision-hash" ]; ! ok || hash != dsRevisionHash {
505+ // Pods have outdated revision - verify they're on nodes matching current nodeSelector
506+ reqLogger .V (consts .LogLevelInfo ).Info ("Pods have outdated revision, verifying node placement" )
507+ return s .verifyNodePlacement (ctx , ds , ownedPods , reqLogger )
508+ }
509+ }
510+
511+ return true , nil
512+ }
513+
514+ // getOwnedPods returns pods owned by the DaemonSet
515+ func (s * stateSkel ) getOwnedPods (ctx context.Context , ds * appsv1.DaemonSet ) ([]corev1.Pod , error ) {
516+ podList := & corev1.PodList {}
517+ opts := []client.ListOption {
518+ client .MatchingLabels (ds .Spec .Template .Labels ),
519+ client .InNamespace (ds .Namespace ),
520+ }
521+ if err := s .client .List (ctx , podList , opts ... ); err != nil {
522+ return nil , err
523+ }
524+
525+ var ownedPods []corev1.Pod
526+ for _ , pod := range podList .Items {
527+ if len (pod .OwnerReferences ) > 0 && pod .OwnerReferences [0 ].UID == ds .UID {
528+ ownedPods = append (ownedPods , pod )
529+ }
530+ }
531+ return ownedPods , nil
532+ }
533+
534+ // getLatestRevisionHash retrieves the latest ControllerRevision hash for a DaemonSet
535+ func (s * stateSkel ) getLatestRevisionHash (ctx context.Context , ds * appsv1.DaemonSet ) (string , error ) {
536+ revisionList := & appsv1.ControllerRevisionList {}
537+ if err := s .client .List (ctx , revisionList ,
538+ client .MatchingLabels (ds .Spec .Selector .MatchLabels ),
539+ client .InNamespace (ds .Namespace )); err != nil {
540+ return "" , err
541+ }
542+
543+ // Find revision with highest revision number in single pass
544+ var latestRevision * appsv1.ControllerRevision
545+ namePrefix := ds .Name + "-"
546+
547+ for i := range revisionList .Items {
548+ rev := & revisionList .Items [i ]
549+ if ! strings .HasPrefix (rev .Name , namePrefix ) {
550+ continue
551+ }
552+ if latestRevision == nil || rev .Revision > latestRevision .Revision {
553+ latestRevision = rev
554+ }
555+ }
556+
557+ if latestRevision == nil {
558+ return "" , fmt .Errorf ("no revisions found" )
559+ }
560+
561+ return strings .TrimPrefix (latestRevision .Name , namePrefix ), nil
562+ }
563+
564+ // arePodsHealthy checks if all pods are running with all containers ready
565+ func (s * stateSkel ) arePodsHealthy (pods []corev1.Pod ) bool {
566+ for _ , pod := range pods {
567+ if pod .Status .Phase != corev1 .PodRunning {
568+ return false
569+ }
570+ for _ , cs := range pod .Status .ContainerStatuses {
571+ if ! cs .Ready {
572+ return false
573+ }
574+ }
575+ }
576+ return true
577+ }
578+
579+ // verifyNodePlacement checks if all pods are on nodes matching the DaemonSet's nodeSelector
580+ func (s * stateSkel ) verifyNodePlacement (ctx context.Context , ds * appsv1.DaemonSet , pods []corev1.Pod , reqLogger logr.Logger ) (bool , error ) {
581+ for _ , pod := range pods {
582+ node := & corev1.Node {}
583+ if err := s .client .Get (ctx , types.NamespacedName {Name : pod .Spec .NodeName }, node ); err != nil {
584+ return false , err
585+ }
586+
587+ // Check if node matches nodeSelector
588+ for key , value := range ds .Spec .Template .Spec .NodeSelector {
589+ if node .Labels [key ] != value {
590+ reqLogger .V (consts .LogLevelInfo ).Info ("Pod on non-matching node" ,
591+ "pod" , pod .Name , "node" , pod .Spec .NodeName , "selector" , key , "expected" , value , "actual" , node .Labels [key ])
592+ return false , nil
593+ }
594+ }
595+ }
596+
597+ reqLogger .V (consts .LogLevelInfo ).Info ("Pods healthy on correct nodes despite outdated revision" )
598+ return true , nil
599+ }
0 commit comments