@@ -55,11 +55,15 @@ type Daemon interface {
5555
5656type daemon struct { // daemon bundles the config, watchers, clients and bookkeeping maps used by the daemon methods
5757 config config.DaemonConfig
58- watcher watcher.Watcher
58+ podWatcher watcher.Watcher // pod watcher; its handler results are read by AddPeriodicUpdate and DeletePeriodicUpdate
59+ nadWatcher watcher.Watcher // NAD watcher; its handler results are drained by ProcessNADChanges
5960 kubeClient k8sClient.Client
6061 guidPool guid.Pool
6162 smClient plugins.SubnetManagerClient
6263 guidPodNetworkMap map [string ]string // allocated GUID mapped to the pod and network
64+
65+ // Add-only NAD cache: filled by ProcessNADChanges and getCachedNAD; no removal path is visible in this diff.
66+ nadCache map [string ]* v1.NetworkAttachmentDefinition // network ID -> NAD
6367}
6468
6569// Temporary struct used to process pods' networks
@@ -109,6 +113,7 @@ func NewDaemon() (Daemon, error) {
109113 }
110114
111115 podEventHandler := resEvenHandler .NewPodEventHandler ()
116+ nadEventHandler := resEvenHandler .NewNADEventHandler ()
112117 client , err := k8sClient .NewK8sClient ()
113118 if err != nil {
114119 return nil , err
@@ -151,13 +156,16 @@ func NewDaemon() (Daemon, error) {
151156 }
152157
153158 podWatcher := watcher .NewWatcher (podEventHandler , client )
159+ nadWatcher := watcher .NewWatcher (nadEventHandler , client )
154160 return & daemon {
155161 config : daemonConfig ,
156- watcher : podWatcher ,
162+ podWatcher : podWatcher ,
163+ nadWatcher : nadWatcher ,
157164 kubeClient : client ,
158165 guidPool : guidPool ,
159166 smClient : smClient ,
160167 guidPodNetworkMap : make (map [string ]string ),
168+ nadCache : make (map [string ]* v1.NetworkAttachmentDefinition ),
161169 }, nil
162170}
163171
@@ -289,11 +297,14 @@ func (d *daemon) runLeaderLogic() {
289297
290298 go wait .Until (d .AddPeriodicUpdate , time .Duration (d .config .PeriodicUpdate )* time .Second , stopPeriodicsChan )
291299 go wait .Until (d .DeletePeriodicUpdate , time .Duration (d .config .PeriodicUpdate )* time .Second , stopPeriodicsChan )
300+ go wait .Until (d .ProcessNADChanges , time .Duration (d .config .PeriodicUpdate )* time .Second , stopPeriodicsChan )
292301 defer close (stopPeriodicsChan )
293302
294- // Run Watcher in background, calling watcherStopFunc() will stop the watcher
295- watcherStopFunc := d .watcher .RunBackground ()
296- defer watcherStopFunc ()
303+ // Run both watchers in background
304+ podWatcherStopFunc := d .podWatcher .RunBackground ()
305+ nadWatcherStopFunc := d .nadWatcher .RunBackground ()
306+ defer podWatcherStopFunc ()
307+ defer nadWatcherStopFunc ()
297308
298309 // Run until interrupted by os signals
299310 sigChan := make (chan os.Signal , 1 )
@@ -303,26 +314,16 @@ func (d *daemon) runLeaderLogic() {
303314}
304315
305316// If network identified by networkID is IbSriov return network name and spec
306- //
307- //nolint:nilerr
308317func (d * daemon ) getIbSriovNetwork (networkID string ) (string , * utils.IbSriovCniSpec , error ) {
309- networkNamespace , networkName , err := utils .ParseNetworkID (networkID )
318+ _ , networkName , err := utils .ParseNetworkID (networkID )
310319 if err != nil {
311320 return "" , nil , fmt .Errorf ("failed to parse network id %s with error: %v" , networkID , err )
312321 }
313322
314- // Try to get net-attach-def in backoff loop
315- var netAttInfo * v1.NetworkAttachmentDefinition
316- if err = wait .ExponentialBackoff (backoffValues , func () (bool , error ) {
317- netAttInfo , err = d .kubeClient .GetNetworkAttachmentDefinition (networkNamespace , networkName )
318- if err != nil {
319- log .Warn ().Msgf ("failed to get networkName attachment %s with error %v" ,
320- networkName , err )
321- return false , nil
322- }
323- return true , nil
324- }); err != nil {
325- return "" , nil , fmt .Errorf ("failed to get networkName attachment %s" , networkName )
323+ // Try to get net-attach-def from cache first, then fallback to API
324+ netAttInfo , err := d .getCachedNAD (networkID )
325+ if err != nil {
326+ return "" , nil , fmt .Errorf ("failed to get network attachment %s: %v" , networkName , err )
326327 }
327328 log .Debug ().Msgf ("networkName attachment %v" , netAttInfo )
328329
@@ -551,7 +552,7 @@ func (d *daemon) updatePodNetworkAnnotation(pi *podNetworkInfo, removedList *[]n
551552//nolint:nilerr
552553func (d * daemon ) AddPeriodicUpdate () {
553554 log .Info ().Msgf ("running periodic add update" )
554- addMap , _ := d .watcher .GetHandler ().GetResults ()
555+ addMap , _ := d .podWatcher .GetHandler ().GetResults ()
555556 addMap .Lock ()
556557 defer addMap .Unlock ()
557558 // Contains ALL pods' networks
@@ -569,12 +570,10 @@ func (d *daemon) AddPeriodicUpdate() {
569570 if len (pods ) == 0 {
570571 continue
571572 }
572-
573- log .Info ().Msgf ("processing network networkID %s" , networkID )
574573 networkName , ibCniSpec , err := d .getIbSriovNetwork (networkID )
575574 if err != nil {
576- addMap . UnSafeRemove ( networkID )
577- log .Error ().Msgf ("droping network: %v" , err )
575+ // Do not drop the network; keep for next periodic run when NAD becomes available
576+ log .Warn ().Msgf ("NAD not ready for network %s : %v (will retry)" , networkID , err )
578577 continue
579578 }
580579
@@ -678,7 +677,7 @@ func getAllPodGUIDsForNetwork(pod *kapi.Pod, networkName string) ([]net.Hardware
678677//nolint:nilerr
679678func (d * daemon ) DeletePeriodicUpdate () {
680679 log .Info ().Msg ("running delete periodic update" )
681- _ , deleteMap := d .watcher .GetHandler ().GetResults ()
680+ _ , deleteMap := d .podWatcher .GetHandler ().GetResults ()
682681 deleteMap .Lock ()
683682 defer deleteMap .Unlock ()
684683 for networkID , podsInterface := range deleteMap .Items {
@@ -753,6 +752,64 @@ func (d *daemon) DeletePeriodicUpdate() {
753752 log .Info ().Msg ("delete periodic update finished" )
754753}
755754
755+ // ProcessNADChanges processes NAD add events
756+ func (d * daemon ) ProcessNADChanges () {
757+ log .Debug ().Msg ("Processing NAD changes..." )
758+
759+ nadHandler := d .nadWatcher .GetHandler ().(* resEvenHandler.NADEventHandler )
760+ addedNADs , _ := nadHandler .GetResults ()
761+
762+ // Process NAD add events only
763+ addedNADs .Lock ()
764+ for networkID , nad := range addedNADs .Items {
765+ nadObj := nad .(* v1.NetworkAttachmentDefinition )
766+
767+ // Add-only: cache the NAD; ignore updates/deletes
768+ d .nadCache [networkID ] = nadObj
769+
770+ log .Info ().Msgf ("Successfully processed NAD event: %s" , networkID )
771+
772+ // Remove processed item
773+ addedNADs .UnSafeRemove (networkID )
774+ }
775+ addedNADs .Unlock ()
776+
777+ log .Debug ().Msg ("NAD changes processing completed" )
778+ }
779+
780+ // getCachedNAD retrieves NAD from cache, falling back to API if not cached
781+ func (d * daemon ) getCachedNAD (networkID string ) (* v1.NetworkAttachmentDefinition , error ) {
782+ // First check cache
783+ if nad , exists := d .nadCache [networkID ]; exists {
784+ return nad , nil
785+ }
786+
787+ // Fall back to API call (existing behavior)
788+ networkNamespace , networkName , err := utils .ParseNetworkID (networkID )
789+ if err != nil {
790+ return nil , fmt .Errorf ("failed to parse network id %s with error: %v" , networkID , err )
791+ }
792+
793+ var netAttInfo * v1.NetworkAttachmentDefinition
794+ if err = wait .ExponentialBackoff (backoffValues , func () (bool , error ) {
795+ var getErr error
796+ netAttInfo , getErr = d .kubeClient .GetNetworkAttachmentDefinition (networkNamespace , networkName )
797+ if getErr != nil {
798+ log .Warn ().Msgf ("failed to get network attachment %s with error %v" , networkName , getErr )
799+ // keep retrying until backoff exhausted
800+ return false , nil
801+ }
802+ return true , nil
803+ }); err != nil {
804+ return nil , fmt .Errorf ("failed to get network attachment %s" , networkName )
805+ }
806+
807+ // Cache the result
808+ d .nadCache [networkID ] = netAttInfo
809+
810+ return netAttInfo , nil
811+ }
812+
756813// initPool check the guids that are already allocated by the running pods
757814func (d * daemon ) initPool () error {
758815 log .Info ().Msg ("Initializing GUID pool." )
0 commit comments