@@ -49,6 +49,7 @@ import (
4949
5050 nropv1 "github.com/openshift-kni/numaresources-operator/api/v1"
5151 "github.com/openshift-kni/numaresources-operator/internal/machineconfigpools"
52+ intreconcile "github.com/openshift-kni/numaresources-operator/internal/reconcile"
5253 "github.com/openshift-kni/numaresources-operator/pkg/apply"
5354 "github.com/openshift-kni/numaresources-operator/pkg/kubeletconfig"
5455 "github.com/openshift-kni/numaresources-operator/pkg/objectnames"
@@ -57,7 +58,8 @@ import (
5758)
5859
const (
	// kubeletConfigRetryPeriod is a 30 second retry interval for KubeletConfig
	// processing.
	// NOTE(review): its consumer is not visible in this chunk — confirm usage.
	kubeletConfigRetryPeriod = 30 * time.Second

	// MachineConfigPoolPausedRetryPeriod is the RequeueAfter delay applied when
	// the MachineConfigPool matched to a KubeletConfig is paused, so the
	// KubeletConfig is looked at again later instead of being dropped.
	MachineConfigPoolPausedRetryPeriod = 2 * time.Minute
)
6264
6365const (
@@ -116,22 +118,20 @@ func (r *KubeletConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques
116118
117119 // KubeletConfig changes are expected to be sporadic, yet are important enough
118120 // to be made visible at kubernetes level. So we generate events to handle them
119- cm , err := r .reconcileConfigMap (ctx , instance , req .NamespacedName )
120- if err != nil {
121- var klErr * InvalidKubeletConfig
122- if errors .As (err , & klErr ) {
123- r .Recorder .Event (instance , "Normal" , "ProcessSkip" , "ignored kubelet config " + klErr .ObjectName )
124- return ctrl.Result {}, nil
125- }
126-
127- klog .ErrorS (err , "failed to reconcile configmap" , "controller" , "kubeletconfig" )
121+ cm , step := r .reconcileConfigMap (ctx , instance , req .NamespacedName )
122+ if step .Error != nil && step .ConditionInfo .Reason != intreconcile .EventProcessSkip {
123+ klog .ErrorS (step .Error , "failed to reconcile configmap" , "controller" , "kubeletconfig" )
124+ r .Recorder .Event (instance , step .ConditionInfo .Type , step .ConditionInfo .Reason , step .ConditionInfo .Message )
125+ return step .Result , step .Error
126+ }
128127
129- r . Recorder . Event ( instance , "Warning" , "ProcessFailed" , "Failed to update RTE config from kubelet config " + req . NamespacedName . String ())
130- return ctrl. Result {}, err
128+ if step . ConditionInfo . Reason == intreconcile . EventProcessSuccess {
129+ step = step . WithMessage ( fmt . Sprintf ( "Updated RTE config %s/%s from kubelet config %s" , cm . Namespace , cm . Name , req . NamespacedName . String ()))
131130 }
132131
133- r .Recorder .Event (instance , "Normal" , "ProcessOK" , fmt .Sprintf ("Updated RTE config %s/%s from kubelet config %s" , cm .Namespace , cm .Name , req .NamespacedName .String ()))
134- return ctrl.Result {}, nil
132+ r .Recorder .Event (instance , step .ConditionInfo .Type , step .ConditionInfo .Reason , step .ConditionInfo .Message )
133+
134+ return step .Result , nil
135135}
136136
137137func (r * KubeletConfigReconciler ) SetupWithManager (mgr ctrl.Manager ) error {
@@ -197,25 +197,29 @@ func (e *InvalidKubeletConfig) Unwrap() error {
197197 return e .Err
198198}
199199
200- func (r * KubeletConfigReconciler ) reconcileConfigMap (ctx context.Context , instance * nropv1.NUMAResourcesOperator , kcKey client.ObjectKey ) (* corev1.ConfigMap , error ) {
200+ func (r * KubeletConfigReconciler ) reconcileConfigMap (ctx context.Context , instance * nropv1.NUMAResourcesOperator , kcKey client.ObjectKey ) (* corev1.ConfigMap , intreconcile. Step ) {
201201 // first check if the ConfigMap should be deleted
202202 // to save all the additional work related for create/update
203203 cm , deleted , err := r .deleteConfigMap (ctx , instance , kcKey )
204204 if deleted {
205- return cm , err
205+ return cm , intreconcile . StepWarning ( fmt . Errorf ( "Failed to update RTE config from kubelet config %s: %v" , kcKey . Name , err ))
206206 }
207207
208- kcHandler , err := r .makeKCHandlerForPlatform (ctx , instance , kcKey )
209- if err != nil {
210- return nil , err
208+ kcHandler , step := r .makeKCHandlerForPlatform (ctx , instance , kcKey )
209+ if step . Error != nil {
210+ return nil , step
211211 }
212+
212213 kubeletConfig , err := kubeletconfig .MCOKubeletConfToKubeletConf (kcHandler .mcoKc )
213214 if err != nil {
214215 klog .ErrorS (err , "cannot extract KubeletConfiguration from MCO KubeletConfig" , "name" , kcKey .Name )
215- return nil , err
216+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
216217 }
217-
218- return r .syncConfigMap (ctx , kubeletConfig , instance , kcHandler )
218+ cm , err = r .syncConfigMap (ctx , kubeletConfig , instance , kcHandler )
219+ if err != nil {
220+ return cm , FailedConfigMapUpdateStep (kcKey .Name , err )
221+ }
222+ return cm , intreconcile .StepNormalSucess ("" )
219223}
220224
221225func (r * KubeletConfigReconciler ) syncConfigMap (ctx context.Context , kubeletConfig * kubeletconfigv1beta1.KubeletConfiguration , instance * nropv1.NUMAResourcesOperator , kcHandler * kubeletConfigHandler ) (* corev1.ConfigMap , error ) {
@@ -244,63 +248,68 @@ func (r *KubeletConfigReconciler) syncConfigMap(ctx context.Context, kubeletConf
244248 return rendered , nil
245249}
246250
247- func (r * KubeletConfigReconciler ) makeKCHandlerForPlatform (ctx context.Context , instance * nropv1.NUMAResourcesOperator , kcKey client.ObjectKey ) (* kubeletConfigHandler , error ) {
251+ func (r * KubeletConfigReconciler ) makeKCHandlerForPlatform (ctx context.Context , instance * nropv1.NUMAResourcesOperator , kcKey client.ObjectKey ) (* kubeletConfigHandler , intreconcile. Step ) {
248252 switch r .Platform {
249253 case platform .OpenShift :
250254 mcoKc := & mcov1.KubeletConfig {}
251255 if err := r .Client .Get (ctx , kcKey , mcoKc ); err != nil {
252- return nil , err
256+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
253257 }
254258
255259 mcps , err := machineconfigpools .GetListByNodeGroupsV1 (ctx , r .Client , instance .Spec .NodeGroups )
256260 if err != nil {
257- return nil , err
261+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
258262 }
259263
260264 mcp , err := machineconfigpools .FindBySelector (mcps , mcoKc .Spec .MachineConfigPoolSelector )
261265 if err != nil {
262266 klog .ErrorS (err , "cannot find a matching mcp for MCO KubeletConfig" , "name" , kcKey .Name )
263267 var notFound * machineconfigpools.NotFound
264268 if errors .As (err , & notFound ) {
265- return nil , & InvalidKubeletConfig {
266- ObjectName : kcKey .Name ,
267- Err : notFound ,
268- }
269+ return nil , intreconcile .StepNormalSkip (fmt .Errorf ("%s: %v" , kcKey , notFound ))
269270 }
270- return nil , err
271+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
271272 }
272273
273274 klog .V (3 ).InfoS ("matched MCP to MCO KubeletConfig" , "kubeletconfig name" , kcKey .Name , "MCP name" , mcp .Name )
274275
275276 // nothing we care about, and we can't do much anyway
276277 if mcoKc .Spec .KubeletConfig == nil {
277278 klog .InfoS ("detected KubeletConfig with empty payload, ignoring" , "name" , kcKey .Name )
278- return nil , & InvalidKubeletConfig { ObjectName : kcKey .Name }
279+ return nil , intreconcile . StepNormalSkip ( fmt . Errorf ( "Invalid KubeletConfig %s" , kcKey .Name ))
279280 }
281+
282+ if mcp .Spec .Paused {
283+ klog .InfoS ("detected paused MCP" , "name" , mcp .Name )
284+ step := intreconcile .StepNormalSkip (fmt .Errorf ("MachineConfigPool of KubeletConfig %s is paused" , kcKey .Name ))
285+ step .Result = ctrl.Result {RequeueAfter : MachineConfigPoolPausedRetryPeriod }
286+ return nil , step
287+ }
288+
280289 return & kubeletConfigHandler {
281290 ownerObject : mcoKc ,
282291 mcoKc : mcoKc ,
283292 poolName : mcp .Name ,
284293 setCtrlRef : controllerutil .SetControllerReference ,
285- }, nil
294+ }, intreconcile . StepNormalSucess ( "" )
286295
287296 case platform .HyperShift :
288297 cmKc := & corev1.ConfigMap {}
289298 if err := r .Client .Get (ctx , kcKey , cmKc ); err != nil {
290- return nil , err
299+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
291300 }
292301
293302 nodePoolName := cmKc .Labels [HyperShiftNodePoolLabel ]
294303 kcData := cmKc .Data [HyperShiftConfigMapConfigKey ]
295304 mcoKc , err := kubeletconfig .DecodeFromData ([]byte (kcData ), r .Scheme )
296305 if err != nil {
297- return nil , err
306+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
298307 }
299308
300309 // nothing we care about, and we can't do much anyway
301310 if mcoKc .Spec .KubeletConfig == nil {
302311 klog .InfoS ("detected KubeletConfig with empty payload, ignoring" , "name" , kcKey .Name )
303- return nil , & InvalidKubeletConfig { ObjectName : kcKey .Name }
312+ return nil , intreconcile . StepNormalSkip ( fmt . Errorf ( "Invalid KubeletConfig %s" , kcKey .Name ))
304313 }
305314 return & kubeletConfigHandler {
306315 ownerObject : cmKc ,
@@ -312,9 +321,12 @@ func (r *KubeletConfigReconciler) makeKCHandlerForPlatform(ctx context.Context,
312321 setCtrlRef : func (owner , controlled metav1.Object , scheme * runtime.Scheme , opts ... controllerutil.OwnerReferenceOption ) error {
313322 return nil
314323 },
315- }, nil
324+ }, intreconcile . StepNormalSucess ( "" )
316325 }
317- return nil , fmt .Errorf ("unsupported platform: %s" , r .Platform )
326+ return nil , FailedConfigMapUpdateStep (kcKey .Name , fmt .Errorf ("unsupported platform: %s" , r .Platform ))
327+ }
328+ func FailedConfigMapUpdateStep (objName string , err error ) intreconcile.Step {
329+ return intreconcile .StepWarning (fmt .Errorf ("Failed to update RTE config from kubelet config %s: %v" , objName , err ))
318330}
319331
320332func (r * KubeletConfigReconciler ) deleteConfigMap (ctx context.Context , instance * nropv1.NUMAResourcesOperator , kcKey client.ObjectKey ) (* corev1.ConfigMap , bool , error ) {
0 commit comments