Skip to content

Commit 26488dd

Browse files
feat: manage nvidia.com/operator.nic-configuration.wait label (#1809)
When NIC Configuration Operator is disabled, manage the label similar to how the mofed.wait label is managed
2 parents ae0fae9 + 0f6b008 commit 26488dd

File tree

7 files changed

+94
-36
lines changed

7 files changed

+94
-36
lines changed

controllers/nicclusterpolicy_controller.go

Lines changed: 55 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ type NicClusterPolicyReconciler struct {
108108

109109
// Reconcile is part of the main kubernetes reconciliation loop which aims to
110110
// move the current state of the cluster closer to the desired state.
111+
//
112+
//nolint:gocognit,funlen
111113
func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
112114
// Wait for migration flow to finish
113115
select {
@@ -126,14 +128,24 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req
126128
// Request object not found, could have been deleted after reconcile request.
127129
// Owned objects are automatically garbage collected. For additional cleanup logic use finalizers.
128130
// Return and don't requeue
129-
shouldRequeue, err := r.handleMOFEDWaitLabelsNoConfig(ctx)
131+
shouldRequeue, err := r.handleWaitLabelsNoConfig(ctx, consts.OfedDriverLabel, nodeinfo.NodeLabelWaitOFED)
130132
if err != nil {
131133
reqLogger.V(consts.LogLevelError).Error(err, "Fail to clear Mofed label on CR deletion.")
132134
return reconcile.Result{}, err
133135
}
134136
if shouldRequeue {
135137
return r.requeue()
136138
}
139+
140+
shouldRequeue, err = r.handleWaitLabelsNoConfig(
141+
ctx, consts.NicConfigurationDaemonLabel, nodeinfo.NodeLabelWaitNicConfig)
142+
if err != nil {
143+
reqLogger.V(consts.LogLevelError).Error(err, "Fail to clear NIC Configuration wait label on CR deletion.")
144+
return reconcile.Result{}, err
145+
}
146+
if shouldRequeue {
147+
return r.requeue()
148+
}
137149
return reconcile.Result{}, nil
138150
}
139151
// Error reading the object - requeue the request.
@@ -175,16 +187,27 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req
175187
} else {
176188
r.DocaDriverImagesProvider.SetImageSpec(nil)
177189
}
190+
178191
// Sync state and update status
179192
managerStatus := r.stateManager.SyncState(ctx, instance, sc)
180193
r.updateCrStatus(ctx, instance, managerStatus)
181194

182-
shouldRequeue, err := r.handleMOFEDWaitLabels(ctx, instance)
195+
shouldRequeueMofed, err := r.handleMOFEDWaitLabels(ctx, instance)
183196
if err != nil {
184197
return reconcile.Result{}, err
185198
}
186199

187-
if shouldRequeue || managerStatus.Status != state.SyncStateReady {
200+
shouldRequeueNicConfig := false
201+
// If NIC Configuration Operator is not configured, handle NIC Configuration wait labels
202+
if instance.Spec.NicConfigurationOperator == nil {
203+
shouldRequeueNicConfig, err = r.handleWaitLabelsNoConfig(
204+
ctx, consts.NicConfigurationDaemonLabel, nodeinfo.NodeLabelWaitNicConfig)
205+
if err != nil {
206+
return reconcile.Result{}, err
207+
}
208+
}
209+
210+
if shouldRequeueMofed || shouldRequeueNicConfig || managerStatus.Status != state.SyncStateReady {
188211
return r.requeue()
189212
}
190213

@@ -206,7 +229,7 @@ func (r *NicClusterPolicyReconciler) handleMOFEDWaitLabels(
206229
reqLogger := log.FromContext(ctx)
207230
if cr.Spec.OFEDDriver == nil {
208231
reqLogger.V(consts.LogLevelDebug).Info("no OFED config in the policy, check OFED wait label on nodes")
209-
return r.handleMOFEDWaitLabelsNoConfig(ctx)
232+
return r.handleWaitLabelsNoConfig(ctx, consts.OfedDriverLabel, nodeinfo.NodeLabelWaitOFED)
210233
}
211234
pods := &corev1.PodList{}
212235
_ = r.Client.List(ctx, pods, client.MatchingLabels{"nvidia.com/ofed-driver": ""})
@@ -224,31 +247,32 @@ func (r *NicClusterPolicyReconciler) handleMOFEDWaitLabels(
224247
"node", pod.Spec.NodeName)
225248
labelValue = "false"
226249
}
227-
if err := setOFEDWaitLabel(ctx, r.Client, pod.Spec.NodeName, labelValue); err != nil {
250+
if err := setNodeLabel(ctx, r.Client, pod.Spec.NodeName, nodeinfo.NodeLabelWaitOFED, labelValue); err != nil {
228251
return false, err
229252
}
230253
}
231254
return false, nil
232255
}
233256

234-
// handleMOFEDWaitLabelsNoConfig handles MOFED wait label for scenarios when OFED is
235-
// not configured in NicClusterPolicy, do the following:
236-
// - set "network.nvidia.com/operator.mofed.wait" to false on Nodes with NVIDIA NICs
237-
// - remove "network.nvidia.com/operator.mofed.wait" which have no NVIDIA NICs anymore
238-
// - set "network.nvidia.com/operator.mofed.wait" to true if detects OFED Pod
239-
// on the node (probably in the terminating state).
257+
// handleWaitLabelsNoConfig handles wait labels for for scenarios when given component is
258+
// not configured in NicClusterPolicy and does the following:
259+
// - sets given label to false on Nodes with NVIDIA NICs
260+
// - removes given label from nodes which have no NVIDIA NICs anymore
261+
// - sets given label to true if detects a pod with a specific label on the node (probably in the terminating state).
240262
// returns true if requeue (resync) is required
241-
func (r *NicClusterPolicyReconciler) handleMOFEDWaitLabelsNoConfig(ctx context.Context) (bool, error) {
263+
//
264+
//nolint:lll
265+
func (r *NicClusterPolicyReconciler) handleWaitLabelsNoConfig(ctx context.Context, podLabel, waitLabel string) (bool, error) {
242266
reqLogger := log.FromContext(ctx)
243-
nodesWithOFEDContainer := map[string]struct{}{}
267+
nodesWithPod := map[string]struct{}{}
244268
pods := &corev1.PodList{}
245-
if err := r.Client.List(ctx, pods, client.MatchingLabels{consts.OfedDriverLabel: ""}); err != nil {
269+
if err := r.Client.List(ctx, pods, client.MatchingLabels{podLabel: ""}); err != nil {
246270
return false, errors.Wrap(err, "failed to list pods")
247271
}
248272
for i := range pods.Items {
249273
pod := pods.Items[i]
250274
if pod.Spec.NodeName != "" {
251-
nodesWithOFEDContainer[pod.Spec.NodeName] = struct{}{}
275+
nodesWithPod[pod.Spec.NodeName] = struct{}{}
252276
}
253277
}
254278
nodes := &corev1.NodeList{}
@@ -258,40 +282,40 @@ func (r *NicClusterPolicyReconciler) handleMOFEDWaitLabelsNoConfig(ctx context.C
258282
for i := range nodes.Items {
259283
node := nodes.Items[i]
260284
labelValue := ""
261-
if _, hasOFED := nodesWithOFEDContainer[node.Name]; hasOFED {
285+
if _, hasPod := nodesWithPod[node.Name]; hasPod {
262286
labelValue = "true"
263287
} else if node.GetLabels()[nodeinfo.NodeLabelMlnxNIC] == "true" {
264288
labelValue = "false"
265289
}
266-
if err := setOFEDWaitLabel(ctx, r.Client, node.Name, labelValue); err != nil {
290+
if err := setNodeLabel(ctx, r.Client, node.Name, waitLabel, labelValue); err != nil {
267291
return false, err
268292
}
269293
}
270-
if len(nodesWithOFEDContainer) > 0 {
271-
// There is no OFED spec in the NicClusterPolicy, but some OFED Pods are on nodes.
294+
if len(nodesWithPod) > 0 {
295+
// There is no given component spec in the NicClusterPolicy, but some pods are on nodes.
272296
// These Pods should be eventually removed from the cluster,
273-
// and we will need to update the OFED wait label for nodes.
297+
// and we will need to update the given wait label for nodes.
274298
// Here, we trigger resync explicitly to ensure that we will always handle the removal of the Pod.
275-
// This explicit resync is required because we don't watch for Pods and can't rely on the OFED DaemonSet
276-
// update in this case (cache with Pods can be outdated when we handle OFED DaemonSet removal event).
299+
// This explicit resync is required because we don't watch for Pods and can't rely on the DaemonSet
300+
// update in this case (cache with Pods can be outdated when we handle DaemonSet removal event).
277301
reqLogger.V(consts.LogLevelDebug).Info(
278-
"no OFED spec in NicClusterPolicy but there are OFED Pods on nodes, requeue")
302+
"no given component spec in NicClusterPolicy but there are pods on nodes, requeue", "component", podLabel)
279303
return true, nil
280304
}
281305
return false, nil
282306
}
283307

284-
// set the value for the OFED wait label, remove the label if the value is ""
285-
func setOFEDWaitLabel(ctx context.Context, c client.Client, node, value string) error {
308+
// setNodeLabel sets the value for the given label, remove the label if the value is ""
309+
func setNodeLabel(ctx context.Context, c client.Client, node, label, value string) error {
286310
reqLogger := log.FromContext(ctx)
287311
var patch []byte
288312
if value == "" {
289-
patch = []byte(fmt.Sprintf(`{"metadata":{"labels":{%q: null}}}`, nodeinfo.NodeLabelWaitOFED))
290-
reqLogger.V(consts.LogLevelDebug).Info("remove OFED wait label from the node", "node", node)
313+
patch = []byte(fmt.Sprintf(`{"metadata":{"labels":{%q: null}}}`, label))
314+
reqLogger.V(consts.LogLevelDebug).Info("remove given label from the node", "node", node, "label", label)
291315
} else {
292-
patch = []byte(fmt.Sprintf(`{"metadata":{"labels":{%q: %q}}}`, nodeinfo.NodeLabelWaitOFED, value))
293-
reqLogger.V(consts.LogLevelDebug).Info("update OFED wait label for the node",
294-
"node", node, "value", value)
316+
patch = []byte(fmt.Sprintf(`{"metadata":{"labels":{%q: %q}}}`, label, value))
317+
reqLogger.V(consts.LogLevelDebug).Info("update given label for the node",
318+
"node", node, "label", label, "value", value)
295319
}
296320

297321
err := c.Patch(ctx, &corev1.Node{
@@ -301,8 +325,7 @@ func setOFEDWaitLabel(ctx context.Context, c client.Client, node, value string)
301325
}, client.RawPatch(types.StrategicMergePatchType, patch))
302326

303327
if err != nil {
304-
return errors.Wrapf(err, "unable to patch %s label for node %s", nodeinfo.NodeLabelWaitOFED,
305-
node)
328+
return errors.Wrapf(err, "unable to patch %s label for node %s", label, node)
306329
}
307330
return nil
308331
}

controllers/nicclusterpolicy_controller_test.go

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ var _ = Describe("NicClusterPolicyReconciler Controller", func() {
228228
})
229229
})
230230
Context("When NicClusterPolicy CR is deleted", func() {
231-
It("should set mofed.wait to false", func() {
231+
It("should set mofed.wait and nic-configuration.wait to false", func() {
232232
By("Create Node")
233233
node := &corev1.Node{
234234
ObjectMeta: metav1.ObjectMeta{
@@ -254,6 +254,20 @@ var _ = Describe("NicClusterPolicyReconciler Controller", func() {
254254
ImagePullSecrets: []string{},
255255
},
256256
},
257+
NicConfigurationOperator: &mellanoxv1alpha1.NicConfigurationOperatorSpec{
258+
Operator: &mellanoxv1alpha1.ImageSpec{
259+
Image: "nic-configuration-operator",
260+
Repository: "nvcr.io/nvidia/mellanox",
261+
Version: "1.0.0",
262+
ImagePullSecrets: []string{},
263+
},
264+
ConfigurationDaemon: &mellanoxv1alpha1.ImageSpec{
265+
Image: "nic-configuration-daemon",
266+
Repository: "nvcr.io/nvidia/mellanox",
267+
Version: "1.0.0",
268+
ImagePullSecrets: []string{},
269+
},
270+
},
257271
},
258272
}
259273

@@ -277,8 +291,8 @@ var _ = Describe("NicClusterPolicyReconciler Controller", func() {
277291
err = k8sClient.Get(context.TODO(), types.NamespacedName{Namespace: node.GetNamespace(), Name: node.GetName()}, n)
278292
Expect(err).NotTo(HaveOccurred())
279293

280-
patch := []byte(fmt.Sprintf(`{"metadata":{"labels":{%q:"true", %q:"true"}}}`,
281-
nodeinfo.NodeLabelWaitOFED, nodeinfo.NodeLabelMlnxNIC))
294+
patch := []byte(fmt.Sprintf(`{"metadata":{"labels":{%q:"true", %q:"true", %q:"true"}}}`,
295+
nodeinfo.NodeLabelWaitOFED, nodeinfo.NodeLabelMlnxNIC, nodeinfo.NodeLabelWaitNicConfig))
282296
err = k8sClient.Patch(context.TODO(), n, client.RawPatch(types.StrategicMergePatchType, patch))
283297
Expect(err).NotTo(HaveOccurred())
284298

@@ -288,7 +302,8 @@ var _ = Describe("NicClusterPolicyReconciler Controller", func() {
288302
if err != nil {
289303
return false
290304
}
291-
return n.ObjectMeta.Labels[nodeinfo.NodeLabelWaitOFED] == "true"
305+
return n.ObjectMeta.Labels[nodeinfo.NodeLabelWaitOFED] == "true" &&
306+
n.ObjectMeta.Labels[nodeinfo.NodeLabelWaitNicConfig] == "true"
292307
}, timeout, interval).Should(BeTrue())
293308

294309
By("Delete NicClusterPolicy")
@@ -305,6 +320,16 @@ var _ = Describe("NicClusterPolicyReconciler Controller", func() {
305320
return n.ObjectMeta.Labels[nodeinfo.NodeLabelWaitOFED] == "false"
306321
}, timeout*3, interval).Should(BeTrue())
307322

323+
By("Verify Nic Configuration Wait Label is false")
324+
Eventually(func() bool {
325+
n := &corev1.Node{}
326+
err = k8sClient.Get(context.TODO(), types.NamespacedName{Namespace: node.GetNamespace(), Name: node.GetName()}, n)
327+
if err != nil {
328+
return false
329+
}
330+
return n.ObjectMeta.Labels[nodeinfo.NodeLabelWaitNicConfig] == "false"
331+
}, timeout*3, interval).Should(BeTrue())
332+
308333
By("Delete Node")
309334
err = k8sClient.Delete(context.TODO(), node)
310335
Expect(err).NotTo(HaveOccurred())

deployment/network-operator/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,9 @@ sriov-network-operator:
176176
configDaemonNodeSelector:
177177
beta.kubernetes.io/os: "linux"
178178
network.nvidia.com/operator.mofed.wait: "false"
179+
# Enable when using together with NIC Configuration Operator to wait until
180+
# all required FW parameters are successfully applied before configuring SR-IOV
181+
# network.nvidia.com/operator.nic-configuration.wait: "false"
179182

180183
# Maintenance Operator chart related values.
181184
maintenance-operator-chart:

hack/templates/values/values.template

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,9 @@ sriov-network-operator:
176176
configDaemonNodeSelector:
177177
beta.kubernetes.io/os: "linux"
178178
network.nvidia.com/operator.mofed.wait: "false"
179+
# Enable when using together with NIC Configuration Operator to wait until
180+
# all required FW parameters are successfully applied before configuring SR-IOV
181+
# network.nvidia.com/operator.nic-configuration.wait: "false"
179182

180183
# Maintenance Operator chart related values.
181184
maintenance-operator-chart:

manifests/state-nic-configuration-operator/070-config-daemon.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ spec:
1414
annotations:
1515
kubectl.kubernetes.io/default-container: nic-configuration-daemon
1616
labels:
17+
nvidia.com/nic-configuration-daemon: ""
1718
app.kubernetes.io/name: nic-configuration-daemon
1819
spec:
1920
serviceAccountName: nic-configuration-operator

pkg/consts/consts.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ const (
3434
NicClusterPolicyResourceName = "nic-cluster-policy"
3535
// OfedDriverLabel is the label key for ofed driver Pods and DaemonSets.
3636
OfedDriverLabel = "nvidia.com/ofed-driver"
37+
// NicConfigurationDaemonLabel is the label key for nic-configuration-daemon Pods and DaemonSets.
38+
NicConfigurationDaemonLabel = "nvidia.com/nic-configuration-daemon"
3739
// StateLabel is the label key describing which state the operator created a Kubernetes object from.
3840
StateLabel = "nvidia.network-operator.state"
3941
// DefaultCniBinDirectory is the default location of the CNI binaries on a host.

pkg/nodeinfo/attributes.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ const (
3232
NodeLabelMlnxNIC = "feature.node.kubernetes.io/pci-15b3.present"
3333
NodeLabelNvGPU = "nvidia.com/gpu.present"
3434
NodeLabelWaitOFED = "network.nvidia.com/operator.mofed.wait"
35+
NodeLabelWaitNicConfig = "network.nvidia.com/operator.nic-configuration.wait"
3536
NodeLabelCudaVersionMajor = "nvidia.com/cuda.driver.major"
3637
NodeLabelOSTreeVersion = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION"
3738
)

0 commit comments

Comments
 (0)