Skip to content

Commit 30601b5

Browse files
authored
fix: support env vars for NIC operator (#1827)
We need that for the DGX use case to support mlxfwreset before rebooting the node
2 parents 806fa45 + f42b07f commit 30601b5

11 files changed

+316
-0
lines changed

api/v1alpha1/nicclusterpolicy_types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,8 @@ type NicConfigurationOperatorSpec struct {
322322
Operator *ImageSpec `json:"operator"`
323323
// Image information for nic-configuration-daemon
324324
ConfigurationDaemon *ImageSpec `json:"configurationDaemon"`
325+
// List of environment variables to set in the NIC Configuration Operator and NIC Configuration Daemon containers.
326+
Env []v1.EnvVar `json:"env,omitempty"`
325327
// NicFirmwareStorage contains configuration for the NIC firmware storage. If not provided, the NIC firmware storage will not be configured.
326328
NicFirmwareStorage *NicFirmwareStorageSpec `json:"nicFirmwareStorage,omitempty"`
327329
// LogLevel sets the verbosity level of the logs. info|debug

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/mellanox.com_nicclusterpolicies.yaml

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,127 @@ spec:
526526
- repository
527527
- version
528528
type: object
529+
env:
530+
description: List of environment variables to set in the NIC Configuration
531+
Operator and NIC Configuration Daemon containers.
532+
items:
533+
description: EnvVar represents an environment variable present
534+
in a Container.
535+
properties:
536+
name:
537+
description: Name of the environment variable. Must be a
538+
C_IDENTIFIER.
539+
type: string
540+
value:
541+
description: |-
542+
Variable references $(VAR_NAME) are expanded
543+
using the previously defined environment variables in the container and
544+
any service environment variables. If a variable cannot be resolved,
545+
the reference in the input string will be unchanged. Double $$ are reduced
546+
to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
547+
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
548+
Escaped references will never be expanded, regardless of whether the variable
549+
exists or not.
550+
Defaults to "".
551+
type: string
552+
valueFrom:
553+
description: Source for the environment variable's value.
554+
Cannot be used if value is not empty.
555+
properties:
556+
configMapKeyRef:
557+
description: Selects a key of a ConfigMap.
558+
properties:
559+
key:
560+
description: The key to select.
561+
type: string
562+
name:
563+
default: ""
564+
description: |-
565+
Name of the referent.
566+
This field is effectively required, but due to backwards compatibility is
567+
allowed to be empty. Instances of this type with an empty value here are
568+
almost certainly wrong.
569+
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
570+
type: string
571+
optional:
572+
description: Specify whether the ConfigMap or its
573+
key must be defined
574+
type: boolean
575+
required:
576+
- key
577+
type: object
578+
x-kubernetes-map-type: atomic
579+
fieldRef:
580+
description: |-
581+
Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
582+
spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
583+
properties:
584+
apiVersion:
585+
description: Version of the schema the FieldPath
586+
is written in terms of, defaults to "v1".
587+
type: string
588+
fieldPath:
589+
description: Path of the field to select in the
590+
specified API version.
591+
type: string
592+
required:
593+
- fieldPath
594+
type: object
595+
x-kubernetes-map-type: atomic
596+
resourceFieldRef:
597+
description: |-
598+
Selects a resource of the container: only resources limits and requests
599+
(limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
600+
properties:
601+
containerName:
602+
description: 'Container name: required for volumes,
603+
optional for env vars'
604+
type: string
605+
divisor:
606+
anyOf:
607+
- type: integer
608+
- type: string
609+
description: Specifies the output format of the
610+
exposed resources, defaults to "1"
611+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
612+
x-kubernetes-int-or-string: true
613+
resource:
614+
description: 'Required: resource to select'
615+
type: string
616+
required:
617+
- resource
618+
type: object
619+
x-kubernetes-map-type: atomic
620+
secretKeyRef:
621+
description: Selects a key of a secret in the pod's
622+
namespace
623+
properties:
624+
key:
625+
description: The key of the secret to select from. Must
626+
be a valid secret key.
627+
type: string
628+
name:
629+
default: ""
630+
description: |-
631+
Name of the referent.
632+
This field is effectively required, but due to backwards compatibility is
633+
allowed to be empty. Instances of this type with an empty value here are
634+
almost certainly wrong.
635+
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
636+
type: string
637+
optional:
638+
description: Specify whether the Secret or its key
639+
must be defined
640+
type: boolean
641+
required:
642+
- key
643+
type: object
644+
x-kubernetes-map-type: atomic
645+
type: object
646+
required:
647+
- name
648+
type: object
649+
type: array
529650
logLevel:
530651
default: info
531652
description: LogLevel sets the verbosity level of the logs. info|debug

deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,127 @@ spec:
526526
- repository
527527
- version
528528
type: object
529+
env:
530+
description: List of environment variables to set in the NIC Configuration
531+
Operator and NIC Configuration Daemon containers.
532+
items:
533+
description: EnvVar represents an environment variable present
534+
in a Container.
535+
properties:
536+
name:
537+
description: Name of the environment variable. Must be a
538+
C_IDENTIFIER.
539+
type: string
540+
value:
541+
description: |-
542+
Variable references $(VAR_NAME) are expanded
543+
using the previously defined environment variables in the container and
544+
any service environment variables. If a variable cannot be resolved,
545+
the reference in the input string will be unchanged. Double $$ are reduced
546+
to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
547+
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
548+
Escaped references will never be expanded, regardless of whether the variable
549+
exists or not.
550+
Defaults to "".
551+
type: string
552+
valueFrom:
553+
description: Source for the environment variable's value.
554+
Cannot be used if value is not empty.
555+
properties:
556+
configMapKeyRef:
557+
description: Selects a key of a ConfigMap.
558+
properties:
559+
key:
560+
description: The key to select.
561+
type: string
562+
name:
563+
default: ""
564+
description: |-
565+
Name of the referent.
566+
This field is effectively required, but due to backwards compatibility is
567+
allowed to be empty. Instances of this type with an empty value here are
568+
almost certainly wrong.
569+
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
570+
type: string
571+
optional:
572+
description: Specify whether the ConfigMap or its
573+
key must be defined
574+
type: boolean
575+
required:
576+
- key
577+
type: object
578+
x-kubernetes-map-type: atomic
579+
fieldRef:
580+
description: |-
581+
Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
582+
spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
583+
properties:
584+
apiVersion:
585+
description: Version of the schema the FieldPath
586+
is written in terms of, defaults to "v1".
587+
type: string
588+
fieldPath:
589+
description: Path of the field to select in the
590+
specified API version.
591+
type: string
592+
required:
593+
- fieldPath
594+
type: object
595+
x-kubernetes-map-type: atomic
596+
resourceFieldRef:
597+
description: |-
598+
Selects a resource of the container: only resources limits and requests
599+
(limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
600+
properties:
601+
containerName:
602+
description: 'Container name: required for volumes,
603+
optional for env vars'
604+
type: string
605+
divisor:
606+
anyOf:
607+
- type: integer
608+
- type: string
609+
description: Specifies the output format of the
610+
exposed resources, defaults to "1"
611+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
612+
x-kubernetes-int-or-string: true
613+
resource:
614+
description: 'Required: resource to select'
615+
type: string
616+
required:
617+
- resource
618+
type: object
619+
x-kubernetes-map-type: atomic
620+
secretKeyRef:
621+
description: Selects a key of a secret in the pod's
622+
namespace
623+
properties:
624+
key:
625+
description: The key of the secret to select from. Must
626+
be a valid secret key.
627+
type: string
628+
name:
629+
default: ""
630+
description: |-
631+
Name of the referent.
632+
This field is effectively required, but due to backwards compatibility is
633+
allowed to be empty. Instances of this type with an empty value here are
634+
almost certainly wrong.
635+
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
636+
type: string
637+
optional:
638+
description: Specify whether the Secret or its key
639+
must be defined
640+
type: boolean
641+
required:
642+
- key
643+
type: object
644+
x-kubernetes-map-type: atomic
645+
type: object
646+
required:
647+
- name
648+
type: object
649+
type: array
529650
logLevel:
530651
default: info
531652
description: LogLevel sets the verbosity level of the logs. info|debug

example/crs/mellanox.com_v1alpha1_nicclusterpolicy_cr-full-ocp.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ spec:
108108
image: nic-configuration-operator-daemon
109109
repository: nvcr.io/nvstaging/mellanox
110110
version: network-operator-v25.10.0-beta.5
111+
# Uncomment to explicitly reset the NIC's Firmware before the reboot and after updating its non-volatile configuration.
112+
# Might be required on DGX servers where configuration update is not successfully applied after the warm reboot.
113+
# env:
114+
# - name: "FW_RESET_AFTER_CONFIG_UPDATE"
115+
# value: "true"
111116
nicFirmwareStorage:
112117
create: true
113118
pvcName: nic-fw-storage-pvc

example/crs/mellanox.com_v1alpha1_nicclusterpolicy_cr-full.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,11 @@ spec:
120120
image: nic-configuration-operator-daemon
121121
repository: nvcr.io/nvstaging/mellanox
122122
version: network-operator-v25.10.0-beta.5
123+
# Uncomment to explicitly reset the NIC's Firmware before the reboot and after updating its non-volatile configuration.
124+
# Might be required on DGX servers where configuration update is not successfully applied after the warm reboot.
125+
# env:
126+
# - name: "FW_RESET_AFTER_CONFIG_UPDATE"
127+
# value: "true"
123128
nicFirmwareStorage:
124129
create: true
125130
pvcName: nic-fw-storage-pvc

hack/templates/crs/mellanox.com_v1alpha1_nicclusterpolicy_cr-full-ocp.template

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ spec:
108108
image: {{ .NicConfigurationConfigDaemon.Image }}
109109
repository: {{ .NicConfigurationConfigDaemon.Repository }}
110110
version: {{ .NicConfigurationConfigDaemon.Version }}
111+
# Uncomment to explicitly reset the NIC's Firmware before the reboot and after updating its non-volatile configuration.
112+
# Might be required on DGX servers where configuration update is not successfully applied after the warm reboot.
113+
# env:
114+
# - name: "FW_RESET_AFTER_CONFIG_UPDATE"
115+
# value: "true"
111116
nicFirmwareStorage:
112117
create: true
113118
pvcName: nic-fw-storage-pvc

hack/templates/crs/mellanox.com_v1alpha1_nicclusterpolicy_cr-full.template

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,11 @@ spec:
120120
image: {{ .NicConfigurationConfigDaemon.Image }}
121121
repository: {{ .NicConfigurationConfigDaemon.Repository }}
122122
version: {{ .NicConfigurationConfigDaemon.Version }}
123+
# Uncomment to explicitly reset the NIC's Firmware before the reboot and after updating its non-volatile configuration.
124+
# Might be required on DGX servers where configuration update is not successfully applied after the warm reboot.
125+
# env:
126+
# - name: "FW_RESET_AFTER_CONFIG_UPDATE"
127+
# value: "true"
123128
nicFirmwareStorage:
124129
create: true
125130
pvcName: nic-fw-storage-pvc

manifests/state-nic-configuration-operator/060-operator.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ spec:
8585
env:
8686
- name: LOG_LEVEL
8787
value: {{ .CrSpec.LogLevel }}
88+
{{- if .CrSpec.Env }}
89+
{{- range .CrSpec.Env }}
90+
{{ . | yaml | nindentPrefix 14 "- " }}
91+
{{- end }}
92+
{{- end }}
8893
livenessProbe:
8994
httpGet:
9095
path: /healthz

manifests/state-nic-configuration-operator/070-config-daemon.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ spec:
8080
fieldPath: metadata.namespace
8181
- name: LOG_LEVEL
8282
value: {{ .CrSpec.LogLevel }}
83+
{{- if .CrSpec.Env }}
84+
{{- range .CrSpec.Env }}
85+
{{ . | yaml | nindentPrefix 14 "- " }}
86+
{{- end }}
87+
{{- end }}
8388
volumeMounts:
8489
- name: sys
8590
mountPath: /sys

0 commit comments

Comments
 (0)