Skip to content

Commit a55b516

Browse files
authored
Merge pull request #390 from NVIDIA/add-r580
add driver 580.65.06
2 parents 8a313b5 + b8f5eaa commit a55b516

File tree

16 files changed

+285
-79
lines changed

16 files changed

+285
-79
lines changed

.common-ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ default:
1919
command: ["--experimental"]
2020

2121
variables:
22-
DRIVER_VERSIONS: 535.261.03 570.172.08
22+
DRIVER_VERSIONS: 535.261.03 570.172.08 580.65.06
2323
BUILD_MULTI_ARCH_IMAGES: "true"
2424

2525
stages:
@@ -81,13 +81,13 @@ trigger-pipeline:
8181
.driver-versions:
8282
parallel:
8383
matrix:
84-
- DRIVER_VERSION: [535.261.03, 570.172.08]
84+
- DRIVER_VERSION: [535.261.03, 570.172.08, 580.65.06]
8585

8686
# Define the driver versions for jobs that can be run in parallel
8787
.driver-versions-ubuntu24.04:
8888
parallel:
8989
matrix:
90-
- DRIVER_VERSION: [570.172.08]
90+
- DRIVER_VERSION: [570.172.08, 580.65.06]
9191

9292
# Define the matrix of precompiled jobs that can be run in parallel for ubuntu22.04
9393
.driver-versions-precompiled-ubuntu22.04:

.github/workflows/image.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ jobs:
3636
driver:
3737
- 535.261.03
3838
- 570.172.08
39+
- 580.65.06
3940
dist:
4041
- ubuntu20.04
4142
- ubuntu22.04
@@ -50,6 +51,8 @@ jobs:
5051
driver: 535.261.03
5152
- dist: ubuntu24.04
5253
driver: 535.261.03
54+
- dist: ubuntu20.04
55+
driver: 580.65.06
5356
fail-fast: false
5457
steps:
5558
- uses: actions/checkout@v4

.nvidia-ci.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -591,16 +591,16 @@ sign:ngc-ubuntu-rhel-rhcos:
591591
matrix:
592592
- SIGN_JOB_NAME: ["ubuntu"]
593593
VERSION: ["24.04"]
594-
DRIVER_VERSION: ["570.172.08"]
594+
DRIVER_VERSION: ["570.172.08", "580.65.06"]
595595
- SIGN_JOB_NAME: ["ubuntu"]
596596
VERSION: ["22.04"]
597-
DRIVER_VERSION: ["535.261.03", "570.172.08"]
597+
DRIVER_VERSION: ["535.261.03", "570.172.08", "580.65.06"]
598598
- SIGN_JOB_NAME: ["ubuntu"]
599599
VERSION: ["20.04"]
600-
DRIVER_VERSION: ["535.261.03", "570.172.08"]
600+
DRIVER_VERSION: ["535.261.03", "570.172.08", "580.65.06"]
601601
- SIGN_JOB_NAME: ["rhel"]
602602
VERSION: ["8.8", "8.10", "9.4", "9.5", "9.6"]
603-
DRIVER_VERSION: ["535.261.03", "570.172.08"]
603+
DRIVER_VERSION: ["535.261.03", "570.172.08", "580.65.06"]
604604
- SIGN_JOB_NAME: ["rhcos"]
605-
VERSION: ["4.12", "4.13", "4.14", "4.15", "4.16", "4.17", "4.18"]
606-
DRIVER_VERSION: ["535.261.03", "570.172.08"]
605+
VERSION: ["4.14", "4.15", "4.16", "4.17", "4.18"]
606+
DRIVER_VERSION: ["535.261.03", "570.172.08", "580.65.06"]

rhel8/Dockerfile

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -62,31 +62,14 @@ RUN sh /tmp/install.sh depinstall && \
6262

6363
ADD drivers drivers/
6464

65-
# Fetch the installer, fabricmanager and libnvidia-nscq automatically for passthrough/baremetal types
6665
RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
6766
cd drivers && \
6867
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64} && \
6968
curl -fSsl -O $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \
70-
chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \
71-
versionArray=(${DRIVER_VERSION//./ }); \
72-
DRIVER_BRANCH=${versionArray[0]}; \
73-
if [ ${versionArray[0]} -ge 470 ] || ([ ${versionArray[0]} == 460 ] && [ ${versionArray[1]} -ge 91 ]); then \
74-
fmPackage=nvidia-fabric-manager-${DRIVER_VERSION}-1; \
75-
else \
76-
fmPackage=nvidia-fabricmanager-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; \
77-
fi; \
78-
nscqPackage=libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; \
79-
dnf install -y ${fmPackage} ${nscqPackage}; fi
80-
81-
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$DRIVER_BRANCH" -ge "550" ]; then \
82-
dnf install -y nvidia-imex-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi
83-
84-
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$DRIVER_BRANCH" -ge "570" ]; then \
85-
dnf install -y infiniband-diags nvlsm; fi
86-
87-
# libnvsdm packages are not available for arm64
88-
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$DRIVER_BRANCH" -ge "570" ] && [ "$TARGETARCH" != "arm64" ]; then \
89-
dnf install -y libnvsdm-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi
69+
chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run; fi
70+
71+
# Install the extra packages: fabricmanager, libnvidia-nscq, libnvsdm, NVLink5 prerequisites (infiniband-diags, nvlsm) and imex
72+
RUN sh /tmp/install.sh extrapkgsinstall
9073

9174
COPY nvidia-driver /usr/local/bin
9275
COPY ocp_dtk_entrypoint /usr/local/bin

rhel8/install.sh

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,67 @@ nvidia_installer () {
8888
fi
8989
}
9090

91+
fabricmanager_install() {
92+
if [ "$DRIVER_BRANCH" -ge "580" ]; then
93+
dnf install -y nvidia-fabricmanager-${DRIVER_VERSION}-1
94+
else
95+
dnf install -y nvidia-fabric-manager-${DRIVER_VERSION}-1
96+
fi
97+
}
98+
99+
nscq_install() {
100+
if [ "$DRIVER_BRANCH" -ge "580" ]; then
101+
dnf install -y libnvidia-nscq-${DRIVER_VERSION}-1
102+
else
103+
dnf install -y libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1
104+
fi
105+
}
106+
107+
nvsdm_install() {
108+
if [ "$TARGETARCH" = "amd64" ]; then
109+
if [ "$DRIVER_BRANCH" -ge "580" ]; then
110+
dnf install -y libnvsdm-${DRIVER_VERSION}-1
111+
return 0
112+
fi
113+
if [ "$DRIVER_BRANCH" -ge "570" ]; then
114+
dnf install -y libnvsdm-${DRIVER_BRANCH}-${DRIVER_VERSION}-1
115+
return 0
116+
fi
117+
fi
118+
}
119+
120+
nvlink5_pkgs_install() {
121+
if [ "$DRIVER_BRANCH" -ge "550" ]; then
122+
dnf install -y infiniband-diags nvlsm
123+
fi
124+
}
125+
126+
imex_install() {
127+
if [ "$DRIVER_BRANCH" -ge "580" ]; then
128+
dnf install -y nvidia-imex-${DRIVER_VERSION}-1
129+
elif [ "$DRIVER_BRANCH" -ge "550" ]; then
130+
dnf install -y nvidia-imex-${DRIVER_BRANCH}-${DRIVER_VERSION}-1
131+
fi
132+
}
133+
134+
extra_pkgs_install() {
135+
if [ "$DRIVER_TYPE" != "vgpu" ]; then
136+
dnf module enable -y nvidia-driver:${DRIVER_BRANCH}-dkms
137+
138+
fabricmanager_install
139+
nscq_install
140+
nvsdm_install
141+
nvlink5_pkgs_install
142+
imex_install
143+
fi
144+
}
145+
91146
if [ "$1" = "nvinstall" ]; then
92147
nvidia_installer
93148
elif [ "$1" = "depinstall" ]; then
94149
dep_installer
150+
elif [ "$1" = "extrapkgsinstall" ]; then
151+
extra_pkgs_install
95152
else
96153
echo "Unknown function: $1"
97154
fi

rhel8/nvidia-driver

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,12 @@ _ensure_nvlink5_prerequisites() (
304304
# the correct set of parameters are passed to 'modprobe'.
305305
_get_module_params() {
306306
local base_path="/drivers"
307+
308+
# Starting from R580, we need to enable the CDMM (Coherent Driver Memory Management) module parameter.
309+
# This prevents the GPU memory for coherent systems (GH200, GB200 etc) from being exposed as a NUMA node
310+
# and thereby prevents over-reporting of a Kubernetes node's memory. This is needed for Kubernetes use cases.
311+
NVIDIA_MODULE_PARAMS+=("NVreg_CoherentGPUMemoryMode=driver")
312+
307313
# nvidia
308314
if [ -f "${base_path}/nvidia.conf" ]; then
309315
while IFS="" read -r param || [ -n "$param" ]; do

rhel9/Dockerfile

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -66,21 +66,10 @@ RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
6666
cd drivers && \
6767
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64} && \
6868
curl -fSsl -O $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \
69-
chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \
70-
versionArray=(${DRIVER_VERSION//./ }); \
71-
DRIVER_BRANCH=${versionArray[0]}; \
72-
dnf module enable -y nvidia-driver:${DRIVER_BRANCH}-dkms && \
73-
dnf install -y nvidia-fabric-manager-${DRIVER_VERSION}-1 libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi
69+
chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run; fi
7470

75-
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$DRIVER_BRANCH" -ge "550" ]; then \
76-
dnf install -y nvidia-imex-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi
77-
78-
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$DRIVER_BRANCH" -ge "570" ]; then \
79-
dnf install -y infiniband-diags nvlsm; fi
80-
81-
# libnvsdm packages are not available for arm64
82-
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$DRIVER_BRANCH" -ge "570" ] && [ "$TARGETARCH" != "arm64" ]; then \
83-
dnf install -y libnvsdm-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi
71+
# Install the extra packages: fabricmanager, libnvidia-nscq, libnvsdm, NVLink5 prerequisites (infiniband-diags, nvlsm) and imex
72+
RUN sh /tmp/install.sh extrapkgsinstall
8473

8574
COPY nvidia-driver /usr/local/bin
8675
COPY ocp_dtk_entrypoint /usr/local/bin

rhel9/install.sh

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,68 @@ nvidia_installer () {
9090
fi
9191
}
9292

93+
fabricmanager_install() {
94+
if [ "$DRIVER_BRANCH" -ge "580" ]; then
95+
dnf install -y nvidia-fabricmanager-${DRIVER_VERSION}-1
96+
else
97+
dnf install -y nvidia-fabric-manager-${DRIVER_VERSION}-1
98+
fi
99+
}
100+
101+
nscq_install() {
102+
if [ "$DRIVER_BRANCH" -ge "580" ]; then
103+
dnf install -y libnvidia-nscq-${DRIVER_VERSION}-1
104+
else
105+
dnf install -y libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1
106+
fi
107+
}
108+
109+
# libnvsdm packages are not available for arm64
110+
nvsdm_install() {
111+
if [ "$TARGETARCH" = "amd64" ]; then
112+
if [ "$DRIVER_BRANCH" -ge "580" ]; then
113+
dnf install -y libnvsdm-${DRIVER_VERSION}-1
114+
return 0
115+
fi
116+
if [ "$DRIVER_BRANCH" -ge "570" ]; then
117+
dnf install -y libnvsdm-${DRIVER_BRANCH}-${DRIVER_VERSION}-1
118+
return 0
119+
fi
120+
fi
121+
}
122+
123+
nvlink5_pkgs_install() {
124+
if [ "$DRIVER_BRANCH" -ge "550" ]; then
125+
dnf install -y infiniband-diags nvlsm
126+
fi
127+
}
128+
129+
imex_install() {
130+
if [ "$DRIVER_BRANCH" -ge "580" ]; then
131+
dnf install -y nvidia-imex-${DRIVER_VERSION}-1
132+
elif [ "$DRIVER_BRANCH" -ge "550" ]; then
133+
dnf install -y nvidia-imex-${DRIVER_BRANCH}-${DRIVER_VERSION}-1
134+
fi
135+
}
136+
137+
extra_pkgs_install() {
138+
if [ "$DRIVER_TYPE" != "vgpu" ]; then
139+
dnf module enable -y nvidia-driver:${DRIVER_BRANCH}-dkms
140+
141+
fabricmanager_install
142+
nscq_install
143+
nvsdm_install
144+
nvlink5_pkgs_install
145+
imex_install
146+
fi
147+
}
148+
93149
if [ "$1" = "nvinstall" ]; then
94150
nvidia_installer
95151
elif [ "$1" = "depinstall" ]; then
96152
dep_installer
153+
elif [ "$1" = "extrapkgsinstall" ]; then
154+
extra_pkgs_install
97155
else
98156
echo "Unknown function: $1"
99157
fi

rhel9/nvidia-driver

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,12 @@ _ensure_nvlink5_prerequisites() (
318318
# the correct set of parameters are passed to 'modprobe'.
319319
_get_module_params() {
320320
local base_path="/drivers"
321+
322+
# Starting from R580, we need to enable the CDMM (Coherent Driver Memory Management) module parameter.
323+
# This prevents the GPU memory for coherent systems (GH200, GB200 etc) from being exposed as a NUMA node
324+
# and thereby prevents over-reporting of a Kubernetes node's memory. This is needed for Kubernetes use cases.
325+
NVIDIA_MODULE_PARAMS+=("NVreg_CoherentGPUMemoryMode=driver")
326+
321327
# nvidia
322328
if [ -f "${base_path}/nvidia.conf" ]; then
323329
while IFS="" read -r param || [ -n "$param" ]; do

ubuntu22.04/Dockerfile

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -78,23 +78,9 @@ ADD drivers drivers/
7878
# Fetch the installer automatically for passthrough/baremetal types
7979
RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
8080
cd drivers && \
81-
/tmp/install.sh download_installer && \
82-
apt-get update && \
83-
apt-get install -y --no-install-recommends nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \
84-
libnvidia-nscq-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 && \
85-
rm -rf /var/lib/apt/lists/*; fi
86-
87-
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$DRIVER_BRANCH" -ge "550" ]; then \
88-
apt-get update && \
89-
apt-get install -y --no-install-recommends nvlsm infiniband-diags \
90-
nvidia-imex-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 && \
91-
rm -rf /var/lib/apt/lists/*; fi
92-
93-
# libnvsdm packages are not available for arm64
94-
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$DRIVER_BRANCH" -ge "560" ] && [ "$TARGETARCH" != "arm64" ]; then \
95-
apt-get update && \
96-
apt-get install -y --no-install-recommends libnvsdm-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 && \
97-
rm -rf /var/lib/apt/lists/*; fi
81+
/tmp/install.sh download_installer; fi
82+
83+
RUN /tmp/install.sh extra_pkgs_install
9884

9985
WORKDIR /drivers
10086

0 commit comments

Comments
 (0)