Skip to content

Commit 852dbed

Browse files
authored
Merge pull request #284 from NVIDIA/precompiled-b200
[ubuntu-precompiled] add support for HGX B200
2 parents 29cad1d + 72f1861 commit 852dbed

File tree

4 files changed

+111
-3
lines changed

4 files changed

+111
-3
lines changed

ubuntu22.04/precompiled/local-repo.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,16 @@ download_driver_package_deps () {
4040
download_apt_with_dep nvidia-fabricmanager-${DRIVER_BRANCH} ${DRIVER_VERSION}-1
4141
download_apt_with_dep libnvidia-nscq-${DRIVER_BRANCH} ${DRIVER_VERSION}-1
4242

43+
if [ "$DRIVER_BRANCH" -ge "550" ]; then
44+
download_apt_with_dep nvlsm
45+
download_apt_with_dep infiniband-diags
46+
download_apt_with_dep nvidia-imex-${DRIVER_BRANCH} ${DRIVER_VERSION}-1
47+
fi
48+
49+
if [ "$DRIVER_BRANCH" -ge "560" ]; then
50+
download_apt_with_dep libnvsdm-${DRIVER_BRANCH} ${DRIVER_VERSION}-1
51+
fi
52+
4353
ls -al .
4454
popd
4555
}

ubuntu22.04/precompiled/nvidia-driver

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,31 @@ _assert_nvswitch_system() {
3131
return 0
3232
}
3333

34+
# Report whether this host is treated as an NVLink5+ system.
#
# Walks every /sys/class/infiniband/*/device directory and inspects its "vpd"
# file. The first VPD file that contains the string SW_MNG settles the answer.
#
# Outputs: prints "Detected NVLink5+ system" to stdout on a match.
# Returns: 0 when a matching VPD file is found, 1 otherwise.
#
# The body is a subshell, so the loop variables never leak to the caller.
_assert_nvlink5_system() (
    for ib_device in /sys/class/infiniband/*/device; do
        vpd_path="$ib_device/vpd"

        # No regular VPD file here (this also covers a glob that matched
        # nothing and was left as the literal pattern): try the next entry.
        [ -f "$vpd_path" ] || continue

        # VPD file without the SW_MNG marker: try the next entry.
        grep -q "SW_MNG" "$vpd_path" || continue

        echo "Detected NVLink5+ system"
        return 0
    done
    return 1
)
50+
51+
# Block until the kernel modules needed on an NVLink5+ system are loaded.
#
# Polls lsmod until both mlx5_core and ib_umad appear in its output. Before
# each 10 second pause a progress message is printed to stdout. There is no
# timeout: the loop only ends once both module names are listed.
#
# The body is a subshell, so the helper defined below stays private to it.
_ensure_nvlink5_prerequisites() (
    # Succeeds only when both module names show up in lsmod. Each name is
    # looked up in a fresh listing, and ib_umad is only checked once
    # mlx5_core has been found.
    required_modules_loaded() {
        lsmod | grep mlx5_core > /dev/null 2>&1 || return 1
        lsmod | grep ib_umad > /dev/null 2>&1
    }

    until required_modules_loaded; do
        echo "waiting for the mlx5_core and ib_umad kernel modules to be loaded"
        sleep 10
    done
)
58+
3459
# Check if mellanox devices are present
3560
_mellanox_devices_present() {
3661
devices_found=0
@@ -143,12 +168,31 @@ _load_driver() {
143168
nvidia-persistenced --persistence-mode
144169

145170
DRIVER_VERSION=$(nvidia-smi -q | grep "Driver Version" | awk -F: '{print $2}' | xargs)
146-
if _assert_nvswitch_system; then
171+
if _assert_nvlink5_system; then
172+
_ensure_nvlink5_prerequisites || return 1
173+
174+
echo "Installing NVIDIA fabric manager, libnvsdm and nvlsm packages..."
175+
apt-get install -y --no-install-recommends \
176+
infiniband-diags \
177+
nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \
178+
libnvsdm-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \
179+
nvlsm
180+
181+
echo "Starting NVIDIA fabric manager daemon for NVLink5+..."
182+
183+
fm_config_file=/usr/share/nvidia/nvswitch/fabricmanager.cfg
184+
fm_pid_file=/var/run/nvidia-fabricmanager/nv-fabricmanager.pid
185+
nvlsm_config_file=/usr/share/nvidia/nvlsm/nvlsm.conf
186+
nvlsm_pid_file=/var/run/nvidia-fabricmanager/nvlsm.pid
187+
/usr/bin/nvidia-fabricmanager-start.sh $fm_config_file $fm_pid_file $nvlsm_config_file $nvlsm_pid_file
188+
189+
# If not a NVLink5+ switch, check for the presence of NVLink4 (or below) switches
190+
elif _assert_nvswitch_system; then
147191
echo "Installing NVIDIA fabric manager and libnvidia NSCQ packages..."
148192
apt-get install -y --no-install-recommends \
149193
nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \
150194
libnvidia-nscq-${DRIVER_BRANCH}=${DRIVER_VERSION}-1
151-
195+
152196
echo "Starting NVIDIA fabric manager daemon..."
153197
nv-fabricmanager -c /usr/share/nvidia/nvswitch/fabricmanager.cfg
154198
fi

ubuntu24.04/precompiled/local-repo.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,16 @@ download_driver_package_deps () {
4040
download_apt_with_dep nvidia-fabricmanager-${DRIVER_BRANCH} ${DRIVER_VERSION}-1
4141
download_apt_with_dep libnvidia-nscq-${DRIVER_BRANCH} ${DRIVER_VERSION}-1
4242

43+
if [ "$DRIVER_BRANCH" -ge "550" ]; then
44+
download_apt_with_dep nvlsm
45+
download_apt_with_dep infiniband-diags
46+
download_apt_with_dep nvidia-imex-${DRIVER_BRANCH} ${DRIVER_VERSION}-1
47+
fi
48+
49+
if [ "$DRIVER_BRANCH" -ge "560" ]; then
50+
download_apt_with_dep libnvsdm-${DRIVER_BRANCH} ${DRIVER_VERSION}-1
51+
fi
52+
4353
ls -al .
4454
popd
4555
}

ubuntu24.04/precompiled/nvidia-driver

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,31 @@ _assert_nvswitch_system() {
3131
return 0
3232
}
3333

34+
# Report whether this host is treated as an NVLink5+ system.
#
# Walks every /sys/class/infiniband/*/device directory and inspects its "vpd"
# file. The first VPD file that contains the string SW_MNG settles the answer.
#
# Outputs: prints "Detected NVLink5+ system" to stdout on a match.
# Returns: 0 when a matching VPD file is found, 1 otherwise.
#
# The body is a subshell, so the loop variables never leak to the caller.
_assert_nvlink5_system() (
    for ib_device in /sys/class/infiniband/*/device; do
        vpd_path="$ib_device/vpd"

        # No regular VPD file here (this also covers a glob that matched
        # nothing and was left as the literal pattern): try the next entry.
        [ -f "$vpd_path" ] || continue

        # VPD file without the SW_MNG marker: try the next entry.
        grep -q "SW_MNG" "$vpd_path" || continue

        echo "Detected NVLink5+ system"
        return 0
    done
    return 1
)
50+
51+
# Block until the kernel modules needed on an NVLink5+ system are loaded.
#
# Polls lsmod until both mlx5_core and ib_umad appear in its output. Before
# each 10 second pause a progress message is printed to stdout. There is no
# timeout: the loop only ends once both module names are listed.
#
# The body is a subshell, so the helper defined below stays private to it.
_ensure_nvlink5_prerequisites() (
    # Succeeds only when both module names show up in lsmod. Each name is
    # looked up in a fresh listing, and ib_umad is only checked once
    # mlx5_core has been found.
    required_modules_loaded() {
        lsmod | grep mlx5_core > /dev/null 2>&1 || return 1
        lsmod | grep ib_umad > /dev/null 2>&1
    }

    until required_modules_loaded; do
        echo "waiting for the mlx5_core and ib_umad kernel modules to be loaded"
        sleep 10
    done
)
58+
3459
# Check if mellanox devices are present
3560
_mellanox_devices_present() {
3661
devices_found=0
@@ -143,7 +168,26 @@ _load_driver() {
143168
nvidia-persistenced --persistence-mode
144169

145170
DRIVER_VERSION=$(nvidia-smi -q | grep "Driver Version" | awk -F: '{print $2}' | xargs)
146-
if _assert_nvswitch_system; then
171+
if _assert_nvlink5_system; then
172+
_ensure_nvlink5_prerequisites || return 1
173+
174+
echo "Installing NVIDIA fabric manager, libnvsdm and nvlsm packages..."
175+
apt-get install -y --no-install-recommends \
176+
infiniband-diags \
177+
nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \
178+
libnvsdm-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \
179+
nvlsm
180+
181+
echo "Starting NVIDIA fabric manager daemon for NVLink5+..."
182+
183+
fm_config_file=/usr/share/nvidia/nvswitch/fabricmanager.cfg
184+
fm_pid_file=/var/run/nvidia-fabricmanager/nv-fabricmanager.pid
185+
nvlsm_config_file=/usr/share/nvidia/nvlsm/nvlsm.conf
186+
nvlsm_pid_file=/var/run/nvidia-fabricmanager/nvlsm.pid
187+
/usr/bin/nvidia-fabricmanager-start.sh $fm_config_file $fm_pid_file $nvlsm_config_file $nvlsm_pid_file
188+
189+
# If not a NVLink5+ switch, check for the presence of NVLink4 (or below) switches
190+
elif _assert_nvswitch_system; then
147191
echo "Installing NVIDIA fabric manager and libnvidia NSCQ packages..."
148192
apt-get install -y --no-install-recommends \
149193
nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \

0 commit comments

Comments
 (0)