Skip to content

Commit 00afcd9

Browse files
authored
No service interrupt for DPU-BMC flow (#289)
* Add bf-pldm-ver script to get all package version information from PLDM image * Update configure.ac based on comments * Update rshim.spec.in * Update bf-pldm-ver * Update bf-pldm-ver * Update bf-pldm-ver * Update rshim.spec.in * Update bf-pldm-ver * Fix CI/CD RPM build errors: /usr/sbin/bf-pldm-ver Installed (but unpackaged) file(s) found: /usr/sbin/bf-pldm-ver * no-service-interruption #4378041 * Update bfb-install Remove debug console output * Update bfb-install Update to removed debug code and missing if statement * Update bfb-install Remove unneeded code. * Update bfb-install Update cleanup to set RSHIM.SWINT.BIT2 for DPU mode. removed unneeded exit accidentally brought back.
1 parent 68e3c03 commit 00afcd9

File tree

2 files changed

+38
-37
lines changed

2 files changed

+38
-37
lines changed

rshim.spec.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ fi
155155
%{_sbindir}/bf-reg
156156
%{_sbindir}/bf-pldm-ver
157157
%{_sbindir}/fwpkg_unpack.py
158+
%{_sbindir}/bf-pldm-ver
158159
%{_mandir}/man1/mlx-mkbfb.1.gz
159160
%{_mandir}/man8/rshim.8.gz
160161
%{_mandir}/man8/bfb-install.8.gz

scripts/bfb-install

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#!/bin/bash
2-
32
# Copyright (c) 2020, NVIDIA Corporation
43
# All rights reserved.
54
#
@@ -48,18 +47,18 @@ usage ()
4847
{
4948
echo "Usage: $0 [options]"
5049
echo "Options:"
51-
echo " -a, --activate <0|1> Activate the upgrade."
50+
echo " -a, --apply-lfwp <0|1> Apply the upgrade. Default is 0: not to apply."
5251
echo " -b, --bfb <bfb_file> BFB image file to use."
5352
echo " -c, --config <config_file> Optional configuration file."
53+
echo " -d, --deferred Deferred activation (local rshim only: formerly runtime)."
5454
echo " -f, --rootfs <rootfs_file> Optional rootfs file."
5555
echo " -h, --help Show help message."
5656
echo " -k, --keep-log Do not clear the log after reading during bfb install."
57-
echo " -l, --lfwp Enable LFWP upgrade."
5857
echo " -m, --remote-mode <mode> Remote mode to use (scp, nc, ncpipe)."
59-
echo " -p, --pldm <pldm_file> PLDM image for runtime upgrade."
58+
echo " -p, --pldm <pldm_file> PLDM image for deferred upgrade."
6059
echo " -r, --rshim <device> Rshim device, format [<ip>:<port>:]rshim<N>."
6160
echo " -R, --reverse-nc Reverse netcat mode."
62-
echo " -u, --runtime Runtime upgrade (local rshim only)."
61+
echo " -u, --runtime Runtime upgrade (local rshim only: same as deferred)."
6362
echo " -v, --verbose Enable verbose output."
6463
}
6564

@@ -201,7 +200,7 @@ push_boot_stream_via_local_rshim()
201200
# Push the boot stream to local rshim
202201
echo "Pushing bfb${cfg:+ + cfg}${rootfs:+ + rootfs}"
203202

204-
if [ $runtime -eq 1 ]; then
203+
if [ $deferred -eq 1 ]; then
205204
# Skip reset when pushing bfb
206205
echo "BOOT_RESET_SKIP 1" > ${rshim_node}/misc
207206

@@ -223,9 +222,7 @@ push_boot_stream_via_local_rshim()
223222
echo "Error: Failed to push boot stream via local rshim"
224223
return
225224
fi
226-
227-
228-
if [ $runtime -eq 1 ]; then
225+
if [ $deferred -eq 1 ]; then
229226
echo "BOOT_RESET_SKIP 0" > ${rshim_node}/misc
230227
fi
231228
}
@@ -444,7 +441,7 @@ wait_for_update_to_finish()
444441

445442
filter0="Rebooting\.\.\.|finished|DPU is ready|Linux up|CRITICAL ERROR"
446443

447-
if [ $runtime -eq 0 ]; then
444+
if [ $deferred -eq 0 ]; then
448445
filter0="$filter0|In Enhanced NIC mode"
449446
if [ $nic_mode -eq 1 ]; then
450447
filter="In Enhanced NIC mode"
@@ -568,15 +565,14 @@ cleanup() {
568565
fi
569566
fi
570567

571-
if [ $runtime -eq 1 ]; then
568+
if [ $deferred -eq 1 ]; then
572569
# Reset to default state.
573570
echo "BOOT_RESET_SKIP 0" > ${rshim_node}/misc
574571

575572
# Cleanup SP2.BIT2.
576573
sp2=`${BF_REG} $(basename ${rshim_node}) ${RSH_SCRATCHPAD2}.64 | awk '{print $3}'`
577574
sp2=$((sp2 & ~4))
578575
${BF_REG} $(basename ${rshim_node}) ${RSH_SCRATCHPAD2}.64 $sp2 >/dev/null
579-
580576
# Set SWINT2.BIT2 for DPU mode
581577
if [ ${nic_mode} -eq 0 ]; then
582578
${BF_REG} $(basename ${rshim_node}) ${RSH_SWINT}.64 0x4 >/dev/null
@@ -592,7 +588,7 @@ cleanup() {
592588
fi
593589

594590
# Restore the original binding states for PF0 and PF1
595-
if [ $nic_mode -eq 1 -a -n "${pcie_bd}" -a ${runtime} -eq 0 ]; then
591+
if [ $nic_mode -eq 1 -a -n "${pcie_bd}" -a ${deferred} -eq 0 ]; then
596592
for i in 0 1; do
597593
if eval "[ \${pf${i}_bound} -eq 0 ]"; then
598594
[ $verbose -eq 1 ] && echo "Re-binding: skipping originally unbound pf${i} (${pcie_bd}.${i})"
@@ -618,16 +614,15 @@ rootfs=
618614
mode=local # Values can be local or remote
619615
remote_mode= # Values can be scp, nc, or ncpipe
620616
rshim= # rshim device string, format [<ip>:<port>:]rshim<N>
621-
runtime=0 # Values can be 0 or 1.
617+
deferred=0 # Values can be 0 or 1.
622618
verbose=0 # Values can be 0 or 1.
623619
reverse_nc=0 # Values can be 0 or 1.
624620
clear_on_read=1 # Values can be 0 or 1.
625621
num_bfb=0
626622
num_rshim=0
627623
max_bfb=1
628624
max_rshim=1
629-
lfwp=0
630-
activate=
625+
apply_lfwp=0 # Values can be 0 or 1.
631626

632627
rshim_node= # rshim device identifier, e.g. rshim0
633628
ip= # IP address for remote host
@@ -641,25 +636,25 @@ pcie_bd="" # PCIE Bus-Device
641636
pf0_bound=0 # PF0 is bound prior to the script run
642637
pf1_bound=0 # PF1 is bound prior to the script run
643638

644-
options=`getopt -n bfb-install -o a:b:c:f:hklm:p:r:Ruv \
645-
-l activate:,bfb:,config:,rootfs:,help,keep-log,lfwp,remote-mode:,reverse-nc,rshim:,pldm:,runtime,verbose \
639+
options=`getopt -n bfb-install -o a:b:c:f:dhkm:p:r:Ruv \
640+
-l apply-lfwp:,bfb:,config:,rootfs:,deferred,help,keep-log,remote-mode:,reverse-nc,rshim:,pldm:,runtime,verbose \
646641
-- "$@"`
647642
if [ $? != 0 ]; then echo "Command line error" >&2; exit 1; fi
648643
eval set -- $options
649644
while [ "$1" != -- ]; do
650645
case $1 in
651-
--activate|-a) shift; activate=$1 ;;
646+
--apply-lfwp|-a) shift; apply_lfwp=$1 ;;
652647
--bfb|-b) shift; bfb=$(readlink -f $1) num_bfb=$((num_bfb + 1));;
653648
--config|-c) shift; cfg=$1 ;;
649+
--deferred|-d) deferred=1 ;;
654650
--rootfs|-f) shift; rootfs=$1 ;;
655651
--help|-h) usage; exit 0 ;;
656652
--keep-log|-k) clear_on_read=0 ;;
657-
--lfwp|-l) lfwp=1; runtime=1 ;;
658653
--pldm|-p) shift; pldm=$(readlink -f $1) ;;
659654
--remote-mode|-m) shift; remote_mode=$1 ;;
660655
--rshim|-r) shift; rshim=$1 num_rshim=$((num_rshim + 1));;
661656
--reverse-nc|-R) reverse_nc=1 ;;
662-
--runtime|-u) runtime=1 ;;
657+
--runtime|-u) deferred=1 ;;
663658
--verbose|-v) verbose=1 ;;
664659
--) shift; break;;
665660
*) echo "Error: Invalid argument: $1" >&2; usage >&2; exit 1 ;;
@@ -669,10 +664,7 @@ done
669664

670665
# Parameter checks
671666

672-
# Default activate to the lfwp value.
673-
activate=${activate:-$lfwp}
674-
675-
if [ -z "${bfb}" -a -z "${pldm}" -a ${activate} -eq 0 ]; then
667+
if [ -z "${bfb}" -a -z "${pldm}" -a ${apply_lfwp} -eq 0 ]; then
676668
echo "Error: Need to provide either bfb or pldm file."
677669
usage >&2
678670
exit 1
@@ -752,8 +744,8 @@ if [ $mode == "local" ] && [ -n "$remote_mode" ]; then
752744
fi
753745

754746
if [ $mode == "remote" ] ; then
755-
if [ $runtime -eq 1 ]; then
756-
echo "Error: runtime upgrade is only supported for local rshim."
747+
if [ $deferred -eq 1 ]; then
748+
echo "Error: deferred upgrade is only supported for local rshim."
757749
exit 1
758750
fi
759751

@@ -845,8 +837,8 @@ if [ -n "${pldm}" ]; then
845837
exit 1
846838
fi
847839

848-
# PLDM automatically indicate runtime.
849-
runtime=1
840+
# PLDM automatically indicates a deferred upgrade.
841+
deferred=1
850842

851843
pldm_nicfw=""
852844
pldm_bfb=""
@@ -885,12 +877,12 @@ if [ -n "${pldm}" ]; then
885877

886878
pldm=""
887879
bfb="${TMP_DIR}/pldm/pldm.bfb"
888-
elif [ ${runtime} -eq 1 -a -e "${bfb}" ]; then
880+
elif [ ${deferred} -eq 1 -a -e "${bfb}" ]; then
889881
# Convert bundle BFB to flat BFB if needed.
890882
# This conversion is only supported on PCIe host.
891883
is_bundle=$(mlx-mkbfb -d "${bfb}" | grep "In-memory filesystem")
892884
if [ -n "${is_bundle}" -a -n "$pcie_bd" ]; then
893-
echo "Convert $(basename "${bfb}") to flat format for runtime upgrade"
885+
echo "Convert $(basename "${bfb}") to flat format for deferred upgrade"
894886
if ! which flint &> /dev/null; then
895887
echo "Error: flint not found."
896888
exit 1
@@ -926,7 +918,7 @@ elif [ ${runtime} -eq 1 -a -e "${bfb}" ]; then
926918
fi
927919

928920
# Check again if bfb file exists (if not activate-only).
929-
if [ ! -e "${bfb}" -a ${activate} -eq 0 ]; then
921+
if [ ! -e "${bfb}" -a ${apply_lfwp} -eq 0 ]; then
930922
echo "Error: ${bfb} not found."
931923
exit 1
932924
fi
@@ -1007,7 +999,7 @@ if [ -z "${pv}" ]; then
1007999
echo "Warn: 'pv' command not found. Continue without showing BFB progress."
10081000
fi
10091001

1010-
if [ ${nic_mode} -eq 1 -a -n "${pcie_bd}" -a ${runtime} -eq 0 ]; then
1002+
if [ ${nic_mode} -eq 1 -a -n "${pcie_bd}" -a ${deferred} -eq 0 ]; then
10111003
# Set BREADCRUMB.BIT32 to indicate NIC mode.
10121004
breadcrumb1=$(${BF_REG} $(basename ${rshim_node}) ${RSH_BREADCRUMB1}.64 | awk '{print $3}')
10131005
breadcrumb1=$((breadcrumb1 | (0x1 << 32)))
@@ -1025,10 +1017,10 @@ if [ ${nic_mode} -eq 1 -a -n "${pcie_bd}" -a ${runtime} -eq 0 ]; then
10251017
done
10261018
fi
10271019

1028-
# Reactivate NIC_FW if runtime but not LFWP.
1029-
if [ ${lfwp} -eq 0 -a -n "${pcie_bd}" -a ${runtime} -eq 1 ]; then
1020+
# Reactivate NIC_FW if deferred.
1021+
if [ -n "${pcie_bd}" -a ${deferred} -eq 1 ]; then
10301022
if which flint &> /dev/null; then
1031-
# Suppress errors if already activated.
1023+
# Suppress errors if already applied.
10321024
flint -d ${pcie_bd}.0 ir >&/dev/null
10331025
else
10341026
echo "Flint not found. Skip NIC_FW reactivation."
@@ -1042,7 +1034,7 @@ if [ -e "${bfb}" ]; then
10421034
fi
10431035

10441036
# LFWP activation on PCIe host.
1045-
if [ ${lfwp} -eq 1 -a ${activate} -eq 1 ]; then
1037+
if [ ${apply_lfwp} -eq 1 ]; then
10461038
if [ -z "${pcie_bd}" ]; then
10471039
echo "ERROR: Failed to activate LFWP, PCIe device not found."
10481040
exit 1
@@ -1053,6 +1045,14 @@ if [ ${lfwp} -eq 1 -a ${activate} -eq 1 ]; then
10531045
exit 1
10541046
fi
10551047

1048+
# Set SP2.BIT6=1 to issue the PLDM ActivateFirmware command, which activates the new image after reboot.
1049+
sp2=`${BF_REG} $(basename ${rshim_node}) ${RSH_SCRATCHPAD2}.64 | awk '{print $3}'`
1050+
sp2=$((sp2 | 64))
1051+
${BF_REG} $(basename ${rshim_node}) ${RSH_SCRATCHPAD2}.64 $sp2 >/dev/null
1052+
# Set SWINT2.BIT2 for DPU mode
1053+
if [ ${nic_mode} -eq 0 ]; then
1054+
${BF_REG} $(basename ${rshim_node}) ${RSH_SWINT}.64 0x4 >/dev/null
1055+
fi
10561056
# Best-effort to check and apply L0 reset.
10571057
if (mlxfwreset -d ${pcie_bd}.0 q | grep live-Patch | grep -qw "\-Supported"); then
10581058
echo "Live Patch NIC Firmware reset is supported."

0 commit comments

Comments
 (0)