2 files changed: +3 -4 lines changed
lines changed Original file line number Diff line number Diff line change @@ -60,12 +60,12 @@ elif [[ "$NHC_RM" == "slurm" ]]; then
6060 # Slurm does not run the HealthCheckProgram on nodes in the DOWN state,
6161 # but if someone runs NHC by hand, we want to be able to do the right thing.
6262 case " $STATUS " in
63- * ' @' * |* ' #' * |boot * | * -* |plnd* )
63+ * ' @' * |* ' #' * |* -* |plnd* )
6464 # These states aren't handled yet.
6565 echo " $0 : State \" $STATUS \" not yet handled; ignoring."
6666 exit 0
6767 ;;
68- down* |drain* |drng* |fail* |maint* )
68+ down* |drain* |drng* |fail* |maint* |boot * )
6969 # If there is no old note, and we've not been told to ignore that, do not online the node.
7070 if [[ " $OLD_NOTE_LEADER " == " none" && " $IGNORE_EMPTY_NOTE " != " 1" ]]; then
7171 echo " $0 : Not onlining $HOSTNAME : No note set."
Original file line number Diff line number Diff line change @@ -25,7 +25,7 @@ NOTE="$*"
2525if [[ " $NHC_RM " == " slurm" ]]; then
2626 SLURM_SINFO=" ${SLURM_SINFO:- sinfo} "
2727 SLURM_SCONTROL=" ${SLURM_SCONTROL:- scontrol} "
28- SLURM_SC_REBOOT_ARGS=" ${SLURM_SC_REBOOT_ARGS:- reboot ASAP NextState=RESUME } "
28+ SLURM_SC_REBOOT_ARGS=" ${SLURM_SC_REBOOT_ARGS:- reboot ASAP NextState=DOWN } "
2929
3030 LINE=( $( $SLURM_SINFO -o ' %t %E' -hn $HOSTNAME ) )
3131 STATUS=" ${LINE[0]} "
@@ -47,7 +47,6 @@ if [[ "$NHC_RM" == "slurm" ]]; then
4747 if [[ " $OLD_NOTE_LEADER " != " none" && " $OLD_NOTE_LEADER " != " $LEADER " ]]; then
4848 LEADER=" $OLD_NOTE_LEADER "
4949 NOTE=" $OLD_NOTE "
50- SLURM_SC_REBOOT_ARGS=" reboot ASAP NextState=DOWN"
5150 fi
5251 echo " $0 : Marking $STATUS $HOSTNAME for reboot: $LEADER $NOTE "
5352 exec $SLURM_SCONTROL $SLURM_SC_REBOOT_ARGS Reason=" $LEADER $NOTE " $HOSTNAME
You can’t perform that action at this time.
0 commit comments