3737 cat /mnt/disks/certs/server/{{.Values.tls.certFilename | default "tls.key"}} /mnt/disks/certs/server/{{.Values.tls.certKeyFilename | default "tls.crt"}} > /dev/shm/server.cert
3838 export tls_servercertificate_filepath="/dev/shm/server.cert"
3939{{- end }}
40+ # Deal with the fact we cannot accept "-" in router names
41+ export routername=$(echo $(hostname) | sed 's/-//g')
4042{{- if .Values.solace.redundancy }}
41- # [TODO] KBARR not using correct method of finding ordinal until we bump min Kubernetes release above 1.8.1
42- # https://github.com/kubernetes/kubernetes/issues/40651
43- # node_ordinal=$(STATEFULSET_ORDINAL)
4443 IFS='-' read -ra host_array <<< $(hostname)
4544 node_ordinal=${host_array[-1]}
4645 if [[ ! -z `echo $STATEFULSET_NAMESPACE` ]]; then
4948 namespace=default
5049 fi
5150 service={{ template "solace.fullname" . }}
52- # Deal with the fact we cannot accept "-" in routre names
5351 service_name=$(echo ${service} | sed 's/-//g')
54- export routername=$(echo $(hostname) | sed 's/-//g')
5552 export redundancy_enable=yes
5653 export configsync_enable=yes
5754 export redundancy_authentication_presharedkey_key=`cat /mnt/disks/secrets/username_admin_password | awk '{x=$0;for(i=length;i<51;i++)x=x "0";}END{print x}' | base64` # Right-pad with 0s to 50 length
9289 loop_guard=60
9390 pause=10
9491 count=0
92+ # Wait for Solace Management API
9593 while [ ${count} -lt ${loop_guard} ]; do
9694 if /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 -t ; then
9795 break
@@ -131,6 +129,7 @@ data:
131129 resync_step_required=""
132130 role=""
133131 count=0
132+ # Determine node's primary or backup role
134133 while [ ${count} -lt ${loop_guard} ]; do
135134 role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
136135 -q "<rpc><show><redundancy><detail/></redundancy></show></rpc>" \
@@ -147,16 +146,16 @@ data:
147146 ;;
148147 esac
149148 ((count++))
150- echo "`date` INFO : ${APP}-Waited ${run_time} seconds, got ${role_results} for this node's active-standby role"
149+ echo "`date` INFO : ${APP}-Waited ${run_time} seconds, got ${role_results} for this node's primary or backup role"
151150 sleep ${pause}
152151 done
153152 if [ ${count} -eq ${loop_guard} ]; then
154- echo "`date` ERROR : ${APP}-Could not determine this node's active-standby role" >&2
153+ echo "`date` ERROR : ${APP}-Could not determine this node's primary or backup role" >&2
155154 exit 1
156155 fi
157- # Determine local activity
156+ echo "`date` INFO : ${APP}-Management API is up, determined that this node's role is: ${role}"
157+ # Determine activity (local or mate active)
158158 count=0
159- echo "`date` INFO : ${APP}-Management API is up, determined that this node's active-standby role is: ${role}"
160159 while [ ${count} -lt ${loop_guard} ]; do
161160 online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
162161 -q "<rpc><show><redundancy><detail/></redundancy></show></rpc>" \
@@ -172,7 +171,7 @@ data:
172171 echo "`date` INFO : ${APP}-Broker initial startup detected. This node will assert config-sync configuration over its mate"
173172 resync_step_required="true"
174173 else
175- echo "`date` WARN : ${APP}-Unexpected state: this is not an initial startup of the broker and this node reports Local Active. Normally expected nodes are Mate Active after restart "
174+ echo "`date` WARN : ${APP}-Unexpected state: this is not an initial startup of the broker and this node reports Local Active. Possibly a redeploy? "
176175 fi
177176 break
178177 ;;
@@ -182,15 +181,16 @@ data:
182181 ;;
183182 esac
184183 ((count++))
185- echo "`date` INFO : ${APP}-Waited ${run_time} seconds, Local activity state is: ${local_activity}"
184+ echo "`date` INFO : ${APP}-Waited ${run_time} seconds, node activity state is: ${local_activity}"
186185 sleep ${pause}
187186 done
188187 if [ ${count} -eq ${loop_guard} ]; then
189- echo "`date` ERROR : ${APP}-Local activity state never become Local Active or Mate Active" >&2
188+ echo "`date` ERROR : ${APP}-Node activity state never become Local Active or Mate Active" >&2
190189 exit 1
191190 fi
192- # If we need to assert leader, then we need to wait for mate to reconcile
191+ # If we need to assert leader, then first wait for mate to report Standby state
193192 if [ "${resync_step_required}" = "true" ]; then
193+ # This branch is AD-active only
194194 count=0
195195 echo "`date` INFO : ${APP}-Waiting for mate activity state to be 'Standby'"
196196 while [ ${count} -lt ${loop_guard} ]; do
@@ -214,7 +214,7 @@ data:
214214 exit 1
215215 fi
216216 fi # if assert-leader
217- # Ensure Config-sync connection state is Connected before proceeding
217+ # Ensure Config-sync connection state is Connected for both primary and backup before proceeding
218218 count=0
219219 echo "`date` INFO : ${APP}-Waiting for config-sync connected"
220220 while [ ${count} -lt ${loop_guard} ]; do
@@ -239,11 +239,12 @@ data:
239239 fi
240240 # Now can issue assert-leader command
241241 if [ "${resync_step_required}" = "true" ]; then
242- echo "`date` INFO : ${APP}-Initiating assert-leader"
243- /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
244- -q "<rpc><admin><config-sync><assert-leader><router/></assert-leader></config-sync></admin></rpc>"
245- /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
246- -q "<rpc><admin><config-sync><assert-leader><vpn-name>*</vpn-name></assert-leader></config-sync></admin></rpc>"
242+ # This branch is AD-active only
243+ echo "`date` INFO : ${APP}-Initiating assert-leader"
244+ /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
245+ -q "<rpc><admin><config-sync><assert-leader><router/></assert-leader></config-sync></admin></rpc>"
246+ /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
247+ -q "<rpc><admin><config-sync><assert-leader><vpn-name>*</vpn-name></assert-leader></config-sync></admin></rpc>"
247248 fi
248249 # Wait for config-sync results
249250 count=0
@@ -263,7 +264,7 @@ data:
263264 ((count++))
264265 echo "`date` INFO : ${APP}-Waited ${run_time} seconds, Config-sync is: ${confsyncstatus_results}, not yet Up"
265266
266- # Additional check to confirm config-sync
267+ # Additional checks to confirm config-sync (even if reported globally as not Up, it may still be up between the local primary and backup in a DR setup)
267268 echo "`date` INFO : ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..."
268269 messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
269270 -q "<rpc><show><config-sync><database/><detail/></config-sync></show></rpc>" \
@@ -378,36 +379,15 @@ data:
378379 IFS='-' read -ra host_array <<< $(hostname)
379380 node_ordinal=${host_array[-1]}
380381 password=`cat /mnt/disks/secrets/username_admin_password`
381-
382- # For update (includes SolOS upgrade) purposes, additional checks are required for readiness state when the pod has been started
383- # This is an update if the LASTVERSION_FILE with K8s controller-revision-hash exists and contents differ from current value
384- LASTVERSION_FILE=/var/lib/solace/var/lastConfigRevisionBeforeReboot
385- if [ ! -f ${LASTVERSION_FILE} ] || [[ $(cat ${LASTVERSION_FILE}) != $(get_label "controller-revision-hash") ]] ; then
386- echo "`date` INFO : ${APP}-Initial startup or Upgrade detected, running additional checks..."
387- # Check redundancy
388- echo "`date` INFO : ${APP}-Running checks. Redundancy state check started..."
389- results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
390- -q "<rpc><show><redundancy/></show></rpc>" \
391- -v "/rpc-reply/rpc/show/redundancy/redundancy-status"`
392- redundancystatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
393- if [ "${redundancystatus_results}" != "Up" ]; then
394- echo "`date` INFO : ${APP}-Redundancy state is not yet up."
395- rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
396- fi
397-
398- fi
399- # Record current version in LASTVERSION_FILE
400- echo $(get_label "controller-revision-hash") > ${LASTVERSION_FILE}
401382 # For monitor node just check for redundancy; active label will never be set
402383 if [ "${node_ordinal}" = "2" ]; then
403384 # Check redundancy
404- echo "`date` INFO : ${APP}-Running checks. Redundancy state check started..."
405385 results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
406386 -q "<rpc><show><redundancy/></show></rpc>" \
407387 -v "/rpc-reply/rpc/show/redundancy/redundancy-status"`
408388 redundancystatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
409389 if [ "${redundancystatus_results}" != "Up" ]; then
410- echo "`date` INFO : ${APP}-Redundancy state is not yet up."
390+ echo "`date` INFO : ${APP}-Waiting for redundancy up, redundancy state is not yet up."
411391 rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
412392 fi
413393 if [ ! -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then
@@ -418,6 +398,7 @@ data:
418398 fi
419399 exit 0
420400 fi # End Monitor Node
401+ # From here on, only message routing nodes are handled.
421402 # For Primary or Backup nodes set both service readiness (active label) and k8s readiness (exit return value)
422403 health_result=`curl -s -o /dev/null -w "%{http_code}" http://localhost:5550/health-check/guaranteed-active`
423404 case "${health_result}" in
@@ -467,54 +448,52 @@ data:
467448 echo "`date` INFO : ${APP}-Running checks.Redundancy state is not yet up."
468449 rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
469450 fi
470- # Additionally check config-sync status for non-monitoring nodes
471- if [ "${node_ordinal}" != "2" ]; then
472- results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
473- -q "<rpc><show><config-sync></config-sync></show></rpc>" \
474- -v "/rpc-reply/rpc/show/config-sync/status/oper-status"`
475- confsyncstatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
476- if [ "${confsyncstatus_results}" != "Up" ]; then
451+ # Check config-sync status
452+ results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
453+ -q "<rpc><show><config-sync></config-sync></show></rpc>" \
454+ -v "/rpc-reply/rpc/show/config-sync/status/oper-status"`
455+ confsyncstatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
456+ if [ "${confsyncstatus_results}" != "Up" ]; then
477457
478- # Additional check to confirm config-sync
479- echo "`date` INFO : ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..."
458+ # Additional check to confirm config-sync
459+ echo "`date` INFO : ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..."
480460
481- messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
482- -q "<rpc><show><config-sync><database/><detail/></config-sync></show></rpc>" \
483- -v "count(/rpc-reply/rpc/show/config-sync/database/local/tables/table)"`
484- messagevpn_total=`echo ${messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
461+ messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
462+ -q "<rpc><show><config-sync><database/><detail/></config-sync></show></rpc>" \
463+ -v "count(/rpc-reply/rpc/show/config-sync/database/local/tables/table)"`
464+ messagevpn_total=`echo ${messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
485465
486- # Count message_vpns in-sync and compare with total
487- localmessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
488- -q "<rpc><show><config-sync><database/></config-sync></show></rpc>" \
489- -v "count(//table[sync-state='In-Sync'])"`
490- local_messagevpn_total_insync=`echo ${localmessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
491- if [ "$messagevpn_total" -ne "$local_messagevpn_total_insync" ]; then
492- echo "`date` INFO : ${APP}-Config-sync state is not in-sync locally."
493- rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
494- fi
466+ # Count message_vpns in-sync and compare with total
467+ localmessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
468+ -q "<rpc><show><config-sync><database/></config-sync></show></rpc>" \
469+ -v "count(//table[sync-state='In-Sync'])"`
470+ local_messagevpn_total_insync=`echo ${localmessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
471+ if [ "$messagevpn_total" -ne "$local_messagevpn_total_insync" ]; then
472+ echo "`date` INFO : ${APP}-Config-sync state is not in-sync locally."
473+ rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
474+ fi
495475
496- echo "`date` INFO : ${APP}-Checking Config-sync Setup. Remote config-sync state check starting..."
497- vpnremotehamate_result=$(get_router_remote_config_state "name")
476+ echo "`date` INFO : ${APP}-Checking Config-sync Setup. Remote config-sync state check starting..."
477+ vpnremotehamate_result=$(get_router_remote_config_state "name")
498478
499- remote_messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
500- -q "<rpc><show><config-sync><database/><remote/></config-sync></show></rpc>" \
501- -v "count(//table/source-router[name='$vpnremotehamate_result'])"`
502- remote_messagevpn_total=`echo ${remote_messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
479+ remote_messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
480+ -q "<rpc><show><config-sync><database/><remote/></config-sync></show></rpc>" \
481+ -v "count(//table/source-router[name='$vpnremotehamate_result'])"`
482+ remote_messagevpn_total=`echo ${remote_messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
503483
504- # Count message_vpns in-sync, not stale and compare with total
505- remotemessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
506- -q "<rpc><show><config-sync><database/><remote/></config-sync></show></rpc>" \
507- -v "count(//table/source-router[name='$vpnremotehamate_result' and sync-state='In-Sync' and stale='No'])"`
508- remote_messagevpn_total_insync=`echo ${remotemessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
509- if [ "$remote_messagevpn_total" -ne "$remote_messagevpn_total_insync" ]; then
510- echo "`date` INFO : ${APP}-Config-sync state is not in-sync for remote."
511- rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
512- fi
484+ # Count message_vpns in-sync, not stale and compare with total
485+ remotemessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
486+ -q "<rpc><show><config-sync><database/><remote/></config-sync></show></rpc>" \
487+ -v "count(//table/source-router[name='$vpnremotehamate_result' and sync-state='In-Sync' and stale='No'])"`
488+ remote_messagevpn_total_insync=`echo ${remotemessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
489+ if [ "$remote_messagevpn_total" -ne "$remote_messagevpn_total_insync" ]; then
490+ echo "`date` INFO : ${APP}-Config-sync state is not in-sync for remote."
491+ rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
513492 fi
514493 fi
515494 # Pass readiness check
516495 if [ ! -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then
517- echo "`date` INFO : ${APP}-Redundancy is up and node is mate Active"
496+ echo "`date` INFO : ${APP}-Redundancy is up and node is Mate Active"
518497 touch ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}
519498 echo "`date` INFO : ${APP}-Server status check complete for this broker node"
520499 exit 1
0 commit comments