@@ -98,7 +98,7 @@ assert_not_empty_list() {
9898assert_true () {
9999 run --separate-stderr " $@ "
100100 assert_success || return
101- is_true " $output " || return
101+ assert_output --regexp ' ^([Tt]rue|1)$ ' || return
102102}
103103
104104# Given namespace and app name, assert that a log line contains the given string.
@@ -124,20 +124,17 @@ pull_rancher_image() {
124124 if ! load_var rancher_chart_version; then
125125 fail " Could not restore Rancher chart version"
126126 fi
127- if using_docker; then
128- try docker pull --quiet " rancher/rancher:v$rancher_chart_version "
129- else
130- try nerdctl pull --namespace k8s.io --quiet " rancher/rancher:v$rancher_chart_version "
131- fi
127+ local CONTAINERD_NAMESPACE=k8s.io
128+ try ctrctl pull --quiet " rancher/rancher:v$rancher_chart_version "
132129}
133130
134131wait_for_rancher_pod () {
135- try --max 60 --delay 10 assert_pod_log_line cattle-system rancher Listening on :443
136- try --max 60 --delay 10 assert_pod_log_line cattle-system rancher Starting catalog controller
132+ try assert_pod_log_line cattle-system rancher Listening on :443
133+ try assert_pod_log_line cattle-system rancher Starting catalog controller
137134 try --max 60 --delay 10 assert_pod_log_line cattle-system rancher Watching metadata for rke-machine-config.cattle.io/v1
138135 try --max 60 --delay 10 assert_pod_log_line cattle-system rancher ' Creating clusterRole for roleTemplate Cluster Owner (cluster-owner).'
139- try --max 60 --delay 10 assert_pod_log_line cattle-system rancher Rancher startup complete
140- try --max 120 --delay 10 assert_pod_log_line cattle-system rancher Created machine for node
136+ try assert_pod_log_line cattle-system rancher Rancher startup complete
137+ try assert_pod_log_line cattle-system rancher Created machine for node
141138}
142139
143140wait_for_webhook_pod () {
@@ -175,48 +172,53 @@ deploy_rancher() {
175172 host=$( traefik_hostname) || return
176173
177174 comment " Installing rancher $rancher_chart_version "
178- # The helm install can take a long time, especially on CI
175+ # The helm install can take a long time, especially on CI. Therefore we
176+ # avoid using --wait / --timeout, and instead check for forward progress
177+ # at each step.
179178 helm upgrade \
180179 --install rancher rancher-latest/rancher \
181180 --version " $rancher_chart_version " \
182181 --namespace cattle-system \
183182 --set hostname=" $host " \
184183 --set replicas=1 \
185184 --create-namespace
185+
186186 try assert_not_empty_list helm list --all --output json --namespace cattle-system --selector name=rancher
187187 try assert_not_empty_list helm list --deployed --output json --namespace cattle-system --selector name=rancher
188188 try kubectl get ingress --namespace cattle-system rancher
189189 try assert_not_empty_list kubectl get ingress --namespace cattle-system rancher --output jsonpath=' {.status.loadBalancer.ingress}'
190190
191191 try --max 60 --delay 10 kubectl get namespace fleet-local
192- try --max 120 --delay 10 kubectl get namespace local
193- try --max 120 --delay 10 kubectl get namespace cattle-global-data
192+ try --max 60 --delay 10 kubectl get namespace local
193+ try --max 60 --delay 10 kubectl get namespace cattle-global-data
194194 try --max 60 --delay 10 kubectl get namespace fleet-default
195195
196196 try assert_not_empty_list kubectl get pods --namespace cattle-system --selector app=rancher --output jsonpath=' {.items}'
197197
198- # Unfortunately, the Rancher pod could get restarted, so we need to put this in a loop :(
198+ # Unfortunately, the Rancher pod could get restarted; this may cause the
199+ # wait steps to fail, in which case we need to start again from the top.
199200 try wait_for_rancher_pod
200201
201- try --max 60 --delay 10 assert_true kubectl get APIServices v3.project.cattle.io --output=jsonpath=' {.status.conditions[?(@.type=="Available")].status}'
202+ try assert_true kubectl get APIServices v3.project.cattle.io --output=jsonpath=' {.status.conditions[?(@.type=="Available")].status}'
202203
203- try --max 60 --delay 10 kubectl get namespace cattle-fleet-system
204- try --max 60 --delay 10 kubectl get namespace cattle-system
204+ try kubectl get namespace cattle-fleet-system
205+ try kubectl get namespace cattle-system
205206
206- try --max 60 --delay 10 kubectl get deployment --namespace cattle-fleet-system fleet-controller
207+ try --max 48 kubectl get deployment --namespace cattle-fleet-system fleet-controller
207208 try assert_kube_deployment_available --namespace cattle-fleet-system gitjob
208209 try assert_kube_deployment_available --namespace cattle-fleet-system fleet-controller
209210
210- try --max 120 --delay 10 assert_not_empty_list kubectl get pods --namespace cattle-system --selector app=rancher-webhook --output jsonpath=' {.items}'
211+ try --max 60 --delay 10 assert_not_empty_list kubectl get pods --namespace cattle-system --selector app=rancher-webhook --output jsonpath=' {.items}'
211212
212213 # Unfortunately, the webhook pod might restart too :(
213214 try wait_for_webhook_pod
214215
215- try --max 60 --delay 10 assert_kube_deployment_available --namespace cattle-system rancher
216- try --max 60 --delay 10 assert_kube_deployment_available --namespace cattle-fleet-local-system fleet-agent
217- try --max 120 --delay 10 assert_kube_deployment_available --namespace cattle-system rancher-webhook
216+ try --max 120 assert_kube_deployment_available --namespace cattle-system rancher
217+ try --max 120 assert_kube_deployment_available --namespace cattle-fleet-local-system fleet-agent
218+ try --max 60 assert_kube_deployment_available --namespace cattle-system rancher-webhook
218219
219- # The rancher pod sometimes falls over on its own; retry in a loop
220+ # The rancher pod sometimes falls over on its own; retry in a loop to
221+ # detect flapping.
220222 local i
221223 for i in {1..10}; do
222224 sleep 1
@@ -233,11 +235,13 @@ verify_rancher() {
233235 # Get k3s logs if possible before things fail
234236 kubectl get deployments --all-namespaces || :
235237 kubectl get pods --all-namespaces || :
238+
236239 local name
237240 name=" $( kubectl get pod -n cattle-system --selector app=rancher --output=jsonpath=' {.items[].metadata.name}' || echo ' ' ) "
238241 if [[ -n $name ]]; then
239242 kubectl logs -n cattle-system " $name " || :
240243 fi
244+
241245 name=" $( kubectl get pod -n cattle-system --selector app=rancher-webhook --output=jsonpath=' {.items[].metadata.name}' || echo ' ' ) "
242246 if [[ -n $name ]]; then
243247 kubectl logs -n cattle-system " $name " || :
0 commit comments