
Commit f9103a7

Normalize retry/timeouts for kubernetes installation
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
Parent: 8bbf21e

File tree: 2 files changed, +14 -11 lines


pkg/provisioner/provisioner.go

Lines changed: 2 additions & 2 deletions
@@ -93,8 +93,8 @@ func (p *Provisioner) waitForNodeReboot() error {
 	}
 
 	// Wait for the node to come back up
-	maxRetries := 30
-	retryInterval := 10 * time.Second
+	maxRetries := 10
+	retryInterval := 30 * time.Second
 
 	for i := 0; i < maxRetries; i++ {
 		p.log.Info("Waiting for node to come back online...")

pkg/provisioner/templates/kubernetes.go

Lines changed: 12 additions & 9 deletions
@@ -101,25 +101,28 @@ sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
 sudo chown $(id -u):$(id -g) $HOME/.kube/config
 export KUBECONFIG="${HOME}/.kube/config"
 
+# Wait explicitly for kube-apiserver availability
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG version
+
 # Install Calico
 # based on https://docs.tigera.io/calico/latest/getting-started/kubernetes/quickstart
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG create -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG create -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml
 
 # Wait for Tigera operator to be ready
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=available --timeout=300s deployment/tigera-operator -n tigera-operator
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=available --timeout=300s deployment/tigera-operator -n tigera-operator
 
 # Wait for all necessary CRDs to be established
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/installations.operator.tigera.io
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/apiservers.operator.tigera.io
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/tigerastatuses.operator.tigera.io
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/installations.operator.tigera.io
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/apiservers.operator.tigera.io
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/tigerastatuses.operator.tigera.io
 
 # Apply custom resources with increased retry attempts
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG apply -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG apply -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml
 
 # Make single-node cluster schedulable
-kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule-
-kubectl label node --all node-role.kubernetes.io/worker=
-kubectl label node --all nvidia.com/holodeck.managed=true
+with_retry 10 30s kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule-
+with_retry 10 30s kubectl label node --all node-role.kubernetes.io/worker=
+with_retry 10 30s kubectl label node --all nvidia.com/holodeck.managed=true
 
 # Wait for cluster to be ready
 with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=ready --timeout=300s nodes --all
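The template relies on a with_retry helper that is presumably defined elsewhere in the repository, not in this diff. A minimal bash sketch of a helper matching the with_retry <attempts> <delay> <command...> convention used above; the implementation details are assumptions:

with_retry() {
    local max_attempts="$1"
    local delay="$2"
    shift 2

    local attempt=1
    while true; do
        # Run the command; stop retrying as soon as it succeeds.
        "$@" && return 0
        if [ "$attempt" -ge "$max_attempts" ]; then
            echo "with_retry: '$*' failed after ${max_attempts} attempts" >&2
            return 1
        fi
        attempt=$((attempt + 1))
        # GNU sleep accepts suffixed durations such as 20s or 30s.
        sleep "$delay"
    done
}

Normalizing every call to 10 attempts at 30s gives each installation step the same worst-case wait (~300s) as the final cluster-ready check.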
