
Commit f9103a7

Normalize retry/timeouts for kubernetes installation
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
Parent: 8bbf21e

File tree: 2 files changed, +14 -11 lines


pkg/provisioner/provisioner.go

Lines changed: 2 additions & 2 deletions
@@ -93,8 +93,8 @@ func (p *Provisioner) waitForNodeReboot() error {
 	}
 
 	// Wait for the node to come back up
-	maxRetries := 30
-	retryInterval := 10 * time.Second
+	maxRetries := 10
+	retryInterval := 30 * time.Second
 
 	for i := 0; i < maxRetries; i++ {
 		p.log.Info("Waiting for node to come back online...")

pkg/provisioner/templates/kubernetes.go

Lines changed: 12 additions & 9 deletions
@@ -101,25 +101,28 @@ sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
 sudo chown $(id -u):$(id -g) $HOME/.kube/config
 export KUBECONFIG="${HOME}/.kube/config"
 
+# Wait explicitly for kube-apiserver availability
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG version
+
 # Install Calico
 # based on https://docs.tigera.io/calico/latest/getting-started/kubernetes/quickstart
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG create -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG create -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml
 
 # Wait for Tigera operator to be ready
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=available --timeout=300s deployment/tigera-operator -n tigera-operator
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=available --timeout=300s deployment/tigera-operator -n tigera-operator
 
 # Wait for all necessary CRDs to be established
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/installations.operator.tigera.io
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/apiservers.operator.tigera.io
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/tigerastatuses.operator.tigera.io
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/installations.operator.tigera.io
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/apiservers.operator.tigera.io
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/tigerastatuses.operator.tigera.io
 
 # Apply custom resources with increased retry attempts
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG apply -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG apply -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml
 
 # Make single-node cluster schedulable
-kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule-
-kubectl label node --all node-role.kubernetes.io/worker=
-kubectl label node --all nvidia.com/holodeck.managed=true
+with_retry 10 30s kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule-
+with_retry 10 30s kubectl label node --all node-role.kubernetes.io/worker=
+with_retry 10 30s kubectl label node --all nvidia.com/holodeck.managed=true
 
 # Wait for cluster to be ready
 with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=ready --timeout=300s nodes --all
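The template relies on a with_retry helper that is presumably defined elsewhere in the repository, not in this diff. A minimal bash sketch of a helper matching the with_retry <attempts> <delay> <command...> convention used above; the implementation details are assumptions:

with_retry() {
    local max_attempts="$1"
    local delay="$2"
    shift 2

    local attempt=1
    while true; do
        # Run the command; stop retrying as soon as it succeeds.
        "$@" && return 0
        if [ "$attempt" -ge "$max_attempts" ]; then
            echo "with_retry: '$*' failed after ${max_attempts} attempts" >&2
            return 1
        fi
        attempt=$((attempt + 1))
        # GNU sleep accepts suffixed durations such as 20s or 30s.
        sleep "$delay"
    done
}

Normalizing every call to 10 attempts at 30s gives each installation step the same worst-case wait (~300s) as the final cluster-ready check.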
