
Commit c1c5e8c

Merge pull request #385 from ArangoGutierrez/v0212
Normalize retry/timeouts for kubernetes installation
2 parents 8bbf21e + 13fa2be commit c1c5e8c

File tree

3 files changed: +29 additions, -21 deletions


cmd/cli/.main.go.swp (12 KB)

Binary file not shown.

pkg/provisioner/provisioner.go

Lines changed: 2 additions & 2 deletions

@@ -93,8 +93,8 @@ func (p *Provisioner) waitForNodeReboot() error {
 	}
 
 	// Wait for the node to come back up
-	maxRetries := 30
-	retryInterval := 10 * time.Second
+	maxRetries := 10
+	retryInterval := 30 * time.Second
 
 	for i := 0; i < maxRetries; i++ {
 		p.log.Info("Waiting for node to come back online...")
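The retry budget stays at roughly five minutes (10 attempts of 30s instead of 30 of 10s) while polling less aggressively. A minimal Go sketch of the pattern, assuming a TCP probe of the SSH port stands in for the provisioner's actual readiness check:

package main

import (
	"fmt"
	"net"
	"time"
)

// waitForHost sketches the retry pattern behind waitForNodeReboot with the
// new budget: 10 attempts, 30 seconds apart. The TCP dial on the SSH port
// is an illustrative assumption, not the provisioner's actual check.
func waitForHost(addr string) error {
	maxRetries := 10
	retryInterval := 30 * time.Second

	for i := 0; i < maxRetries; i++ {
		fmt.Println("Waiting for node to come back online...")
		conn, err := net.DialTimeout("tcp", addr, 5*time.Second)
		if err == nil {
			conn.Close()
			return nil
		}
		time.Sleep(retryInterval)
	}
	return fmt.Errorf("node %s not reachable after %d attempts", addr, maxRetries)
}

func main() {
	if err := waitForHost("203.0.113.10:22"); err != nil {
		fmt.Println(err)
	}
}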

pkg/provisioner/templates/kubernetes.go

Lines changed: 27 additions & 19 deletions

@@ -101,25 +101,28 @@ sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
 sudo chown $(id -u):$(id -g) $HOME/.kube/config
 export KUBECONFIG="${HOME}/.kube/config"
 
+# Wait explicitly for kube-apiserver availability
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG version
+
 # Install Calico
 # based on https://docs.tigera.io/calico/latest/getting-started/kubernetes/quickstart
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG create -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG create -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml
 
 # Wait for Tigera operator to be ready
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=available --timeout=300s deployment/tigera-operator -n tigera-operator
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=available --timeout=300s deployment/tigera-operator -n tigera-operator
 
 # Wait for all necessary CRDs to be established
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/installations.operator.tigera.io
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/apiservers.operator.tigera.io
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/tigerastatuses.operator.tigera.io
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/installations.operator.tigera.io
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/apiservers.operator.tigera.io
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/tigerastatuses.operator.tigera.io
 
 # Apply custom resources with increased retry attempts
-with_retry 10 20s kubectl --kubeconfig $KUBECONFIG apply -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml
+with_retry 10 30s kubectl --kubeconfig $KUBECONFIG apply -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml
 
 # Make single-node cluster schedulable
-kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule-
-kubectl label node --all node-role.kubernetes.io/worker=
-kubectl label node --all nvidia.com/holodeck.managed=true
+with_retry 10 30s kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule-
+with_retry 10 30s kubectl label node --all node-role.kubernetes.io/worker=
+with_retry 10 30s kubectl label node --all nvidia.com/holodeck.managed=true
 
 # Wait for cluster to be ready
 with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=ready --timeout=300s nodes --all
@@ -168,11 +171,14 @@ echo "ssh -i <your-private-key> ubuntu@${INSTANCE_ENDPOINT_HOST}"
 
 const microk8sTemplate = `
 : ${INSTANCE_ENDPOINT_HOST:={{.K8sEndpointHost}}}
+: ${K8S_VERSION:={{.Version}}}
+
+# Remove leading 'v' from version if present for microk8s snap channel
+MICROK8S_VERSION="${K8S_VERSION#v}"
 
 # Install microk8s
 sudo apt-get update
-
-sudo snap install microk8s --classic --channel={{.Version}}
+sudo snap install microk8s --classic --channel=${MICROK8S_VERSION}
 sudo microk8s enable gpu dashboard dns registry
 sudo usermod -a -G microk8s ubuntu
 mkdir -p ~/.kube

@@ -181,7 +187,7 @@ sudo microk8s config > ~/.kube/config
 sudo chown -f -R ubuntu ~/.kube
 sudo snap alias microk8s.kubectl kubectl
 
-echo "Microk8s {{.Version}} installed successfully"
+echo "Microk8s ${MICROK8S_VERSION} installed successfully"
 echo "you can now access the cluster with:"
 echo "ssh -i <your-private-key> ubuntu@${INSTANCE_ENDPOINT_HOST}"
 `
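The microk8s template now derives the snap channel by stripping a single leading 'v' with the shell expansion ${K8S_VERSION#v}. For illustration only, a self-contained Go equivalent of that normalization (the repository performs this in the shell template, not in Go):

package main

import (
	"fmt"
	"strings"
)

// microk8sChannel mirrors MICROK8S_VERSION="${K8S_VERSION#v}": it removes
// at most one leading "v" so the value can be passed to
// `snap install microk8s --classic --channel=...`.
func microk8sChannel(version string) string {
	return strings.TrimPrefix(version, "v")
}

func main() {
	for _, v := range []string{"v1.32", "1.32", "v1.31.0"} {
		fmt.Printf("%q -> %q\n", v, microk8sChannel(v))
	}
}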
@@ -269,14 +275,16 @@ type KubeadmConfig struct {
 }
 
 func NewKubernetes(env v1alpha1.Environment) (*Kubernetes, error) {
-	kubernetes := &Kubernetes{
-		Version: env.Spec.Kubernetes.KubernetesVersion,
-	}
-	// check if env.Spec.Kubernetes.KubernetesVersion is in the format of vX.Y.Z
-	// if not, set the default version
-	if !strings.HasPrefix(env.Spec.Kubernetes.KubernetesVersion, "v") && env.Spec.Kubernetes.KubernetesInstaller != "microk8s" {
-		fmt.Printf("Kubernetes version %s is not in the format of vX.Y.Z, setting default version v1.32.1\n", env.Spec.Kubernetes.KubernetesVersion)
+	kubernetes := &Kubernetes{}
+
+	// Normalize Kubernetes version: always ensure it starts with 'v'
+	switch {
+	case env.Spec.Kubernetes.KubernetesVersion == "":
 		kubernetes.Version = defaultKubernetesVersion
+	case !strings.HasPrefix(env.Spec.Kubernetes.KubernetesVersion, "v"):
+		kubernetes.Version = "v" + env.Spec.Kubernetes.KubernetesVersion
+	default:
+		kubernetes.Version = env.Spec.Kubernetes.KubernetesVersion
 	}
 	if env.Spec.Kubernetes.KubeletReleaseVersion != "" {
 		kubernetes.KubeletReleaseVersion = env.Spec.Kubernetes.KubeletReleaseVersion
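With the switch above, NewKubernetes accepts a version with or without the leading 'v' and falls back to the package default when the field is empty. A standalone sketch of just that normalization step; defaultKubernetesVersion here is a stand-in constant, not necessarily the value defined in the package:

package main

import (
	"fmt"
	"strings"
)

// Stand-in for the package-level default used by NewKubernetes.
const defaultKubernetesVersion = "v1.32.1"

// normalizeVersion mirrors the new switch: empty input falls back to the
// default, a version without the "v" prefix gets one added, and anything
// else is used as given.
func normalizeVersion(v string) string {
	switch {
	case v == "":
		return defaultKubernetesVersion
	case !strings.HasPrefix(v, "v"):
		return "v" + v
	default:
		return v
	}
}

func main() {
	for _, v := range []string{"", "1.32.1", "v1.31.0"} {
		fmt.Printf("%q -> %q\n", v, normalizeVersion(v))
	}
}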
