From f0a21ec6df44230b30aeb681743c2a6f0629cff3 Mon Sep 17 00:00:00 2001
From: Shuyang Xin
Date: Fri, 14 Nov 2025 14:44:41 +0800
Subject: [PATCH] Enhance GKE cluster deletion by cleaning up Node Pools

Adds a fallback mechanism to explicitly delete Node Pools when the
initial cluster deletion attempt fails.

Signed-off-by: Shuyang Xin
---
 ci/test-conformance-gke.sh | 61 +++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 10 deletions(-)

diff --git a/ci/test-conformance-gke.sh b/ci/test-conformance-gke.sh
index c08ddfa9192..cebfbfe395c 100755
--- a/ci/test-conformance-gke.sh
+++ b/ci/test-conformance-gke.sh
@@ -346,23 +346,64 @@ function run_conformance() {
     kubectl delete -f ${GIT_CHECKOUT_DIR}/build/yamls/antrea-gke-node-init.yml --ignore-not-found=true || true
 }
 
-function cleanup_cluster() {
-    echo '=== Cleaning up GKE cluster ${cluster} ==='
-    # Do not exit automatically on error (to enable retries below)
-    set +e
-    retry=5
+# Deletes a GKE cluster, retrying (with a 10s pause) when gcloud fails.
+# Arguments: $1 - cluster name, $2 - zone, $3 - maximum number of attempts
+# Returns:   0 as soon as one deletion succeeds, 1 if every attempt fails
+delete_gke_cluster_with_retry() {
+    local cluster_name=$1
+    local zone_name=$2
+    local retries=$3
+    local retry=${retries}
+
+    echo "Attempting to delete GKE cluster: ${cluster_name}..."
+
     while [[ "${retry}" -gt 0 ]]; do
-       gcloud container clusters delete ${CLUSTER} --zone ${GKE_ZONE}
+       gcloud container clusters delete "${cluster_name}" --zone "${zone_name}" --quiet
        if [[ $? -eq 0 ]]; then
-         break
+         echo "Successfully deleted GKE cluster ${cluster_name}."
+         return 0
        fi
        sleep 10
        retry=$((retry-1))
     done
-    if [[ "${retry}" -eq 0 ]]; then
-       echo "=== Failed to delete GKE cluster ${CLUSTER}! ==="
-       exit 1
+
+    echo "Cluster deletion failed after ${retries} attempts."
+    return 1
+}
+
+function cleanup_cluster() {
+    echo "=== Cleaning up GKE cluster ${CLUSTER} ==="
+    # Do not exit automatically on error (to enable retries below)
+    set +e
+    retry=5
+    delete_gke_cluster_with_retry "${CLUSTER}" "${GKE_ZONE}" "${retry}"
+    initial_delete_success=$?
+
+    if [[ "${initial_delete_success}" -ne 0 ]]; then
+        echo "Initial cluster deletion failed. Attempting Node Pool cleanup..."
+
+        # Best effort: a failed listing leaves NODE_POOLS empty and we fall through to the final delete.
+        NODE_POOLS=$(gcloud container node-pools list --cluster "${CLUSTER}" --zone "${GKE_ZONE}" --format="value(name)" 2>/dev/null || true)
+
+        if [[ -n "${NODE_POOLS}" ]]; then
+            echo "Found Node Pools to delete: ${NODE_POOLS}"
+            for node_pool in ${NODE_POOLS}; do
+                echo "Deleting Node Pool: ${node_pool}..."
+                gcloud container node-pools delete "${node_pool}" --cluster "${CLUSTER}" --zone "${GKE_ZONE}" --quiet || echo "Warning: Failed to initiate deletion for node pool ${node_pool}. Continuing..."
+            done
+
+        else
+            echo "No separate Node Pools found or failed to list them. Moving to final cluster delete."
+        fi
+
+        echo "Final attempt to delete GKE cluster ${CLUSTER} after Node Pool cleanup."
+        delete_gke_cluster_with_retry "${CLUSTER}" "${GKE_ZONE}" 1
+
+        if [[ $? -ne 0 ]]; then
+            echo "=== Failed to delete GKE cluster ${CLUSTER} even after Node Pool cleanup. ==="
+            exit 1
+        fi
     fi
     rm -f ${KUBECONFIG_PATH}/kubeconfig
     set -e
     echo "=== Cleanup cluster ${CLUSTER} succeeded ==="