diff --git a/.github/scripts/cleanups-on-ci-failure.sh b/.github/scripts/cleanups-on-ci-failure.sh
new file mode 100755
index 00000000..3e645cf6
--- /dev/null
+++ b/.github/scripts/cleanups-on-ci-failure.sh
@@ -0,0 +1,324 @@
+#!/bin/bash
+
+# Function to check if a command exists
+command_exists() {
+  command -v "$1" >/dev/null 2>&1
+}
+
+# Find the Git repository root directory
+find_repo_root() {
+  local dir="$PWD"
+  while [[ "$dir" != "/" ]]; do
+    if [[ -d "$dir/.git" ]]; then
+      echo "$dir"
+      return 0
+    fi
+    dir="$(dirname "$dir")"
+  done
+
+  echo "Error: Could not find repository root. Make sure you're running this script from within a Git repository."
+  exit 1
+}
+
+# check if AWS CLI is available
+if ! command_exists aws; then
+  echo "Error: AWS CLI is not installed. Please install it and try again."
+  exit 1
+fi
+
+# check if Git is available (for finding repo root)
+if ! command_exists git; then
+  echo "Error: Git is not installed. Please install it and try again."
+  exit 1
+fi
+
+# check if Terraform is available
+if ! command_exists terraform; then
+  echo "Error: Terraform is not installed. Please install it and try again."
+  exit 1
+fi
+
+# check if kubectl is available
+if ! command_exists kubectl; then
+  echo "Error: kubectl is not installed. It is needed for Kubernetes resource cleanup."
+  exit 1
+fi
+
+# check if jq is available (for JSON parsing)
+if ! command_exists jq; then
+  echo "Warning: jq is not installed. It's recommended for better error handling."
+fi
+
+# AWS environment variables that might interfere with AWS CLI
+AWS_ENV_VARS=(
+  "AWS_ACCESS_KEY_ID"
+  "AWS_SECRET_ACCESS_KEY"
+  "AWS_SESSION_TOKEN"
+  "AWS_SECURITY_TOKEN"
+  "AWS_DEFAULT_REGION"
+)
+
+ENV_VARS_SET=false
+for var in "${AWS_ENV_VARS[@]}"; do
+  if [ -n "${!var}" ]; then
+    echo "Warning: $var is set, which may interfere with AWS SSO login."
+    ENV_VARS_SET=true
+  fi
+done
+
+if [ "$ENV_VARS_SET" = true ]; then
+  echo "Please unset these variables before continuing or they may interfere with the AWS profile."
+  read -p "Do you want to continue anyway? (y/n): " continue_anyway
+  if [ "$continue_anyway" != "y" ] && [ "$continue_anyway" != "Y" ]; then
+    echo "Exiting script. Please unset the environment variables and try again."
+    exit 1
+  fi
+  echo "Continuing with environment variables set..."
+else
+  echo "No interfering AWS environment variables detected."
+fi
+
+# AWS profile self-hosted and us-west-1 (that's where all the test infra is spun up)
+export AWS_PROFILE=self-hosted
+echo "AWS profile set to: $AWS_PROFILE"
+
+# AWS SSO login
+echo "Performing AWS SSO login..."
+aws sso login --sso-session devzero --profile $AWS_PROFILE
+if [ $? -ne 0 ]; then
+  echo "Error: AWS SSO login failed. Please try again."
+  exit 1
+fi
+
+# S3 bucket for terraform state
+S3_BUCKET="dsh-tf-state"
+echo "Using S3 bucket: $S3_BUCKET"
+
+export AWS_REGION=us-west-1
+echo "AWS region set to: $AWS_REGION"
+
+# find self-hosted repository root
+REPO_ROOT=$(find_repo_root)
+echo "Repository root found at: $REPO_ROOT"
+
+# define terraform state paths relative to repo root
+BASE_CLUSTER_PATH="$REPO_ROOT/terraform/examples/aws/base-cluster"
+CLUSTER_EXTENSIONS_PATH="$REPO_ROOT/terraform/examples/aws/cluster-extensions"
+
+# Verify terraform directories exist
+if [ ! -d "$BASE_CLUSTER_PATH" ]; then
+  echo "Error: Base cluster directory '$BASE_CLUSTER_PATH' does not exist."
+  exit 1
+fi
+
+if [ ! -d "$CLUSTER_EXTENSIONS_PATH" ]; then
+  echo "Error: Cluster extensions directory '$CLUSTER_EXTENSIONS_PATH' does not exist."
+  exit 1
+fi
+
+echo "Terraform directories verified."
+
+# check and clean up existing state files
+check_and_clean_tfstate() {
+  local dir="$1"
+  local files_exist=false
+
+  # Check for any terraform state files
+  if ls "$dir"/terraform.tfstate* 2>/dev/null || ls "$dir"/.terraform.lock.hcl 2>/dev/null || [ -d "$dir"/.terraform ]; then
+    echo "Existing Terraform state files found in: $dir"
+    ls -la "$dir"/terraform.tfstate* "$dir"/.terraform.lock.hcl 2>/dev/null
+    if [ -d "$dir/.terraform" ]; then
+      echo "Directory $dir/.terraform exists"
+    fi
+    files_exist=true
+  fi
+
+  if [ "$files_exist" = true ]; then
+    read -p "Do you want to clean up these files before downloading? (y/n): " cleanup
+    if [ "$cleanup" = "y" ] || [ "$cleanup" = "Y" ]; then
+      echo "Removing Terraform state files from $dir"
+      rm -f "$dir"/terraform.tfstate*
+      rm -f "$dir"/.terraform.lock.hcl
+      rm -rf "$dir"/.terraform
+      echo "Cleanup complete."
+    else
+      echo "Warning: Existing files may be overwritten or cause conflicts."
+    fi
+  else
+    echo "No existing Terraform state files found in: $dir"
+  fi
+}
+
+# Function to configure kubectl for a cluster
+configure_kubectl() {
+  local cluster_name="$1"
+
+  echo "Configuring kubectl for cluster: $cluster_name"
+
+  # Update kubeconfig for the cluster
+  aws eks update-kubeconfig --name "$cluster_name" --profile "$AWS_PROFILE" --region "$AWS_REGION"
+  if [ $? -ne 0 ]; then
+    echo "Warning: Failed to update kubeconfig for cluster $cluster_name"
+    return 1
+  fi
+
+  aws eks create-access-entry --cluster-name "$cluster_name" --profile "$AWS_PROFILE" --region "$AWS_REGION" --principal-arn arn:aws:iam::484907513542:role/aws-reserved/sso.amazonaws.com/us-west-2/AWSReservedSSO_AWSAdministratorAccess_cdb3218a34dc613b --type STANDARD
+  if [ $? -ne 0 ]; then
+    echo "Warning: Failed to create access-entry for cluster $cluster_name"
+    return 1
+  fi
+
+  aws eks associate-access-policy --cluster-name "$cluster_name" --profile "$AWS_PROFILE" --region "$AWS_REGION" --principal-arn arn:aws:iam::484907513542:role/aws-reserved/sso.amazonaws.com/us-west-2/AWSReservedSSO_AWSAdministratorAccess_cdb3218a34dc613b --access-scope type=cluster --policy-arn arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy
+  if [ $? -ne 0 ]; then
+    echo "Warning: Failed to associate policy with access-entry for cluster $cluster_name"
+    return 1
+  fi
+
+  # Test kubectl connection
+  echo "Testing kubectl connection..."
+  kubectl get nodes
+  if [ $? -ne 0 ]; then
+    echo "Warning: kubectl cannot connect to cluster $cluster_name"
+    return 1
+  fi
+
+  echo "Successfully configured kubectl for cluster $cluster_name"
+  return 0
+}
+
+# Function to run terraform destroy with explicit variable
+run_terraform_destroy() {
+  local dir="$1"
+  local name="$2"
+  local cluster_name="$3"
+
+  echo "=== Running terraform destroy in $name ==="
+
+  # Change to the directory and run terraform destroy
+  cd "$dir"
+
+  # Initialize terraform if needed
+  echo "Initializing Terraform..."
+  terraform init
+  if [ $? -ne 0 ]; then
+    echo "Error: Terraform init failed in $name."
+    return 1
+  fi
+
+  # Run terraform plan to see what would be destroyed
+  echo "Running terraform plan with explicit cluster_name=\"$cluster_name\"..."
+  terraform plan -destroy -var="cluster_name=$cluster_name"
+
+  # Confirm before destroying
+  read -p "Do you want to proceed with terraform destroy? (y/n): " proceed
+  if [ "$proceed" != "y" ] && [ "$proceed" != "Y" ]; then
+    echo "Skipping destroy operation."
+    return 0
+  fi
+
+  # Run terraform destroy with explicit variable
+  echo "Running terraform destroy with explicit cluster_name=\"$cluster_name\"..."
+  terraform destroy -auto-approve -var="cluster_name=$cluster_name"
+  if [ $? -ne 0 ]; then
+    echo "Error: Terraform destroy failed in $name."
+    return 1
+  fi
+
+  echo "Successfully destroyed resources in $name."
+  return 0
+}
+
+# get list of failed job identifiers from user
+echo "Enter a comma-separated list of job identifiers to process (check env_var 'JOB_IDENTIFIER' in a failed GitHub Actions workflow run; e.g.: 'gh-1-30-al2023-c74f'):"
+read -r dir_list
+
+# convert the comma-separated list to an array so we can loop over it
+IFS=',' read -ra DIRS <<< "$dir_list"
+
+# Track overall success
+CLEANUP_SUCCESS=true
+
+# Process each job identifier
+for dir in "${DIRS[@]}"; do
+  # trim whitespace in case the user's input contains stray spaces
+  dir=$(echo "$dir" | xargs)
+  echo "=========================================================="
+  echo "Processing job identifier: $dir"
+  echo "=========================================================="
+
+  # Process cluster-extensions first (child resources)
+  echo "Working on cluster-extensions..."
+
+  # Check and clean up cluster-extensions directory
+  echo "Checking cluster-extensions directory for existing state files..."
+  check_and_clean_tfstate "$CLUSTER_EXTENSIONS_PATH"
+
+  # pull terraform.tfstate for cluster-extensions
+  echo "Downloading terraform state for $dir/cluster-extensions"
+  aws s3 cp "s3://$S3_BUCKET/$dir/cluster-extensions/terraform.tfstate" "$CLUSTER_EXTENSIONS_PATH/terraform.tfstate"
+  if [ $? -ne 0 ]; then
+    echo "Warning: Failed to download terraform.tfstate for $dir/cluster-extensions"
+    CLEANUP_SUCCESS=false
+  else
+    echo "Successfully downloaded terraform.tfstate for $dir/cluster-extensions"
+
+    # Check and clean up any existing .terraform directory
+    if [ -d "$CLUSTER_EXTENSIONS_PATH/.terraform" ]; then
+      echo "Removing existing .terraform directory to ensure clean initialization..."
+      rm -rf "$CLUSTER_EXTENSIONS_PATH/.terraform"
+    fi
+
+    # configure kubectl for this cluster
+    configure_kubectl "$dir"
+
+    # run terraform destroy for cluster-extensions
+    if ! run_terraform_destroy "$CLUSTER_EXTENSIONS_PATH" "cluster-extensions" "$dir"; then
+      CLEANUP_SUCCESS=false
+    fi
+  fi
+
+  # Now process base-cluster (parent resources)
+  echo "Working on base-cluster..."
+
+  # Check and clean up base-cluster directory
+  echo "Checking base-cluster directory for existing state files..."
+  check_and_clean_tfstate "$BASE_CLUSTER_PATH"
+
+  # pull terraform.tfstate for base-cluster
+  echo "Downloading terraform state for $dir/base-cluster"
+  aws s3 cp "s3://$S3_BUCKET/$dir/base-cluster/terraform.tfstate" "$BASE_CLUSTER_PATH/terraform.tfstate"
+  if [ $? -ne 0 ]; then
+    echo "Warning: Failed to download terraform.tfstate for $dir/base-cluster"
+    CLEANUP_SUCCESS=false
+  else
+    echo "Successfully downloaded terraform.tfstate for $dir/base-cluster"
+
+    # Check and clean up any existing .terraform directory
+    if [ -d "$BASE_CLUSTER_PATH/.terraform" ]; then
+      echo "Removing existing .terraform directory to ensure clean initialization..."
+      rm -rf "$BASE_CLUSTER_PATH/.terraform"
+    fi
+
+    # Run terraform destroy for base-cluster
+    if ! run_terraform_destroy "$BASE_CLUSTER_PATH" "base-cluster" "$dir"; then
+      CLEANUP_SUCCESS=false
+    fi
+  fi
+
+  echo "Completed processing job identifier: $dir"
+  echo "----------------------------------------------------------"
+done
+
+# Return to the original directory
+cd "$REPO_ROOT"
+
+# Final status report
+echo ""
+echo "=========================================================="
+if [ "$CLEANUP_SUCCESS" = true ]; then
+  echo "✅ All cleanup operations completed successfully!"
+else
+  echo "⚠️ Some cleanup operations failed. Please check the logs above for details."
+  echo "You may need to manually inspect and clean up some resources."
+fi
+echo "=========================================================="
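For context, a minimal sketch of how the cleanup script above is expected to be driven locally, assuming a checkout of this repository and an AWS SSO session configured for the `self-hosted` profile; the script is interactive end-to-end, and the job identifiers shown are illustrative only:

    # run from anywhere inside the repository checkout; the script locates the repo root itself
    ./.github/scripts/cleanups-on-ci-failure.sh
    # when prompted, paste the JOB_IDENTIFIER value(s) from the failed workflow run(s), e.g.:
    #   gh-1-30-al2023-c74f, gh-1-31-al2023-c74f
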
diff --git a/.github/workflows/dsh-testing.yaml b/.github/workflows/dsh-testing.yaml
index dd0b041d..04e1bf12 100644
--- a/.github/workflows/dsh-testing.yaml
+++ b/.github/workflows/dsh-testing.yaml
@@ -23,9 +23,20 @@ on:
 jobs:
   setup-and-test:
     runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        # TODO (debo/zvonimir)
+        # eks_version: ["1.25", "1.30", "1.31"]
+        eks_version: ["1.30", "1.31"]
+      fail-fast: false
+
+    name: '(base_image: ${{ github.event.inputs.base_image }}) (eks_version: ${{ matrix.eks_version }})'
+
     permissions:
       id-token: write
       contents: read
+
     steps:
       - name: Checkout Repository
         uses: actions/checkout@v4
@@ -37,12 +48,6 @@ jobs:
           aws-region: us-west-1
           role-duration-seconds: 7200
 
-      - name: Clone DevZero Self-Hosted Repository
-        env:
-          GH_PAT: ${{ secrets.GH_TOKEN }}
-        run: |
-          git clone https://$GH_PAT@github.com/devzero-inc/self-hosted.git
-
       - name: Set up Terraform
         uses: hashicorp/setup-terraform@v3
         with:
@@ -56,16 +61,21 @@
       - name : Add SHORT_SHA Environment Variable
         id : short-sha
         shell: bash
-        run : echo "SHORT_SHA=`git rev-parse --short HEAD`" >> $GITHUB_ENV
+        run : |
+          # create a short 4-char SHA (git clamps --short=3 to its 4-character minimum)
+          echo "SHORT_SHA=`git rev-parse --short=3 HEAD`" >> $GITHUB_ENV
 
       - name : Generate unique job identifier
         id : job-identifier
         shell: bash
-        run : echo "JOB_IDENTIFIER=gh-ci-${{ github.event.inputs.base_image }}-${SHORT_SHA}" >> $GITHUB_ENV
+        run : |
+          # replace `.` in the k8s version with `-` so that the same job identifier can be used in various places
+          K8S_VERSION=$(echo ${{ matrix.eks_version }} | sed 's/\./-/')
+          echo "JOB_IDENTIFIER=gh-${K8S_VERSION}-${{ github.event.inputs.base_image }}-${SHORT_SHA}" >> $GITHUB_ENV
 
       - name: Add Backend Override (Base Cluster)
         run: |
-          cd self-hosted/terraform/examples/aws/base-cluster
+          cd terraform/examples/aws/base-cluster
           cat <<EOF > backend_override.tf
           terraform {
             backend "s3" {
@@ -76,9 +86,23 @@
            }
          }
          EOF
 
+      - name: Set EKS version v${{ matrix.eks_version }} (Base Cluster)
+        run: |
+          echo "" >> terraform/examples/aws/base-cluster/terraform.tfvars
+          echo "# Setting eks cluster version" >> terraform/examples/aws/base-cluster/terraform.tfvars
+          echo "cluster_version = \"${{ matrix.eks_version }}\"" >> terraform/examples/aws/base-cluster/terraform.tfvars
+
+          # DevZero currently doesn't publish a base AMI for Kubernetes 1.25, but local testing has indicated that we can use the 1.30 version
+          # AMIs available https://us-west-1.console.aws.amazon.com/ec2/home?region=us-west-1#Images:visibility=public-images;imageName=:devzero;v=3
+          # TODO (debo): this is currently kind of a hack to make sure that the 1.25 test uses the 1.30 node
+          if [ "${{ matrix.eks_version }}" == "1.25" ]; then
+            echo "# Using ami_version 1.30 for EKS 1.25 as a workaround" >> terraform/examples/aws/base-cluster/terraform.tfvars
+            echo "ami_version = \"1.30\"" >> terraform/examples/aws/base-cluster/terraform.tfvars
+          fi
+
       - name: Initialize and Apply Terraform (Base Cluster)
         run: |
-          cd self-hosted/terraform/examples/aws/base-cluster
+          cd terraform/examples/aws/base-cluster
           terraform init
           if [ "${{ github.event.inputs.base_image }}" == "al2023" ]; then
            terraform apply -auto-approve -var="cluster_name=$JOB_IDENTIFIER"
@@ -88,7 +112,7 @@
 
       - name: Update Cluster-Extensions tfvars
         run: |
-          cat <<EOF > self-hosted/terraform/examples/aws/cluster-extensions/terraform.tfvars
+          cat <<EOF > terraform/examples/aws/cluster-extensions/terraform.tfvars
          region = "us-west-1"
          enable_cluster_autoscaler = false
          cluster_name = "$JOB_IDENTIFIER"
@@ -97,7 +121,7 @@
 
       - name: Add Backend Override (Cluster Extensions)
         run: |
-          cd self-hosted/terraform/examples/aws/cluster-extensions
+          cd terraform/examples/aws/cluster-extensions
           cat <<EOF > backend_override.tf
           terraform {
             backend "s3" {
@@ -110,7 +134,7 @@
 
       - name: Initialize and Apply Cluster-Extensions
         run: |
-          cd self-hosted/terraform/examples/aws/cluster-extensions
+          cd terraform/examples/aws/cluster-extensions
           terraform init
           terraform apply -auto-approve
 
@@ -120,7 +144,7 @@
 
       - name: Deploy Control Plane Dependencies (and modify domains)
         run: |
-          cd self-hosted/charts/dz-control-plane-deps
+          cd charts/dz-control-plane-deps
           find values -type f -exec sed -i'.bak' "s/example\.com/$JOB_IDENTIFIER\.ci\.selfzero\.net/g" {} \; && find values -name "*.bak" -delete
           make install
 
@@ -132,14 +156,14 @@
 
          # also setting image.pullsecrets to empty to make sure that each of the deployments dont try to pull their relevant OCI images from this registry
          # backend license key is ... needed
-          yq e '.credentials.enable = false | .backend.licenseKey = strenv(BACKEND_LICENSE_KEY) | .image.pullSecrets = []' -i self-hosted/charts/dz-control-plane/values.yaml
+          yq e '.credentials.enable = false | .backend.licenseKey = strenv(BACKEND_LICENSE_KEY) | .image.pullSecrets = []' -i charts/dz-control-plane/values.yaml
 
       - name: Deploy DevZero Control Plane (after configuring kubernetes to use dockerhub creds, and patching all the deployments to point to the right domain)
         env:
          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
        run: |
-          cd self-hosted/charts/dz-control-plane
+          cd charts/dz-control-plane
           make add-docker-creds
           find . -name "values.yaml" -exec sed -i'.bak' "s/example\.com/$JOB_IDENTIFIER\.ci\.selfzero\.net/g" {} \; && find . -name "values.yaml.bak" -delete
           make install
@@ -148,20 +172,20 @@
        run: |
          echo -e "\nPods in namespace devzero:"
          kubectl get pods -n devzero
-          chmod +x self-hosted/.github/scripts/dsh-pod-test.sh
-          self-hosted/.github/scripts/dsh-pod-test.sh
+          chmod +x .github/scripts/dsh-pod-test.sh
+          .github/scripts/dsh-pod-test.sh
          echo -e "\nIngress in namespace devzero:"
          kubectl get ingress -n devzero
 
       - name: Deploy Data Plane Dependencies
         run: |
-          cd self-hosted/charts/dz-data-plane-deps
+          cd charts/dz-data-plane-deps
           find values -type f -exec sed -i'.bak' "s/example\.com/$JOB_IDENTIFIER\.ci\.selfzero\.net/g" {} \; && find values -name "*.bak" -delete
           make install
 
       - name: Deploy DevZero Data Plane
         run: |
-          cd self-hosted/charts/dz-data-plane
+          cd charts/dz-data-plane
           find . -name "values.yaml" -exec sed -i'.bak' "s/example\.com/$JOB_IDENTIFIER\.ci\.selfzero\.net/g" {} \; && find . -name "values.yaml.bak" -delete
           make install
 
@@ -173,37 +197,37 @@
       - name: '[helm] Destroy data-plane'
         if: always()
         run: |
-          cd self-hosted/charts/dz-data-plane
+          cd charts/dz-data-plane
           make delete
 
       - name: '[helm] Destroy data-plane-deps'
         if: always()
         run: |
-          cd self-hosted/charts/dz-data-plane-deps
+          cd charts/dz-data-plane-deps
           make delete
 
       - name: '[helm] Destroy control-plane'
         if: always()
         run: |
-          cd self-hosted/charts/dz-control-plane
+          cd charts/dz-control-plane
           make delete
 
       - name: '[helm] Destroy control-plane-deps'
         if: always()
         run: |
-          cd self-hosted/charts/dz-control-plane-deps
+          cd charts/dz-control-plane-deps
           make delete
 
       - name: '[terraform] Destroy cluster-extensions'
         if: always()
         run: |
-          cd self-hosted/terraform/examples/aws/cluster-extensions
+          cd terraform/examples/aws/cluster-extensions
           terraform destroy -auto-approve
 
       - name: '[terraform] Destroy base-cluster'
         if: always()
         run: |
-          cd self-hosted/terraform/examples/aws/base-cluster
+          cd terraform/examples/aws/base-cluster
           terraform destroy -auto-approve
 
       - name: '[aws-cli] clean up volumes explicitly'
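To make the identifier format concrete, here is a small shell sketch of the same construction the workflow performs above; all values are examples only (in the workflow they come from the matrix, the workflow_dispatch input, and the short-SHA step):

    eks_version="1.30"     # matrix.eks_version
    base_image="al2023"    # github.event.inputs.base_image
    short_sha="c74f"       # git rev-parse --short=3 HEAD (git pads to its 4-character minimum)
    k8s_version=$(echo "$eks_version" | sed 's/\./-/')    # "1.30" -> "1-30"
    echo "gh-${k8s_version}-${base_image}-${short_sha}"   # -> gh-1-30-al2023-c74f

This is also the identifier shape the cleanup script prompts for, which is why the version's dot is replaced before it is embedded in resource names.
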
-name "values.yaml.bak" -delete make install @@ -173,37 +197,37 @@ jobs: - name: '[helm] Destroy data-plane' if: always() run: | - cd self-hosted/charts/dz-data-plane + cd charts/dz-data-plane make delete - name: '[helm] Destroy data-plane-deps' if: always() run: | - cd self-hosted/charts/dz-data-plane-deps + cd charts/dz-data-plane-deps make delete - name: '[helm] Destroy control-plane' if: always() run: | - cd self-hosted/charts/dz-control-plane + cd charts/dz-control-plane make delete - name: '[helm] Destroy control-plane-deps' if: always() run: | - cd self-hosted/charts/dz-control-plane-deps + cd charts/dz-control-plane-deps make delete - name: '[terraform] Destroy cluster-extensions' if: always() run: | - cd self-hosted/terraform/examples/aws/cluster-extensions + cd terraform/examples/aws/cluster-extensions terraform destroy -auto-approve - name: '[terraform] Destroy base-cluster' if: always() run: | - cd self-hosted/terraform/examples/aws/base-cluster + cd terraform/examples/aws/base-cluster terraform destroy -auto-approve - name: '[aws-cli] clean up volumes explicitly' diff --git a/terraform/examples/aws/base-cluster/main.tf b/terraform/examples/aws/base-cluster/main.tf index cec53913..8dbef33a 100644 --- a/terraform/examples/aws/base-cluster/main.tf +++ b/terraform/examples/aws/base-cluster/main.tf @@ -21,6 +21,9 @@ locals { vpc_dns_resolver = cidrhost(local.effective_vpc_cidr_block, 2) # Calculates the +2 host of the CIDR for VPN DNS resolving + # if ami_version is explicitly set, use that since the user wants to be specific about the AMI being used; if not, use the cluster_version + ami_version = length(var.ami_version) > 0 ? var.ami_version : var.cluster_version + } data "aws_availability_zones" "available" {} @@ -255,7 +258,7 @@ data "aws_ami" "devzero_amazon_eks_node_al2023" { filter { name = "name" - values = ["devzero-amazon-eks-node-al2023-x86_64-standard-${var.cluster_version}-*"] + values = ["devzero-amazon-eks-node-al2023-x86_64-standard-${local.ami_version}-*"] } owners = ["710271940431"] # Devzero public AMIs account most_recent = true @@ -266,7 +269,7 @@ data "aws_ami" "devzero_ubuntu_eks_node_22_04" { filter { name = "name" - values = ["devzero-ubuntu-eks-node-22.04-x86_64-standard-${var.cluster_version}-*"] + values = ["devzero-ubuntu-eks-node-22.04-x86_64-standard-${local.ami_version}-*"] } owners = ["484907513542"] most_recent = true diff --git a/terraform/examples/aws/base-cluster/variables.tf b/terraform/examples/aws/base-cluster/variables.tf index c3e7dcb4..eb41a28e 100644 --- a/terraform/examples/aws/base-cluster/variables.tf +++ b/terraform/examples/aws/base-cluster/variables.tf @@ -161,6 +161,12 @@ variable "cluster_version" { default = "1.30" } +variable "ami_version" { + type = string + description = "AMI version to use for nodes in the EKS deployment" + default = "" +} + variable "region" { type = string description = "AWS region"