@@ -35,6 +35,24 @@ BASE_TEMPLATE_FILE="${SCRIPT_DIR}/eks-cluster-config.yaml.template"
GPU_TEMPLATE_FILE="${SCRIPT_DIR}/eks-gpu-nodegroup-config.yaml.template"
CONFIG_FILE="/tmp/eks-cluster-config-${CLUSTER_NAME}.yaml"

# ---------------------------------------------------------------------------
# Preflight checks: fail early, before any AWS resources are created.
# NOTE(review): `error` is defined elsewhere in this script; it is presumably
# fatal (logs and exits) -- confirm, otherwise execution continues past a
# failed check.
# ---------------------------------------------------------------------------

# Validate that required tools are installed.
# `&>/dev/null` must be written as a single operator: with a space between
# `&` and `>` the lookup is sent to the background and the condition no longer
# reflects whether the tool exists.
if ! command -v eksctl &>/dev/null; then
    error "eksctl could not be found. Please install eksctl to proceed."
fi

if ! command -v envsubst &>/dev/null; then
    error "envsubst could not be found. Please install gettext package."
fi

# Validate the template files exist.
# The paths are quoted without surrounding padding so that `-f` tests the
# actual file rather than a name with leading/trailing spaces.
if [[ ! -f "$BASE_TEMPLATE_FILE" ]]; then
    error "Base template file not found: $BASE_TEMPLATE_FILE"
fi

if [[ ! -f "$GPU_TEMPLATE_FILE" ]]; then
    error "GPU template file not found: $GPU_TEMPLATE_FILE"
fi
55+
3856create_base_cluster () {
3957 log " Creating base EKS cluster (CPU nodes only)..."
4058 log " This will take approximately 15-20 minutes..."
@@ -95,8 +113,20 @@ create_gpu_subnet() {
95113
96114 log " Creating private subnet in $az ..." >&2
97115
98- local subnet_cidr=" 192.168.128.0/19"
99- log " Using CIDR: $subnet_cidr " >&2
# Pick the first unused /19 block for the GPU subnet.
#
# Only third-octet values that are multiples of 32 are valid network
# addresses for a /19, so the candidates within the original 128..192 range
# are 128, 160 and 192.
#
# Availability is decided by counting subnets in the VPC whose CIDR equals the
# candidate. `length(Subnets)` is used instead of `Subnets[0]` because the
# AWS CLI renders a null result as the literal text "None" with
# `--output text`, which would make an empty result look non-empty.
#
# NOTE(review): this only detects an exact CIDR match, not a smaller or larger
# subnet overlapping the candidate; create-subnet will still reject overlaps.
local subnet_cidr=""
local cidr_candidate third_octet match_count
for third_octet in 128 160 192; do
    cidr_candidate="192.168.${third_octet}.0/19"
    # A failed lookup must not be mistaken for "block is free".
    if ! match_count=$(aws ec2 describe-subnets \
        --filters "Name=vpc-id,Values=$vpc_id" \
                  "Name=cidr-block,Values=$cidr_candidate" \
        --region "$AWS_REGION" \
        --query 'length(Subnets)' \
        --output text); then
        log "Failed to query existing subnets for $cidr_candidate" >&2
        return 1
    fi
    if [[ "$match_count" == "0" ]]; then
        subnet_cidr="$cidr_candidate"
        break
    fi
done

# Stop here rather than attempting to create a subnet on a block already in use.
if [[ -z "$subnet_cidr" ]]; then
    log "No available /19 CIDR block found in 192.168.128.0 - 192.168.192.0" >&2
    return 1
fi
log "Using available CIDR: $subnet_cidr" >&2
100130
101131 local subnet_id
102132 subnet_id=$( aws ec2 create-subnet \
@@ -117,7 +147,7 @@ create_gpu_subnet() {
117147 local route_table_id
118148 route_table_id=$( aws ec2 describe-route-tables \
119149 --filters " Name=vpc-id,Values=$vpc_id " \
120- " Name=tag:Name,Values=*rivate *" \
150+ " Name=tag:Name,Values=*Private *" \
121151 --query ' RouteTables[0].RouteTableId' \
122152 --output text \
123153 --region " $AWS_REGION " 2> /dev/null || echo " None" )
@@ -150,8 +180,16 @@ generate_cluster_config() {
150180 export CAPACITY_RESERVATION_ID
151181 export GPU_SUBNET_ID=" $gpu_subnet_id "
152182
# Render the GPU nodegroup template into the final config file, substituting
# the exported environment variables.
if ! envsubst < "$GPU_TEMPLATE_FILE" > "$CONFIG_FILE"; then
    log "Failed to render $GPU_TEMPLATE_FILE into $CONFIG_FILE"
    return 1
fi

# Remove capacity reservation section if not specified.
# `${VAR:-}` keeps the test safe when the variable is unset under `set -u`.
if [[ -z "${CAPACITY_RESERVATION_ID:-}" ]]; then
    log "Removing capacity reservation section (no reservation specified)"
    # Delete every line from `capacityReservation:` through
    # `capacityReservationID:` inclusive. Written via a temporary file
    # because `sed -i` without a suffix is not portable between GNU and BSD sed.
    # NOTE(review): if the template has no `capacityReservationID:` line after
    # `capacityReservation:`, the range extends to end of file -- confirm
    # against the template.
    local stripped_config="${CONFIG_FILE}.tmp"
    if sed '/capacityReservation:/,/capacityReservationID:/d' "$CONFIG_FILE" > "$stripped_config"; then
        mv -- "$stripped_config" "$CONFIG_FILE"
    else
        rm -f -- "$stripped_config"
        log "Failed to remove capacity reservation section from $CONFIG_FILE"
        return 1
    fi
fi
192+
155193 log " Cluster configuration generated: $CONFIG_FILE "
156194}
157195
0 commit comments