@@ -35,6 +35,7 @@ DRIVER_NAME=$(from_versions_mk "DRIVER_NAME")
35 35
36 36 NETWORK_NAME="${DRIVER_NAME}-net"
37 37 CLUSTER_NAME="${DRIVER_NAME}-cluster"
38 + NODE_VERSION="1.31.1"
38 39
39 40 ## Create the Network for the cluster
40 41 gcloud compute networks create "${NETWORK_NAME}" \
@@ -52,16 +53,18 @@ gcloud container clusters create "${CLUSTER_NAME}" \
52 53 --no-enable-autorepair \
53 54 --no-enable-autoupgrade \
54 55 --region us-west1 \
56 + --num-nodes "1" \
55 57 --network "${NETWORK_NAME}" \
56 - --node-labels=nvidia.com/dra.controller=true
58 + --cluster-version "${NODE_VERSION}" \
59 + --node-version "${NODE_VERSION}"
57 60
58 61 # Create t4 node pool
59 62 gcloud beta container node-pools create "pool-1" \
60 63 --quiet \
61 64 --project "${PROJECT_NAME}" \
62 65 --cluster "${CLUSTER_NAME}" \
63 66 --region "us-west1" \
64 - --node-version "1.27.3-gke.100" \
67 + --node-version "${NODE_VERSION}" \
65 68 --machine-type "n1-standard-8" \
66 69 --accelerator "type=nvidia-tesla-t4,count=1" \
67 70 --image-type "UBUNTU_CONTAINERD" \
@@ -79,15 +82,15 @@ gcloud beta container node-pools create "pool-1" \
79 82 --max-surge-upgrade 1 \
80 83 --max-unavailable-upgrade 0 \
81 84 --node-locations "us-west1-a" \
82 - --node-labels=gke-no-default-nvidia-gpu-device-plugin=true,nvidia.com/gpu=present,nvidia.com/dra.kubelet-plugin=true
85 + --node-labels=gke-no-default-nvidia-gpu-device-plugin=true,nvidia.com/gpu.present=true
83 86
84 87 # Create v100 node pool
85 88 gcloud beta container node-pools create "pool-2" \
86 89 --quiet \
87 90 --project "${PROJECT_NAME}" \
88 91 --cluster "${CLUSTER_NAME}" \
89 92 --region "us-west1" \
90 - --node-version "1.27.3-gke.100" \
93 + --node-version "${NODE_VERSION}" \
91 94 --machine-type "n1-standard-8" \
92 95 --accelerator "type=nvidia-tesla-v100,count=1" \
93 96 --image-type "UBUNTU_CONTAINERD" \
@@ -105,7 +108,7 @@ gcloud beta container node-pools create "pool-2" \
105 108 --max-surge-upgrade 1 \
106 109 --max-unavailable-upgrade 0 \
107 110 --node-locations "us-west1-a" \
108 - --node-labels=gke-no-default-nvidia-gpu-device-plugin=true,nvidia.com/gpu=present,nvidia.com/dra.kubelet-plugin=true
111 + --node-labels=gke-no-default-nvidia-gpu-device-plugin=true,nvidia.com/gpu.present=true
109 112
110 113 ## Allow the GPU nodes access to the internet
111 114 gcloud compute routers create ${NETWORK_NAME}-nat-router \
@@ -126,10 +129,11 @@ gcloud compute routers nats create "${NETWORK_NAME}-nat-config" \
126 129 gcloud container clusters get-credentials "${CLUSTER_NAME}" --location="us-west1"
127 130
128 131 ## Launch the nvidia-driver-installer daemonset to install the GPU drivers on any GPU nodes that come online:
132 + kubectl label node --overwrite -l nvidia.com/gpu.present=true cloud.google.com/gke-gpu-driver-version-
129 133 kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/ubuntu/daemonset-preloaded.yaml
130 134
131 135 ## Create the nvidia namespace
132 136 kubectl create namespace nvidia
133 137
134 138 ## Deploy a custom daemonset that prepares a node for use with DRA
135 - kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-dra-driver/456d097feb452cca1351817bab2ccd0782e96c9f/demo/prepare-gke-nodes-for-dra.yaml
139 + kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-dra-driver/3498c9a91cb594af94c9e8d65177b131e380e116/demo/prepare-gke-nodes-for-dra.yaml
0 commit comments