Skip to content

Commit 1d1433f

Browse files
committed
chore: added cluster bringup
1 parent ae1d7b8 commit 1d1433f

File tree

4 files changed

+198
-11
lines changed

4 files changed

+198
-11
lines changed

.github/workflows/integration-gcp.yml

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,11 @@ jobs:
3939
SERVICE_ACCOUNT: "[email protected]"
4040
PROJECT_ID: "proj-dgxc-nvsentinel"
4141
steps:
42+
# Checkout Repo
4243
- name: Checkout
4344
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
4445

46+
# Configure GCP AuthN
4547
- name: Get AuthN Token
4648
id: auth
4749
uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3
@@ -50,22 +52,14 @@ jobs:
5052
workload_identity_provider: ${{ env.IDENTITY_PROVIDER }}
5153
service_account: ${{ env.SERVICE_ACCOUNT }}
5254

55+
# Copy Images to GCP Artifact Registry
5356
- name: Authenticate to GCP Artifact Registry
5457
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
5558
with:
5659
registry: ${{ env.TARGET_REG }}
5760
username: oauth2accesstoken
5861
password: ${{ steps.auth.outputs.access_token }}
5962

60-
- name: Setup gcloud CLI
61-
uses: google-github-actions/setup-gcloud@aa5489c8933f4cc7a4f7d45035b3b1440c9c10db # v3.0.1
62-
with:
63-
version: '>= 543.0.0'
64-
65-
- name: Show gcloud CLI Info
66-
run: |
67-
gcloud info
68-
6963
- name: Install crane
7064
shell: bash
7165
env:
@@ -90,12 +84,36 @@ jobs:
9084
env:
9185
CI_COMMIT_REF_NAME: ${{ env.IMAGE_TAG }}
9286
run: |
93-
./scripts/build-image-list.sh
87+
scripts/build-image-list.sh
9488
cat versions.txt
9589
9690
- name: Copy Images to GCP Artifact Registry
9791
shell: bash
9892
env:
9993
TARGET_REG: "${{ env.TARGET_REG }}/${{ env.PROJECT_ID }}/${{ env.TARGET_REPO }}"
10094
run: |
101-
./scripts/copy-images.sh "$TARGET_REG" versions.txt
95+
scripts/copy-images.sh "$TARGET_REG" versions.txt
96+
97+
# Create GKE Cluster
98+
# Install a gcloud CLI recent enough for the cluster scripts below.
- name: Setup gcloud CLI
  uses: google-github-actions/setup-gcloud@aa5489c8933f4cc7a4f7d45035b3b1440c9c10db  # v3.0.1
  with:
    version: '>= 543.0.0'

# Print the resolved CLI configuration into the job log for debugging.
- name: Show gcloud CLI Info
  run: |
    gcloud info

# Bring up the GKE cluster. The `id` lets the teardown step below tell
# whether creation was attempted at all.
- name: Create Cluster
  id: create_cluster
  shell: bash
  run: |
    scripts/gcp-cluster-up.sh

# Always tear the cluster down once creation has been attempted, even when
# creation (or any later step) failed or the run was cancelled; otherwise a
# partially created cluster would be left running. Skipped only when the
# create step itself never ran.
- name: Destroy Cluster
  if: ${{ always() && steps.create_cluster.conclusion != 'skipped' }}
  shell: bash
  run: |
    scripts/gcp-cluster-down.sh

scripts/gcp-cluster-down.sh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Abort on the first failing command, on unset variables, and on failures
# anywhere in a pipeline.
set -euo pipefail

# Load the shared settings (CLUSTER_NAME, REGION, ...) from the env script
# that sits next to this one.
script_dir="$(dirname "$0")"
source "${script_dir}/gcp-cluster-env.sh"

echo "Deleting GKE cluster: ${CLUSTER_NAME} in region ${REGION}"

# Remove the regional cluster; --quiet suppresses the confirmation prompt so
# the script can run unattended.
gcloud container clusters delete "${CLUSTER_NAME}" --region="${REGION}" --quiet

echo "✅ Cluster deletion complete!"

scripts/gcp-cluster-env.sh

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Shared environment for the GKE cluster scripts. This file is sourced by
# gcp-cluster-up.sh and gcp-cluster-down.sh: it validates the local gcloud
# setup, exports the settings those scripts read, and prints a summary.
#
# Overridable via the caller's environment:
#   CLUSTER_NAME, CLUSTER_CHANNEL, SYSTEM_NODE_TYPE, SYSTEM_NODE_COUNT,
#   SERVICE_ACCOUNT
# Derived from the active gcloud configuration:
#   ACCOUNT, PROJECT_ID, REGION (falls back to us-central1)
set -euo pipefail

# validation
# `command -v` is the portable way to locate a binary. The brace group runs in
# the current shell, so `exit` terminates the script itself instead of relying
# on `set -e` to notice a failed subshell.
gcloud="$(command -v gcloud)" || {
  echo "gcloud not found" >&2
  exit 1
}

# Check gcloud is authenticated.
ACCOUNT="$(gcloud auth list --filter=status:ACTIVE --format="value(account)")"
if [[ -z "${ACCOUNT}" ]]; then
  echo "Run 'gcloud auth login' to authenticate to GCP first." >&2
  exit 1
fi

# Check project is set.
# Assignment and export are separate statements so that a failing gcloud call
# is not masked by the (always successful) `export` builtin.
PROJECT_ID="$(gcloud config list --format 'value(core.project)')"
export PROJECT_ID
if [[ -z "${PROJECT_ID}" ]]; then
  # The backticks are escaped: unescaped, they would *execute* the command and
  # set the project to the literal placeholder instead of printing the hint.
  echo "\`gcloud config set project YOUR_PROJECT_ID\` not set." >&2
  exit 1
fi

# Check region is set; fall back to a default instead of failing.
REGION="$(gcloud config list --format 'value(compute.region)')"
export REGION
if [[ -z "${REGION}" ]]; then
  echo "Warning: \`gcloud config set compute/region YOUR_REGION\` not set, using default." >&2
  export REGION="us-central1"
fi

# Config (each value can be overridden from the caller's environment)
export CLUSTER_NAME="${CLUSTER_NAME:-validation}"
export CLUSTER_CHANNEL="${CLUSTER_CHANNEL:-regular}"
export SYSTEM_NODE_TYPE="${SYSTEM_NODE_TYPE:-e2-standard-4}"
export SYSTEM_NODE_COUNT="${SYSTEM_NODE_COUNT:-3}"

# SERVICE_ACCOUNT is optional - set by workflow or provide manually.
# Defaulting to empty keeps `set -u` from aborting when it is absent.
export SERVICE_ACCOUNT="${SERVICE_ACCOUNT:-}"

# Print variables
cat << EOF

Configuration:
  PROJECT_ID:      ${PROJECT_ID}
  ACCOUNT:         ${ACCOUNT}
  REGION:          ${REGION}
  CLUSTER_NAME:    ${CLUSTER_NAME}
  CLUSTER_CHANNEL: ${CLUSTER_CHANNEL}
  NODE_TYPE:       ${SYSTEM_NODE_TYPE}
  NODE_COUNT:      ${SYSTEM_NODE_COUNT}
  SERVICE_ACCOUNT: ${SERVICE_ACCOUNT:-<not set>}

EOF

scripts/gcp-cluster-up.sh

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Creates the regional GKE cluster described by gcp-cluster-env.sh, optionally
# binds SERVICE_ACCOUNT for Workload Identity, and fetches kubectl credentials.
set -euo pipefail

# Resolve the directory from BASH_SOURCE so the env file is found regardless of
# the caller's working directory or how the script was invoked.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
. "${DIR}/gcp-cluster-env.sh"

# Assumptions:
# - gcloud is installed and configured
# - OIDC configured (see https://github.com/mchmarny/oidc-for-gcp-using-terraform)

echo "Creating GKE cluster..."

# Create regional cluster.
# - Logging and monitoring are selected with --logging / --monitoring only; the
#   legacy --enable-cloud-logging / --enable-cloud-monitoring switches are
#   deprecated and redundant next to them, so they are not passed.
# - With --region, --num-nodes is the node count *per zone*, so the cluster's
#   total node count is SYSTEM_NODE_COUNT times the number of zones.
gcloud container clusters create "$CLUSTER_NAME" \
  --scopes=cloud-platform \
  --disk-size="200" \
  --disk-type="pd-standard" \
  --enable-image-streaming \
  --enable-ip-alias \
  --enable-shielded-nodes \
  --enable-autorepair \
  --enable-network-policy \
  --image-type="COS_CONTAINERD" \
  --labels=source=github,environment=validation \
  --logging=SYSTEM,WORKLOAD \
  --machine-type="$SYSTEM_NODE_TYPE" \
  --monitoring=SYSTEM \
  --num-nodes="$SYSTEM_NODE_COUNT" \
  --region="$REGION" \
  --release-channel="$CLUSTER_CHANNEL" \
  --workload-metadata="GKE_METADATA" \
  --workload-pool="${PROJECT_ID}.svc.id.goog" \
  --addons=HttpLoadBalancing,HorizontalPodAutoscaling

# Get cluster version
echo "Cluster version:"
gcloud container clusters describe "$CLUSTER_NAME" \
  --region="$REGION" \
  --format="value(currentMasterVersion)"

# Create policy binding between service account and k8s service account (optional).
# Grants roles/iam.workloadIdentityUser on SERVICE_ACCOUNT to the Kubernetes
# service account cnrm-system/cnrm-controller-manager in this project's
# workload pool.
# NOTE(review): that namespace/name looks like Config Connector's controller;
# confirm it matches what gets deployed to the cluster.
if [[ -n "${SERVICE_ACCOUNT}" ]]; then
  echo "Creating IAM policy binding for service account..."
  gcloud iam service-accounts add-iam-policy-binding "${SERVICE_ACCOUNT}" \
    --member="serviceAccount:${PROJECT_ID}.svc.id.goog[cnrm-system/cnrm-controller-manager]" \
    --role="roles/iam.workloadIdentityUser"
else
  echo "SERVICE_ACCOUNT not set, skipping IAM policy binding"
fi

# Get cluster credentials
echo "Getting cluster credentials..."
gcloud container clusters get-credentials "$CLUSTER_NAME" --region="$REGION"

echo "✅ Cluster creation complete!"

0 commit comments

Comments
 (0)