Skip to content
Closed
123 changes: 123 additions & 0 deletions .github/workflows/integration-gcp.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: 'Integration Tests - GCP'

# Triggers: manual dispatch (handy while testing the workflow) and pushes to
# the branches listed below.
on:
  workflow_dispatch: {}
  push:
    branches:
      - main
      - feature/oidc-gcp

# Token scopes granted to the workflow. `id-token: write` lets the job request
# the OIDC token that is exchanged with the GCP workload identity provider.
permissions:
  contents: read
  actions: read
  id-token: write

jobs:
  # Single job: mirror the release images into GCP Artifact Registry, create a
  # GKE cluster, and tear the cluster down again even when an earlier step
  # failed.
  copy-images:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    env:
      # Tag of the images to mirror; handed to scripts/build-image-list.sh as
      # CI_COMMIT_REF_NAME.
      IMAGE_TAG: main-ddc3fc4
      TARGET_REG: us-docker.pkg.dev
      TARGET_REPO: nvsentinel
      CRANE_VERSION: "0.20.6"
      IDENTITY_PROVIDER: "projects/868575635057/locations/global/workloadIdentityPools/github-pool/providers/github-provider"
      # NOTE(review): this value looks redacted in the reviewed copy - confirm
      # it holds the real service account e-mail before merging.
      SERVICE_ACCOUNT: "[email protected]"
      PROJECT_ID: "proj-dgxc-nvsentinel"
    steps:
      # Checkout Repo
      - name: Checkout
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5.0.0

      # Configure GCP AuthN
      - name: Get AuthN Token
        id: auth
        uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093  # v3
        with:
          token_format: access_token
          workload_identity_provider: ${{ env.IDENTITY_PROVIDER }}
          service_account: ${{ env.SERVICE_ACCOUNT }}

      # Copy Images to GCP Artifact Registry
      - name: Authenticate to GCP Artifact Registry
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef  # v3.6.0
        with:
          registry: ${{ env.TARGET_REG }}
          username: oauth2accesstoken
          password: ${{ steps.auth.outputs.access_token }}

      # CRANE_VERSION is inherited from the job-level env.
      - name: Install crane
        shell: bash
        env:
          REPO_URL: "https://github.com/google/go-containerregistry"
        run: |
          set -euo pipefail
          URL="$REPO_URL/releases/download/v${CRANE_VERSION}/go-containerregistry_Linux_x86_64.tar.gz"
          # --fail: stop on an HTTP error instead of piping an error page into tar
          curl -fsSL "$URL" | sudo tar -xz -C /usr/local/bin crane
          crane version

      # Secrets and context values are passed through env and referenced as
      # quoted shell variables, so they are never spliced into the script text.
      - name: Auth crane Source
        shell: bash
        env:
          GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GHCR_USER: ${{ github.actor }}
        run: |
          printf '%s\n' "$GHCR_TOKEN" | crane auth login ghcr.io --username="$GHCR_USER" --password-stdin

      - name: Auth crane Target
        shell: bash
        env:
          GCP_ACCESS_TOKEN: ${{ steps.auth.outputs.access_token }}
        run: |
          printf '%s\n' "$GCP_ACCESS_TOKEN" | crane auth login "$TARGET_REG" --username=oauth2accesstoken --password-stdin

      - name: Build Image List
        shell: bash
        env:
          CI_COMMIT_REF_NAME: ${{ env.IMAGE_TAG }}
        run: |
          scripts/build-image-list.sh
          cat versions.txt

      # For the remaining steps TARGET_REG is widened from the registry host to
      # the full <host>/<project>/<repository> path.
      - name: Copy Images to GCP Artifact Registry
        shell: bash
        env:
          TARGET_REG: "${{ env.TARGET_REG }}/${{ env.PROJECT_ID }}/${{ env.TARGET_REPO }}"
        run: |
          scripts/copy-images.sh "$TARGET_REG" versions.txt

      # Create GKE Cluster
      - name: Setup gcloud CLI
        uses: google-github-actions/setup-gcloud@aa5489c8933f4cc7a4f7d45035b3b1440c9c10db  # v3.0.1
        with:
          version: '>= 543.0.0'

      - name: Show gcloud CLI Info
        run: |
          gcloud info

      - name: Create Cluster
        id: create-cluster
        shell: bash
        env:
          TARGET_REG: "${{ env.TARGET_REG }}/${{ env.PROJECT_ID }}/${{ env.TARGET_REPO }}"
        run: |
          scripts/gcp-cluster-up.sh

      # TODO: Add integration tests here that use the cluster

      # Runs even after a failure, but only if cluster creation was attempted,
      # so a partially created cluster is still cleaned up.
      - name: Destroy Cluster
        if: always() && steps.create-cluster.outcome != 'skipped'
        shell: bash
        env:
          TARGET_REG: "${{ env.TARGET_REG }}/${{ env.PROJECT_ID }}/${{ env.TARGET_REPO }}"
        run: |
          scripts/gcp-cluster-down.sh
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -434,3 +434,6 @@ health-monitors/syslog-health-monitor/syslog-health-monitor
labeler/labeler
node-drainer/node-drainer
platform-connectors/platform-connectors

# Ignore generated credentials from google-github-actions/auth
gha-creds-*.json
184 changes: 184 additions & 0 deletions scripts/copy-images.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
#!/usr/bin/env bash
#
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Positional arguments
#   $1 - target registry URI; defaults to empty here so the argument check
#        further down can print a usage message instead of tripping `set -u`
#   $2 - image list file; defaults to versions.txt
TARGET_REG_URI="${1:-}"
IMAGE_LIST_FILE="${2:-versions.txt}"

# ANSI color codes for output. They are stored as literal backslash sequences;
# the log_* helpers expand them when printing.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Helper functions
# Shared formatter behind the log_* helpers: prints one colorized,
# icon-prefixed line on stdout and resets the color afterwards.
#   $1 - color escape sequence
#   $2 - icon
#   remaining arguments - message words, joined the same way "$*" joins them
_log_line() {
    local color=$1 icon=$2
    shift 2
    printf '%b\n' "${color}${icon} $*${NC}"
}

# Informational message (blue).
log_info() {
    _log_line "$BLUE" "ℹ️" "$@"
}

# Success message (green).
log_success() {
    _log_line "$GREEN" "✅" "$@"
}

# Warning message (yellow).
log_warning() {
    _log_line "$YELLOW" "⚠️" "$@"
}

# Error message (red).
log_error() {
    _log_line "$RED" "❌" "$@"
}

# Exit status 0 when the shell can resolve "$1" as a command, non-zero
# otherwise. Prints nothing: both output streams are discarded.
command_exists() {
    local tool=$1
    command -v "$tool" &>/dev/null
}

# Guard clauses: each one reports the problem and stops the script with
# status 1.

# crane performs every registry operation below, so it must be on PATH.
command_exists crane || {
    log_error "crane is not installed. Please install crane to proceed."
    exit 1
}

# The target registry URI is mandatory.
[[ -n "$TARGET_REG_URI" ]] || {
    log_error "Usage: $0 <target-registry-uri> [image-list-file]"
    log_error "Example: $0 us-docker.pkg.dev/my-project/my-repo versions.txt"
    exit 1
}

# The image list must be an existing regular file.
[[ -f "$IMAGE_LIST_FILE" ]] || {
    log_error "Image list file not found: $IMAGE_LIST_FILE"
    exit 1
}

# Echo the effective inputs before any work starts.
log_info "Source image list: $IMAGE_LIST_FILE"
log_info "Target registry URI: $TARGET_REG_URI"
log_info "Reading images from $IMAGE_LIST_FILE..."

# Count total images: every line that is neither blank/whitespace-only nor a
# comment (a '#' optionally preceded by whitespace).
#
# `grep -c` prints 0 but exits with status 1 when nothing matches, which would
# abort the script silently under `set -e`. The `||` branch accepts exactly
# that case; a genuine grep error leaves the variable empty and still aborts.
TOTAL_IMAGES=$(grep -Evc '^[[:space:]]*(#|$)' "$IMAGE_LIST_FILE") || [[ "$TOTAL_IMAGES" == "0" ]]
log_info "Found $TOTAL_IMAGES images to copy"

# Counters, updated once per processed image and reported in the summary
SUCCESS_COUNT=0
FAILURE_COUNT=0
SKIPPED_COUNT=0

# Copy a single image into the target registry and verify it by digest.
#
# Arguments:
#   $1 - source image reference, e.g. registry/org/image:tag
#   $2 - position of this image in the list (progress output only)
# Globals read:
#   TARGET_REG_URI - registry/repository prefix the image is copied under
#   TOTAL_IMAGES   - total number of images (progress output only)
# Returns:
#   0 - image copied and the target digest matches the source digest
#   1 - digest lookup, copy, or post-copy verification failed
#   2 - target already holds the same digest; nothing was copied
copy_image() {
    local src_image_uri=$1
    local image_num=$2

    log_info "[$image_num/$TOTAL_IMAGES] Processing: $src_image_uri"

    # Keep only the last path segment of the source ("image:tag" or
    # "image@sha256:..."). Parameter expansion also copes with references that
    # contain no "/" at all, which a pattern anchored on "/" cannot split.
    local image_ref="${src_image_uri##*/}"

    # A reference with neither tag nor digest gets an explicit ":latest", so
    # the logged target names the tag that is actually written.
    if [[ "$image_ref" != *[@:]* ]]; then
        image_ref="${image_ref}:latest"
    fi

    # Build target URI
    local target_uri="$TARGET_REG_URI/$image_ref"

    log_info " Source: $src_image_uri"
    log_info " Target: $target_uri"

    # Get source digest. Only stdout is captured: crane's stderr goes straight
    # to the console, so diagnostics can never end up inside the digest string
    # that is compared below.
    local src_digest
    if ! src_digest=$(crane digest "$src_image_uri"); then
        log_error " Failed to get digest for $src_image_uri"
        return 1
    fi

    log_info " Source digest: $src_digest"

    # Check if image already exists at target with same digest. A failing
    # lookup is taken to mean "not present yet", hence the discarded stderr.
    local target_digest
    if target_digest=$(crane digest "$target_uri" 2>/dev/null); then
        if [ "$target_digest" = "$src_digest" ]; then
            log_warning " Image already exists at target with same digest, skipping"
            return 2
        else
            log_info " Image exists but digest differs, will overwrite"
        fi
    fi

    # Copy image
    log_info " Copying image..."
    if ! crane copy "$src_image_uri" "$target_uri"; then
        log_error " Failed to copy image"
        return 1
    fi

    # Verify digest after copy
    local new_digest
    if ! new_digest=$(crane digest "$target_uri"); then
        log_error " Failed to verify target digest for $target_uri"
        return 1
    fi

    if [ "$new_digest" != "$src_digest" ]; then
        log_error " Digest mismatch! Source: $src_digest, Target: $new_digest"
        return 1
    fi

    log_success " Successfully copied and verified: $target_uri"
    return 0
}

# Process each image in the list.
# The `|| [[ -n ... ]]` keeps a final line that has no trailing newline, which
# a plain `read` loop would drop.
IMAGE_NUM=0
while IFS= read -r src_image_uri || [[ -n "$src_image_uri" ]]; do
    # Trim surrounding whitespace; this also removes the "\r" of CRLF files.
    src_image_uri="${src_image_uri#"${src_image_uri%%[![:space:]]*}"}"
    src_image_uri="${src_image_uri%"${src_image_uri##*[![:space:]]}"}"

    # Skip empty lines and comments
    [[ -z "$src_image_uri" || "$src_image_uri" == \#* ]] && continue

    IMAGE_NUM=$((IMAGE_NUM + 1))

    # Capture the status explicitly rather than reading $? in a later test.
    # copy_image returns 0 = copied, 2 = already present, anything else = failed.
    copy_status=0
    copy_image "$src_image_uri" "$IMAGE_NUM" || copy_status=$?

    case "$copy_status" in
        0)
            SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
            ;;
        2)
            SKIPPED_COUNT=$((SKIPPED_COUNT + 1))
            ;;
        *)
            # One failed image must not stop the rest of the list.
            FAILURE_COUNT=$((FAILURE_COUNT + 1))
            log_warning "Continuing with next image..."
            ;;
    esac

    echo "" # Blank line between images
done < "$IMAGE_LIST_FILE"

# Summary: print the per-outcome totals between two rule lines, then exit.
summary_rule="=================================================="

echo "$summary_rule"
log_info "Image Copy Summary"
echo "$summary_rule"
log_success "Successfully copied: $SUCCESS_COUNT"
log_warning "Skipped (already exist): $SKIPPED_COUNT"

# The failure count is shown at error level when non-zero and also decides the
# script's exit status.
exit_status=0
if (( FAILURE_COUNT > 0 )); then
    log_error "Failed: $FAILURE_COUNT"
    exit_status=1
else
    log_info "Failed: $FAILURE_COUNT"
fi

log_info "Total processed: $TOTAL_IMAGES"
echo "$summary_rule"

exit "$exit_status"
30 changes: 30 additions & 0 deletions scripts/gcp-cluster-down.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -euo pipefail

# Load the shared cluster settings from the helper that sits next to this
# script. It is expected to define CLUSTER_NAME and REGION (the helper is not
# visible here); with `set -u` a missing variable aborts the script.
DIR="$(dirname "$0")"
source "${DIR}/gcp-cluster-env.sh"

printf 'Deleting GKE cluster: %s in region %s\n' "$CLUSTER_NAME" "$REGION"

# Delete the regional GKE cluster; --quiet suppresses the interactive
# confirmation prompt so the command can run unattended.
gcloud container clusters delete "$CLUSTER_NAME" --region="$REGION" --quiet

printf '%s\n' "✅ Cluster deletion complete!"
Loading
Loading