Skip to content

Commit 3942958

Browse files
authored
[chore]: streamline makefiles to minimize cognitive load, lower maintenance burden, and remove duplication #135 (#137)
1 parent b70aab0 commit 3942958

File tree

55 files changed

+2180
-1954
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

55 files changed

+2180
-1954
lines changed

.github/actions/build-container/action.yml

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,6 @@ name: 'Build Container'
1616
description: 'Build container for validation without publishing'
1717

1818
inputs:
19-
safe_ref_name:
20-
description: 'Safe reference name for container tags'
21-
required: true
22-
nvcr_container_repo:
23-
description: 'Container registry URL'
24-
required: true
25-
container_org:
26-
description: 'Container organization/namespace'
27-
required: true
2819
make_command:
2920
description: 'Make command to run for building (should use docker-build, not docker-publish)'
3021
required: true
@@ -56,14 +47,14 @@ runs:
5647
- name: Build container (validation only)
5748
shell: bash
5849
env:
59-
SAFE_REF_NAME: ${{ inputs.safe_ref_name }}
60-
NVCR_CONTAINER_REPO: ${{ inputs.nvcr_container_repo }}
61-
NGC_ORG: ${{ inputs.container_org }}
6250
DOCKER_BUILDKIT: 1
6351
BUILDX_CACHE_FROM: type=local,src=/tmp/.buildx-cache
6452
BUILDX_CACHE_TO: type=local,dest=/tmp/.buildx-cache-new,mode=max
6553
run: |
66-
echo "Building container for validation..."
54+
# Compute SAFE_REF_NAME from CI_COMMIT_REF_NAME or fallback to GITHUB_REF_NAME
55+
SAFE_REF_NAME=$(echo "${CI_COMMIT_REF_NAME:-${GITHUB_REF_NAME}}" | sed 's/\//-/g')
56+
export SAFE_REF_NAME
57+
echo "Building container for validation with ref: ${SAFE_REF_NAME}..."
6758
${{ inputs.make_command }}
6859
6960
# Clean up built images to save space

.github/actions/prepare-vars/action.yml

Lines changed: 0 additions & 45 deletions
This file was deleted.

.github/actions/publish-container/action.yml

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,9 @@ name: 'Publish Container'
1616
description: 'Complete container publishing workflow with Docker setup, login, and build'
1717

1818
inputs:
19-
safe_ref_name:
20-
description: 'Safe reference name for container tags'
21-
required: true
22-
nvcr_container_repo:
23-
description: 'Container registry URL'
24-
required: true
25-
container_org:
26-
description: 'Container organization/namespace'
27-
required: true
2819
make_command:
2920
description: 'Make command to run for building and publishing'
3021
required: true
31-
registry:
32-
description: 'Container registry (default: ghcr.io)'
33-
required: false
34-
default: 'ghcr.io'
35-
registry_username:
36-
description: 'Registry username (default: github.actor)'
37-
required: false
38-
default: ${{ github.actor }}
3922
registry_password:
4023
description: 'Registry password'
4124
required: true
@@ -65,26 +48,25 @@ runs:
6548
- name: Log in to Container Registry
6649
uses: docker/login-action@v3
6750
with:
68-
registry: ${{ inputs.registry }}
69-
username: ${{ inputs.registry_username }}
51+
registry: ghcr.io
52+
username: ${{ github.actor }}
7053
password: ${{ inputs.registry_password }}
7154

7255
- name: Build and publish container
7356
shell: bash
7457
id: image
7558
env:
76-
SAFE_REF_NAME: ${{ inputs.safe_ref_name }}
77-
NVCR_CONTAINER_REPO: ${{ inputs.nvcr_container_repo }}
78-
NGC_ORG: ${{ inputs.container_org }}
7959
DOCKER_BUILDKIT: 1
8060
BUILDX_CACHE_FROM: type=local,src=/tmp/.buildx-cache
8161
BUILDX_CACHE_TO: type=local,dest=/tmp/.buildx-cache-new,mode=max
8262
run: |
8363
${{ inputs.make_command }}
8464
85-
DIGEST="$(crane digest ${{ inputs.nvcr_container_repo }}/${{ inputs.container_org }}/${{ inputs.container_name }}:${{ inputs.safe_ref_name }}${{ inputs.tag_suffix }})"
65+
# Compute SAFE_REF_NAME the same way the Makefile does
66+
SAFE_REF_NAME=$(echo "${CI_COMMIT_REF_NAME:-${GITHUB_REF_NAME}}" | sed 's/\//-/g')
67+
DIGEST="$(crane digest ghcr.io/nvidia/${{ inputs.container_name }}:${SAFE_REF_NAME}${{ inputs.tag_suffix }})"
8668
echo "digest=$DIGEST" >> "$GITHUB_OUTPUT"
87-
echo "name=${{ inputs.nvcr_container_repo }}/${{ inputs.container_org }}/${{ inputs.container_name }}" >> "$GITHUB_OUTPUT"
69+
echo "name=ghcr.io/nvidia/${{ inputs.container_name }}" >> "$GITHUB_OUTPUT"
8870
8971
# Move cache to prevent it from growing indefinitely
9072
if [ -d "/tmp/.buildx-cache-new" ]; then
File renamed without changes.
File renamed without changes.
File renamed without changes.

.github/workflows/container-build-test.yml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ on:
3434
- '.github/workflows/container-build-test.yml'
3535
- '.github/actions/build-container/**'
3636
- '.github/actions/setup-build-env/**'
37-
# Build scripts
38-
- 'build_image_list.sh'
3937
workflow_dispatch:
4038
inputs:
4139
components:
@@ -131,18 +129,23 @@ jobs:
131129
if: steps.should-build.outputs.build == 'false'
132130
run: echo "Skipping ${{ matrix.component }} (not in selected components list)"
133131

132+
- name: Compute ref name with short SHA
133+
if: steps.should-build.outputs.build == 'true'
134+
id: ref-name
135+
run: |
136+
SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-7)
137+
echo "value=${{ github.ref_name }}-${SHORT_SHA}" >> $GITHUB_OUTPUT
138+
134139
- name: Execute build
135140
if: steps.should-build.outputs.build == 'true'
136141
uses: ./.github/actions/build-container
137142
env:
143+
CI_COMMIT_REF_NAME: ${{ steps.ref-name.outputs.value }}
138144
# Disable registry cache for pull requests to avoid permission issues
139145
DISABLE_REGISTRY_CACHE: ${{ (github.event_name == 'pull_request' || startsWith(github.ref, 'refs/heads/pull-request/')) && 'true' || 'false' }}
140146
# Disable --load flag in CI builds (causes issues with multi-platform builds)
141147
DOCKER_LOAD_ARG: ''
142148
with:
143-
safe_ref_name: ${{ needs.prepare-environment.outputs.safe_ref_name }}
144-
nvcr_container_repo: ${{ needs.prepare-environment.outputs.nvcr_container_repo }}
145-
container_org: ${{ needs.prepare-environment.outputs.container_org }}
146149
make_command: ${{ matrix.make_command }}
147150

148151
container-build-summary:

.github/workflows/e2e-test.yml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -203,11 +203,15 @@ jobs:
203203
chmod +x scripts/configure-ctlptl-registry.sh
204204
./scripts/configure-ctlptl-registry.sh
205205
206+
- name: Compute ref name with short SHA
207+
id: ref-name
208+
run: |
209+
SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-7)
210+
echo "value=${{ github.ref_name }}-${SHORT_SHA}" >> $GITHUB_OUTPUT
211+
206212
- name: Create cluster for E2E tests
207213
env:
208-
SAFE_REF_NAME: ${{ needs.prepare-environment.outputs.safe_ref_name }}
209-
NVCR_CONTAINER_REPO: ${{ needs.prepare-environment.outputs.nvcr_container_repo }}
210-
NGC_ORG: ${{ needs.prepare-environment.outputs.container_org }}
214+
CI_COMMIT_REF_NAME: ${{ steps.ref-name.outputs.value }}
211215
CTLPTL_YAML: ctlptl-config.yaml
212216
# Make cluster names unique per architecture to avoid conflicts in parallel runs
213217
CLUSTER_NAME_SUFFIX: "-${{ matrix.arch }}"
@@ -222,9 +226,7 @@ jobs:
222226
223227
- name: Run E2E tests
224228
env:
225-
SAFE_REF_NAME: ${{ needs.prepare-environment.outputs.safe_ref_name }}
226-
NVCR_CONTAINER_REPO: ${{ needs.prepare-environment.outputs.nvcr_container_repo }}
227-
NGC_ORG: ${{ needs.prepare-environment.outputs.container_org }}
229+
CI_COMMIT_REF_NAME: ${{ steps.ref-name.outputs.value }}
228230
CTLPTL_YAML: ctlptl-config.yaml
229231
# Use same cluster name suffix for consistency
230232
CLUSTER_NAME_SUFFIX: "-${{ matrix.arch }}"

.github/workflows/helm-e2e-test.yml

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,22 @@ jobs:
132132
helm version --short
133133
kwok --version
134134
135+
- name: Compute ref name with short SHA
136+
id: ref-name
137+
run: |
138+
SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-7)
139+
echo "value=${{ github.ref_name }}-${SHORT_SHA}" >> $GITHUB_OUTPUT
140+
135141
- name: Build container images locally
136142
env:
137-
SAFE_REF_NAME: ${{ needs.prepare-environment.outputs.safe_ref_name }}
138-
NVCR_CONTAINER_REPO: localhost
139-
NGC_ORG: nvsentinel-e2e
143+
CI_COMMIT_REF_NAME: ${{ steps.ref-name.outputs.value }}
144+
CONTAINER_REGISTRY: localhost
145+
CONTAINER_ORG: nvsentinel-e2e
140146
PLATFORMS: linux/amd64
141147
DISABLE_REGISTRY_CACHE: true
142148
run: |
143149
make docker-all
150+
SAFE_REF_NAME=$(echo "${CI_COMMIT_REF_NAME}" | sed 's/\//-/g')
144151
docker images | grep "localhost/nvsentinel-e2e/nvsentinel-.*:${SAFE_REF_NAME}" || docker images | grep nvsentinel
145152
146153
- name: Create Kind cluster
@@ -182,8 +189,9 @@ jobs:
182189
183190
- name: Load images into Kind cluster
184191
env:
185-
SAFE_REF_NAME: ${{ needs.prepare-environment.outputs.safe_ref_name }}
192+
CI_COMMIT_REF_NAME: ${{ steps.ref-name.outputs.value }}
186193
run: |
194+
SAFE_REF_NAME=$(echo "${CI_COMMIT_REF_NAME}" | sed 's/\//-/g')
187195
mapfile -t images < <(docker images --format "{{.Repository}}:{{.Tag}}" | grep "localhost/nvsentinel-e2e/nvsentinel-.*:.*${SAFE_REF_NAME}")
188196
[ ${#images[@]} -eq 0 ] && { echo "No images found"; exit 1; }
189197
@@ -223,8 +231,10 @@ jobs:
223231
224232
- name: Patch values for E2E testing
225233
env:
226-
SAFE_REF_NAME: ${{ needs.prepare-environment.outputs.safe_ref_name }}
234+
CI_COMMIT_REF_NAME: ${{ steps.ref-name.outputs.value }}
227235
run: |
236+
SAFE_REF_NAME=$(echo "${CI_COMMIT_REF_NAME}" | sed 's/\//-/g')
237+
228238
# Replace repository URLs in chart files
229239
sed -i 's|ghcr\.io/nvidia/nvsentinel-|localhost/nvsentinel-e2e/nvsentinel-|g' \
230240
distros/kubernetes/nvsentinel/charts/*/values.yaml \
@@ -255,20 +265,24 @@ jobs:
255265
EOF
256266
257267
- name: Install NVSentinel via Helm
268+
env:
269+
CI_COMMIT_REF_NAME: ${{ steps.ref-name.outputs.value }}
258270
run: |
271+
SAFE_REF_NAME=$(echo "${CI_COMMIT_REF_NAME}" | sed 's/\//-/g')
259272
helm upgrade --install nvsentinel ./distros/kubernetes/nvsentinel \
260273
--create-namespace \
261274
--namespace ${{ env.NVSENTINEL_NAMESPACE }} \
262275
--values /tmp/values-patched.yaml \
263-
--set global.image.tag="${{ needs.prepare-environment.outputs.safe_ref_name }}" \
276+
--set global.image.tag="${SAFE_REF_NAME}" \
264277
--debug
265278
266279
kubectl get pods -n ${{ env.NVSENTINEL_NAMESPACE }}
267280
268281
- name: Validate deployment with retry
269282
env:
270-
SAFE_REF_NAME: ${{ needs.prepare-environment.outputs.safe_ref_name }}
283+
CI_COMMIT_REF_NAME: ${{ steps.ref-name.outputs.value }}
271284
run: |
285+
SAFE_REF_NAME=$(echo "${CI_COMMIT_REF_NAME}" | sed 's/\//-/g')
272286
chmod +x scripts/validate-nvsentinel.sh
273287
max_attempts=$((${{ env.VALIDATION_TIMEOUT_MINUTES }} * 60 / ${{ env.VALIDATION_INTERVAL_SECONDS }}))
274288
@@ -293,13 +307,16 @@ jobs:
293307
294308
- name: Collect debug artifacts
295309
if: failure()
310+
env:
311+
CI_COMMIT_REF_NAME: ${{ steps.ref-name.outputs.value }}
296312
run: |
313+
SAFE_REF_NAME=$(echo "${CI_COMMIT_REF_NAME}" | sed 's/\//-/g')
297314
mkdir -p /tmp/debug-artifacts
298315
kubectl get all --all-namespaces > /tmp/debug-artifacts/all-resources.yaml || true
299316
kubectl get events --all-namespaces --sort-by='.lastTimestamp' > /tmp/debug-artifacts/all-events.yaml || true
300317
kubectl get pods -n ${{ env.NVSENTINEL_NAMESPACE }} -o yaml > /tmp/debug-artifacts/nvsentinel-pods.yaml || true
301318
kubectl logs -n ${{ env.NVSENTINEL_NAMESPACE }} --all-containers=true --tail=500 > /tmp/debug-artifacts/nvsentinel-logs.txt || true
302-
./scripts/validate-nvsentinel.sh --version "${{ needs.prepare-environment.outputs.safe_ref_name }}" --namespace "${{ env.NVSENTINEL_NAMESPACE }}" --image-pattern "localhost/nvsentinel-e2e/nvsentinel" --verbose > /tmp/debug-artifacts/validation-output.txt 2>&1 || true
319+
./scripts/validate-nvsentinel.sh --version "${SAFE_REF_NAME}" --namespace "${{ env.NVSENTINEL_NAMESPACE }}" --image-pattern "localhost/nvsentinel-e2e/nvsentinel" --verbose > /tmp/debug-artifacts/validation-output.txt 2>&1 || true
303320
docker images > /tmp/debug-artifacts/docker-images.txt
304321
df -h > /tmp/debug-artifacts/disk-usage.txt
305322

0 commit comments

Comments
 (0)