Skip to content

Commit 19bba14

Browse files
authored
Merge pull request #1483 from NVIDIA/consolidate-images
Consolidate operator and validator into one image
2 parents dfbb6c5 + a4782da commit 19bba14

29 files changed: 132 additions (+132) and 691 deletions (-691)

.common-ci.yml

Lines changed: 1 addition & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ trigger-pipeline:
8181

8282
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes'
8383

84-
# Define targets for the gpu-operator and gpu-operator-validator images
84+
# Define targets for the gpu-operator image
8585
.dist-ubi9:
8686
variables:
8787
DIST: ubi9
@@ -93,14 +93,6 @@ trigger-pipeline:
9393
IN_IMAGE_NAME: "gpu-operator"
9494
IMAGE_ARCHIVE: "gpu-operator.tar"
9595

96-
.target-gpu-operator-validator:
97-
variables:
98-
SUBCOMPONENT: "validator"
99-
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/gpu-operator-validator"
100-
IN_IMAGE_NAME: "gpu-operator-validator"
101-
IMAGE_ARCHIVE: "gpu-operator-validator.tar"
102-
IN_REGISTRY: "${STAGING_REGISTRY}/gpu-operator"
103-
10496
# .release forms the base of the deployment jobs which push images to the CI registry.
10597
# This is extended with the version to be deployed (e.g. the SHA or TAG) and the
10698
# target os.
@@ -187,14 +179,6 @@ release:staging-gpu-operator:
187179
variables:
188180
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/gpu-operator"
189181

190-
release:staging-gpu-operator-validator:
191-
extends:
192-
- .release:staging
193-
- .dist-ubi9
194-
- .target-gpu-operator-validator
195-
variables:
196-
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/gpu-operator-validator"
197-
198182
release:staging-latest-gpu-operator:
199183
extends:
200184
- .release:staging
@@ -206,17 +190,6 @@ release:staging-latest-gpu-operator:
206190
rules:
207191
- if: $CI_COMMIT_BRANCH
208192

209-
release:staging-latest-gpu-operator-validator:
210-
extends:
211-
- .release:staging
212-
- .dist-ubi9
213-
- .target-gpu-operator-validator
214-
variables:
215-
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/gpu-operator-validator"
216-
OUT_IMAGE_VERSION: ${CI_COMMIT_BRANCH}-latest
217-
rules:
218-
- if: $CI_COMMIT_BRANCH
219-
220193
release:gpu-operator-bundle-image:
221194
extends:
222195
- .release-bundle

.github/workflows/ci.yaml

Lines changed: 6 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -173,49 +173,6 @@ jobs:
173173
run: |
174174
echo "${VERSION}"
175175
make build-${{ matrix.dist }}
176-
build-gpu-operator-validator-arm64:
177-
needs: [go-check, go-test, go-build]
178-
runs-on: ubuntu-24.04-arm
179-
strategy:
180-
matrix:
181-
dist: [ubi9]
182-
steps:
183-
- uses: actions/checkout@v4
184-
name: Check out code
185-
- name: Calculate build vars
186-
id: vars
187-
run: |
188-
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
189-
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
190-
191-
GENERATE_ARTIFACTS="false"
192-
if [[ "${{ github.actor }}" == "dependabot[bot]" ]]; then
193-
GENERATE_ARTIFACTS="false"
194-
elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
195-
GENERATE_ARTIFACTS="true"
196-
elif [[ "${{ github.event_name }}" == "push" ]]; then
197-
GENERATE_ARTIFACTS="true"
198-
fi
199-
echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
200-
echo "DOCKER_BUILD_PLATFORM_OPTIONS=--platform=linux/arm64" >> $GITHUB_ENV
201-
- name: Set up Docker Buildx
202-
uses: docker/setup-buildx-action@v3
203-
- name: Login to GitHub Container Registry
204-
uses: docker/login-action@v3
205-
with:
206-
registry: ghcr.io
207-
username: ${{ github.actor }}
208-
password: ${{ secrets.GITHUB_TOKEN }}
209-
- name: Build image
210-
env:
211-
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator
212-
VERSION: ${COMMIT_SHORT_SHA}-arm64
213-
SUBCOMPONENT: validator
214-
run: |
215-
echo "${VERSION}"
216-
make build-${{ matrix.dist }}
217-
218-
### Image builds ###
219176
build-gpu-operator-amd64:
220177
needs: [go-check, go-test, go-build]
221178
runs-on: ubuntu-latest
@@ -260,50 +217,8 @@ jobs:
260217
echo "${VERSION}"
261218
make build-${{ matrix.dist }}
262219
263-
build-gpu-operator-validator-amd64:
264-
needs: [go-check, go-test, go-build]
265-
runs-on: ubuntu-latest
266-
strategy:
267-
matrix:
268-
dist: [ubi9]
269-
steps:
270-
- uses: actions/checkout@v4
271-
name: Check out code
272-
- name: Calculate build vars
273-
id: vars
274-
run: |
275-
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
276-
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
277-
278-
GENERATE_ARTIFACTS="false"
279-
if [[ "${{ github.actor }}" == "dependabot[bot]" ]]; then
280-
GENERATE_ARTIFACTS="false"
281-
elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
282-
GENERATE_ARTIFACTS="true"
283-
elif [[ "${{ github.event_name }}" == "push" ]]; then
284-
GENERATE_ARTIFACTS="true"
285-
fi
286-
echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
287-
echo "DOCKER_BUILD_PLATFORM_OPTIONS=--platform=linux/amd64" >> $GITHUB_ENV
288-
- name: Set up Docker Buildx
289-
uses: docker/setup-buildx-action@v3
290-
- name: Login to GitHub Container Registry
291-
uses: docker/login-action@v3
292-
with:
293-
registry: ghcr.io
294-
username: ${{ github.actor }}
295-
password: ${{ secrets.GITHUB_TOKEN }}
296-
- name: Build image
297-
env:
298-
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator
299-
VERSION: ${COMMIT_SHORT_SHA}-amd64
300-
SUBCOMPONENT: validator
301-
run: |
302-
echo "${VERSION}"
303-
make build-${{ matrix.dist }}
304-
305220
build-multi-arch-images:
306-
needs: [build-gpu-operator-arm64, build-gpu-operator-validator-arm64, build-gpu-operator-amd64, build-gpu-operator-validator-amd64]
221+
needs: [build-gpu-operator-arm64, build-gpu-operator-amd64]
307222
runs-on: ubuntu-latest
308223
strategy:
309224
matrix:
@@ -326,21 +241,13 @@ jobs:
326241
env:
327242
OPERATOR_IMAGE_ARM: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator:${{ env.COMMIT_SHORT_SHA }}-arm64
328243
OPERATOR_IMAGE_AMD: ghcr.io/${{ env.LOWERCASE_REPO_OWNER}}/gpu-operator:${{ env.COMMIT_SHORT_SHA }}-amd64
329-
VALIDATOR_IMAGE_ARM: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator/gpu-operator-validator:${{ env.COMMIT_SHORT_SHA }}-arm64
330-
VALIDATOR_IMAGE_AMD: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator/gpu-operator-validator:${{ env.COMMIT_SHORT_SHA }}-amd64
331244
OPERATOR_MULTIARCH_IMAGE: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator:${{ env.COMMIT_SHORT_SHA }}
332-
VALIDATOR_MULTIARCH_IMAGE: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator/gpu-operator-validator:${{ env.COMMIT_SHORT_SHA }}
333245
run: |
334246
docker manifest create \
335247
${OPERATOR_MULTIARCH_IMAGE} \
336248
${OPERATOR_IMAGE_AMD} \
337249
${OPERATOR_IMAGE_ARM}
338250
docker manifest push ${OPERATOR_MULTIARCH_IMAGE}
339-
docker manifest create \
340-
${VALIDATOR_MULTIARCH_IMAGE} \
341-
${VALIDATOR_IMAGE_AMD} \
342-
${VALIDATOR_IMAGE_ARM}
343-
docker manifest push ${VALIDATOR_MULTIARCH_IMAGE}
344251
345252
### e2e tests ###
346253
e2e-tests-containerd:
@@ -371,8 +278,6 @@ jobs:
371278
372279
echo "OPERATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
373280
echo "OPERATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator" >> $GITHUB_ENV
374-
echo "VALIDATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
375-
echo "VALIDATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator" >> $GITHUB_ENV
376281
377282
echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}" >> $GITHUB_ENV
378283
echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV
@@ -384,7 +289,7 @@ jobs:
384289
TEST_CASE: "./tests/cases/defaults.sh"
385290
run: |
386291
echo "${{ secrets.AWS_SSH_KEY }}" > ${private_key} && chmod 400 ${private_key}
387-
./tests/ci-run-e2e.sh ${OPERATOR_IMAGE} ${OPERATOR_VERSION} ${VALIDATOR_IMAGE} ${VALIDATOR_VERSION} ${GPU_PRODUCT_NAME} ${TEST_CASE} || rc=$?
292+
./tests/ci-run-e2e.sh ${OPERATOR_IMAGE} ${OPERATOR_VERSION} ${GPU_PRODUCT_NAME} ${TEST_CASE} || rc=$?
388293
./tests/scripts/pull.sh /tmp/logs logs
389294
exit $rc
390295
- name: Archive test logs
@@ -423,8 +328,6 @@ jobs:
423328
424329
echo "OPERATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
425330
echo "OPERATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator" >> $GITHUB_ENV
426-
echo "VALIDATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
427-
echo "VALIDATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator" >> $GITHUB_ENV
428331
429332
echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}" >> $GITHUB_ENV
430333
echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV
@@ -436,7 +339,7 @@ jobs:
436339
TEST_CASE: "./tests/cases/nvidia-driver.sh"
437340
run: |
438341
echo "${{ secrets.AWS_SSH_KEY }}" > ${private_key} && chmod 400 ${private_key}
439-
./tests/ci-run-e2e.sh ${OPERATOR_IMAGE} ${OPERATOR_VERSION} ${VALIDATOR_IMAGE} ${VALIDATOR_VERSION} ${GPU_PRODUCT_NAME} ${TEST_CASE} || rc=$?
342+
./tests/ci-run-e2e.sh ${OPERATOR_IMAGE} ${OPERATOR_VERSION} ${GPU_PRODUCT_NAME} ${TEST_CASE} || rc=$?
440343
./tests/scripts/pull.sh /tmp/logs logs
441344
exit $rc
442345
- name: Archive test logs
@@ -447,7 +350,7 @@ jobs:
447350
path: ./logs/
448351
retention-days: 15
449352

450-
release-latest-gpu-operator-and-validator-image:
353+
release-latest-gpu-operator-image:
451354
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} # Runs only if the event is a push to the main branch
452355
needs: [e2e-tests-containerd, e2e-tests-nvidiadriver]
453356
runs-on: linux-amd64-cpu4
@@ -468,17 +371,14 @@ jobs:
468371
LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')
469372
echo "OPERATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
470373
echo "OPERATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator" >> $GITHUB_ENV
471-
echo "VALIDATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
472-
echo "VALIDATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator" >> $GITHUB_ENV
473-
- name: Retag gpu-operator and gpu-operator-validator
374+
- name: Retag gpu-operator
474375
run: |
475376
regctl registry login ghcr.io -u $GITHUB_ACTOR -p ${{ secrets.GITHUB_TOKEN }}
476377
regctl image copy ${OPERATOR_IMAGE}:${OPERATOR_VERSION} ${OPERATOR_IMAGE}:main-latest
477-
regctl image copy ${VALIDATOR_IMAGE}:${VALIDATOR_VERSION} ${VALIDATOR_IMAGE}:main-latest
478378
479379
push-gpu-operator-bundle-image:
480380
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} # Runs only if the event is a push to the main branch
481-
needs: [release-latest-gpu-operator-and-validator-image]
381+
needs: [release-latest-gpu-operator-image]
482382
runs-on: linux-amd64-cpu4
483383
steps:
484384
- uses: actions/checkout@v4

.gitlab-ci.yml

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,6 @@ build:gpu-operator:
9898
- .dist-ubi9
9999
- .target-gpu-operator
100100

101-
build:gpu-operator-validator:
102-
extends:
103-
- .image-build
104-
- .dist-ubi9
105-
- .target-gpu-operator-validator
106-
107101
.e2e_defaults:
108102
variables:
109103
TF_VAR_project_name: "gpu-operator"
@@ -113,8 +107,6 @@ build:gpu-operator-validator:
113107
# TODO: Should these use the staging release instead?
114108
OPERATOR_VERSION: "${CI_COMMIT_SHORT_SHA}"
115109
OPERATOR_IMAGE: "${CI_REGISTRY_IMAGE}"
116-
VALIDATOR_VERSION: "${CI_COMMIT_SHORT_SHA}"
117-
VALIDATOR_IMAGE: "${CI_REGISTRY_IMAGE}/gpu-operator-validator"
118110
GPU_PRODUCT_NAME: "Tesla-T4"
119111
extends:
120112
- .dist-ubi9
@@ -139,7 +131,7 @@ build:gpu-operator-validator:
139131
- export instance_hostname="${instance_hostname}"
140132
- export TEST_CASE="${TEST_CASE}"
141133
- rc=0
142-
- ${CI_PROJECT_DIR}/tests/ci-run-e2e.sh ${OPERATOR_IMAGE} ${OPERATOR_VERSION} ${VALIDATOR_IMAGE} ${VALIDATOR_VERSION} ${GPU_PRODUCT_NAME} ${TEST_CASE} || rc=$?
134+
- ${CI_PROJECT_DIR}/tests/ci-run-e2e.sh ${OPERATOR_IMAGE} ${OPERATOR_VERSION} ${GPU_PRODUCT_NAME} ${TEST_CASE} || rc=$?
143135
- ${CI_PROJECT_DIR}/tests/scripts/pull.sh /tmp/logs logs
144136
- exit $rc
145137
artifacts:

.nvidia-ci.yml

Lines changed: 2 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,6 @@ image:gpu-operator:
5858
- .dist-ubi9
5959
- .target-gpu-operator
6060

61-
image:gpu-operator-validator:
62-
extends:
63-
- .image-pull
64-
- .dist-ubi9
65-
- .target-gpu-operator-validator
66-
variables:
67-
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/gpu-operator-validator"
68-
6961
# We skip the integration tests for the internal CI:
7062
.integration:
7163
stage: test
@@ -126,50 +118,21 @@ scan:gpu-operator-arm64:
126118
needs:
127119
- scan:gpu-operator-amd64
128120

129-
.scan:gpu-operator-validator:
130-
extends:
131-
- .scan
132-
- .dist-ubi9
133-
- .target-gpu-operator-validator
134-
needs:
135-
- image:gpu-operator-validator
136-
137-
scan:gpu-operator-validator-amd64:
138-
extends:
139-
- .scan:gpu-operator-validator
140-
- .platform-amd64
141-
142-
scan:gpu-operator-validator-arm64:
143-
extends:
144-
- .scan:gpu-operator-validator
145-
- .platform-arm64
146-
needs:
147-
- scan:gpu-operator-validator-amd64
148-
149121
# Define the external release steps for NGC and Dockerhub
150122
.release:ngc:
151123
extends: .release:external
152124
variables:
153125
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
154126
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
155127
OUT_REGISTRY: "${NGC_REGISTRY}"
156-
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}" # This needs to change for the gpu-operator and gpu-operator-validator
128+
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
157129

158130
release:ngc-gpu-operator:
159131
extends:
160132
- .release:ngc
161133
- .dist-ubi9
162134
- .target-gpu-operator
163135

164-
release:ngc-gpu-operator-validator:
165-
extends:
166-
- .release:ngc
167-
- .dist-ubi9
168-
- .target-gpu-operator-validator
169-
variables:
170-
IN_IMAGE_NAME: "gpu-operator-validator"
171-
OUT_IMAGE_NAME: "${NGC_PROD_VALIDATOR_IMAGE}"
172-
173136
# Define the external image signing steps for NGC
174137
# Download the ngc cli binary for use in the sign steps
175138
.ngccli-setup:
@@ -216,15 +179,6 @@ sign:ngc-gpu-operator:
216179
variables:
217180
IMAGE_TAG: "${CI_COMMIT_TAG}"
218181

219-
sign:ngc-gpu-operator-validator:
220-
extends:
221-
- .sign:ngc
222-
needs:
223-
- release:ngc-gpu-operator-validator
224-
variables:
225-
IMAGE_NAME: "${NGC_PROD_VALIDATOR_IMAGE}"
226-
IMAGE_TAG: "${CI_COMMIT_TAG}"
227-
228182
.schedule_defaults:
229183
rules:
230184
- if: $CI_PIPELINE_SOURCE == "schedule"
@@ -235,8 +189,6 @@ sign:ngc-gpu-operator-validator:
235189
# TODO: Should these use the staging release instead?
236190
OPERATOR_VERSION: "${CI_COMMIT_SHORT_SHA}"
237191
OPERATOR_IMAGE: "${STAGING_REGISTRY}/gpu-operator"
238-
VALIDATOR_VERSION: "${CI_COMMIT_SHORT_SHA}"
239-
VALIDATOR_IMAGE: "${STAGING_REGISTRY}/gpu-operator-validator"
240192
TARGET_DRIVER_VERSION: "550.163.01"
241193

242194
.e2e_tests:
@@ -255,7 +207,7 @@ sign:ngc-gpu-operator-validator:
255207
- export private_key="${VSPHERE_SSH_PRIVATE_KEY}"
256208
- export instance_hostname="${instance_hostname}"
257209
- rc=0
258-
- ${CI_PROJECT_DIR}/tests/ci-run-e2e.sh ${OPERATOR_IMAGE} ${OPERATOR_VERSION} ${VALIDATOR_IMAGE} ${VALIDATOR_VERSION} ${GPU_DEVICE} || rc=$?
210+
- ${CI_PROJECT_DIR}/tests/ci-run-e2e.sh ${OPERATOR_IMAGE} ${OPERATOR_VERSION} ${GPU_DEVICE} || rc=$?
259211
- ${CI_PROJECT_DIR}/tests/scripts/pull.sh /tmp/logs logs
260212
- exit $rc
261213
artifacts:

0 commit comments

Comments
 (0)