diff --git a/.github/workflows/container-build-test.yml b/.github/workflows/container-build-test.yml
index e32335356..1c4c95d14 100644
--- a/.github/workflows/container-build-test.yml
+++ b/.github/workflows/container-build-test.yml
@@ -132,6 +132,9 @@ jobs:
       - name: Execute build
         if: steps.should-build.outputs.build == 'true'
         uses: ./.github/actions/build-container
+        env:
+          # Disable registry cache for pull requests to avoid permission issues
+          DISABLE_REGISTRY_CACHE: ${{ github.event_name == 'pull_request' && 'true' || 'false' }}
         with:
           safe_ref_name: ${{ needs.prepare-environment.outputs.safe_ref_name }}
           nvcr_container_repo: ${{ needs.prepare-environment.outputs.nvcr_container_repo }}
diff --git a/common.mk b/common.mk
index 5f77c8223..aab69f103 100644
--- a/common.mk
+++ b/common.mk
@@ -29,6 +29,11 @@ SAFE_REF_NAME := $(if $(SAFE_REF_NAME),$(SAFE_REF_NAME),local)
 BUILDX_BUILDER ?= nvsentinel-builder
 PLATFORMS ?= linux/arm64,linux/amd64
 
+# Cache configuration (set DISABLE_REGISTRY_CACHE=true to disable). Deferred "=" is required: MODULE_NAME is only defined below.
+DISABLE_REGISTRY_CACHE ?= false
+CACHE_FROM_ARG = $(if $(filter true,$(DISABLE_REGISTRY_CACHE)),,--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME))
+CACHE_TO_ARG = $(if $(filter true,$(DISABLE_REGISTRY_CACHE)),,--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME),mode=max)
+
 # Auto-detect current module name from directory
 MODULE_NAME := $(shell basename $(CURDIR))
 
@@ -170,11 +175,12 @@ setup-buildx:
 # Standardized Docker build (always from repo root for consistency)
 docker-build: setup-buildx
 	@echo "Building Docker image for $(MODULE_NAME) (local development)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	cd $(REPO_ROOT) && docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME) \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME),mode=max \
+		$(CACHE_FROM_ARG) \
+		$(CACHE_TO_ARG) \
 		$(DOCKER_EXTRA_ARGS) \
 		--load \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-$(MODULE_NAME):$(SAFE_REF_NAME) \
@@ -196,11 +202,12 @@ docker-build-local: setup-buildx
 # Standardized Docker publish
 docker-publish: setup-buildx
 	@echo "Building and publishing Docker image for $(MODULE_NAME) (production)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	cd $(REPO_ROOT) && docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME) \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME),mode=max \
+		$(CACHE_FROM_ARG) \
+		$(CACHE_TO_ARG) \
 		$(DOCKER_EXTRA_ARGS) \
 		--push \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-$(MODULE_NAME):$(SAFE_REF_NAME) \
diff --git a/health-monitors/gpu-health-monitor/Makefile b/health-monitors/gpu-health-monitor/Makefile
index b55927910..821243928 100644
--- a/health-monitors/gpu-health-monitor/Makefile
+++ b/health-monitors/gpu-health-monitor/Makefile
@@ -41,6 +41,12 @@ LINT_EXTRA_FLAGS :=
 
 include ../../common.mk
 
+# Cache configuration for specialized builds (respect DISABLE_REGISTRY_CACHE)
+CACHE_FROM_ARG_DCGM3 := $(if $(filter true,$(DISABLE_REGISTRY_CACHE)),,--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-3)
+CACHE_TO_ARG_DCGM3 := $(if $(filter true,$(DISABLE_REGISTRY_CACHE)),,--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-3,mode=max)
+CACHE_FROM_ARG_DCGM4 := $(if $(filter true,$(DISABLE_REGISTRY_CACHE)),,--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-4)
+CACHE_TO_ARG_DCGM4 := $(if $(filter true,$(DISABLE_REGISTRY_CACHE)),,--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-4,mode=max)
+
 # =============================================================================
 # DEFAULT TARGET
 # =============================================================================
@@ -104,11 +110,12 @@ docker-build: setup-buildx docker-build-dcgm3 docker-build-dcgm4
 
 docker-build-dcgm3: setup-buildx
 	@echo "Building Docker image for $(MODULE_NAME) with DCGM 3.x (local development)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	cd $(REPO_ROOT) && docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-3 \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-3,mode=max \
+		$(CACHE_FROM_ARG_DCGM3) \
+		$(CACHE_TO_ARG_DCGM3) \
 		$(DOCKER_EXTRA_ARGS) \
 		--load \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-$(MODULE_NAME):$(SAFE_REF_NAME)-dcgm-3.x \
@@ -119,11 +126,12 @@ docker-build-dcgm3: setup-buildx
 
 docker-build-dcgm4: setup-buildx
 	@echo "Building Docker image for $(MODULE_NAME) with DCGM 4.x (local development)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	cd $(REPO_ROOT) && docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-4 \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-4,mode=max \
+		$(CACHE_FROM_ARG_DCGM4) \
+		$(CACHE_TO_ARG_DCGM4) \
 		$(DOCKER_EXTRA_ARGS) \
 		--load \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-$(MODULE_NAME):$(SAFE_REF_NAME)-dcgm-4.x \
@@ -135,11 +143,12 @@ docker-publish: setup-buildx docker-publish-dcgm3 docker-publish-dcgm4
 
 docker-publish-dcgm3: setup-buildx
 	@echo "Building and publishing Docker image for $(MODULE_NAME) with DCGM 3.x (production)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	cd $(REPO_ROOT) && docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-3 \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-3,mode=max \
+		$(CACHE_FROM_ARG_DCGM3) \
+		$(CACHE_TO_ARG_DCGM3) \
 		$(DOCKER_EXTRA_ARGS) \
 		--push \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-$(MODULE_NAME):$(SAFE_REF_NAME)-dcgm-3.x \
@@ -150,11 +159,12 @@ docker-publish-dcgm3: setup-buildx
 
 docker-publish-dcgm4: setup-buildx
 	@echo "Building and publishing Docker image for $(MODULE_NAME) with DCGM 4.x (production)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	cd $(REPO_ROOT) && docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-4 \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME)-4,mode=max \
+		$(CACHE_FROM_ARG_DCGM4) \
+		$(CACHE_TO_ARG_DCGM4) \
 		$(DOCKER_EXTRA_ARGS) \
 		--push \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-$(MODULE_NAME):$(SAFE_REF_NAME)-dcgm-4.x \
diff --git a/node-drainer-module/Makefile b/node-drainer-module/Makefile
index ffa959ced..f473b2ed9 100644
--- a/node-drainer-module/Makefile
+++ b/node-drainer-module/Makefile
@@ -49,11 +49,12 @@ publish: docker-publish
 # Additional target for CI with registry cache
 docker-build-with-cache: setup-buildx
 	@echo "Building Docker image for $(MODULE_NAME) (with registry cache)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	cd $(REPO_ROOT) && docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME) \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:$(MODULE_NAME),mode=max \
+		$(CACHE_FROM_ARG) \
+		$(CACHE_TO_ARG) \
 		$(DOCKER_EXTRA_ARGS) \
 		--load \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-$(MODULE_NAME):$(SAFE_REF_NAME) \
diff --git a/nvsentinel-log-collector/Makefile b/nvsentinel-log-collector/Makefile
index 65afe6276..be77c07ee 100644
--- a/nvsentinel-log-collector/Makefile
+++ b/nvsentinel-log-collector/Makefile
@@ -10,6 +10,13 @@ SAFE_REF_NAME := $(if $(SAFE_REF_NAME),$(SAFE_REF_NAME),local)
 BUILDX_BUILDER ?= nvsentinel-builder
 PLATFORMS ?= linux/arm64,linux/amd64
 
+# Cache configuration (can be disabled via environment variables)
+DISABLE_REGISTRY_CACHE ?= false
+CACHE_FROM_ARG_LOG := $(if $(filter true,$(DISABLE_REGISTRY_CACHE)),,--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:log-collector)
+CACHE_TO_ARG_LOG := $(if $(filter true,$(DISABLE_REGISTRY_CACHE)),,--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:log-collector,mode=max)
+CACHE_FROM_ARG_CLEANUP := $(if $(filter true,$(DISABLE_REGISTRY_CACHE)),,--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:file-server-cleanup)
+CACHE_TO_ARG_CLEANUP := $(if $(filter true,$(DISABLE_REGISTRY_CACHE)),,--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:file-server-cleanup,mode=max)
+
 # Default target
 .PHONY: all
 all: lint
@@ -77,11 +84,12 @@ docker-build: setup-buildx docker-build-log-collector docker-build-file-server-c
 .PHONY: docker-build-log-collector
 docker-build-log-collector: setup-buildx
 	@echo "Building Docker image for log-collector (local development)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:log-collector \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:log-collector,mode=max \
+		$(CACHE_FROM_ARG_LOG) \
+		$(CACHE_TO_ARG_LOG) \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-log-collector:$(SAFE_REF_NAME) \
 		.
 
@@ -89,11 +97,12 @@ docker-build-log-collector: setup-buildx
 .PHONY: docker-build-file-server-cleanup
 docker-build-file-server-cleanup: setup-buildx
 	@echo "Building Docker image for file-server-cleanup (local development)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:file-server-cleanup \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:file-server-cleanup,mode=max \
+		$(CACHE_FROM_ARG_CLEANUP) \
+		$(CACHE_TO_ARG_CLEANUP) \
 		-f Dockerfile.cleanup \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-file-server-cleanup:$(SAFE_REF_NAME) \
 		.
@@ -106,11 +115,12 @@ docker-publish: setup-buildx docker-publish-log-collector docker-publish-file-se
 .PHONY: docker-publish-log-collector
 docker-publish-log-collector: setup-buildx
 	@echo "Building and publishing Docker image for log-collector (production)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:log-collector \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:log-collector,mode=max \
+		$(CACHE_FROM_ARG_LOG) \
+		$(CACHE_TO_ARG_LOG) \
 		--push \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-log-collector:$(SAFE_REF_NAME) \
 		.
@@ -119,11 +129,12 @@ docker-publish-log-collector: setup-buildx
 .PHONY: docker-publish-file-server-cleanup
 docker-publish-file-server-cleanup: setup-buildx
 	@echo "Building and publishing Docker image for file-server-cleanup (production)..."
+	$(if $(filter true,$(DISABLE_REGISTRY_CACHE)),@echo "Registry cache disabled for this build")
 	docker buildx build \
 		--platform $(PLATFORMS) \
 		--network=host \
-		--cache-from=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:file-server-cleanup \
-		--cache-to=type=registry,ref=$(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-buildcache:file-server-cleanup,mode=max \
+		$(CACHE_FROM_ARG_CLEANUP) \
+		$(CACHE_TO_ARG_CLEANUP) \
 		--push \
 		-f Dockerfile.cleanup \
 		-t $(NVCR_CONTAINER_REPO)/$(NGC_ORG)/nvsentinel-file-server-cleanup:$(SAFE_REF_NAME) \