Skip to content

Commit 557c801

Browse files
committed
make the GH actions and Dockerfile architecture agnostic
Signed-off-by: Davanum Srinivas <[email protected]>
1 parent 49c561b commit 557c801

File tree

3 files changed, with 58 additions and 16 deletions.

.github/actions/setup-ci-tools/action.yml

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -130,12 +130,13 @@ runs:
130130
echo "Installing Protocol Buffers ${{ inputs.protobuf-version }}"
131131
PROTOBUF_VERSION="${{ inputs.protobuf-version }}"
132132
PROTOBUF_VERSION_NO_V="${PROTOBUF_VERSION#v}"
133-
wget -q https://github.com/protocolbuffers/protobuf/releases/download/${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64.zip
134-
unzip protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64.zip -d protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64
135-
sudo cp protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64/bin/protoc /usr/local/bin/
133+
ARCH=$(case $(uname -m) in x86_64) echo x86_64;; aarch64) echo aarch_64;; *) echo $(uname -m);; esac)
134+
wget -q https://github.com/protocolbuffers/protobuf/releases/download/${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION_NO_V}-linux-${ARCH}.zip
135+
unzip protoc-${PROTOBUF_VERSION_NO_V}-linux-${ARCH}.zip -d protoc-${PROTOBUF_VERSION_NO_V}-linux-${ARCH}
136+
sudo cp protoc-${PROTOBUF_VERSION_NO_V}-linux-${ARCH}/bin/protoc /usr/local/bin/
136137
sudo mkdir -p /usr/local/include/google
137-
sudo cp -r protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64/include/google /usr/local/include/
138-
rm -rf protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64*
138+
sudo cp -r protoc-${PROTOBUF_VERSION_NO_V}-linux-${ARCH}/include/google /usr/local/include/
139+
rm -rf protoc-${PROTOBUF_VERSION_NO_V}-linux-${ARCH}*
139140
fi
140141
go install google.golang.org/protobuf/cmd/protoc-gen-go@${{ inputs.protoc-gen-go-version }}
141142
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@${{ inputs.protoc-gen-go-grpc-version }}
@@ -145,7 +146,8 @@ runs:
145146
if: inputs.install-dcgm == 'true'
146147
shell: bash
147148
run: |
148-
wget -q https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb
149+
ARCH=$(case $(uname -m) in x86_64) echo x86_64;; aarch64) echo arm64;; *) echo $(uname -m);; esac)
150+
wget -q https://developer.download.nvidia.com/compute/cuda/repos/debian12/${ARCH}/cuda-keyring_1.1-1_all.deb
149151
sudo dpkg -i cuda-keyring_1.1-1_all.deb
150152
rm cuda-keyring_1.1-1_all.deb
151153
sudo apt-get update -qq
@@ -168,7 +170,8 @@ runs:
168170
echo "shellcheck already installed from cache"
169171
else
170172
echo "Installing shellcheck ${{ inputs.shellcheck-version }}"
171-
curl -sSL "https://github.com/koalaman/shellcheck/releases/download/${{ inputs.shellcheck-version }}/shellcheck-${{ inputs.shellcheck-version }}.linux.x86_64.tar.xz" | \
173+
ARCH=$(case $(uname -m) in x86_64) echo x86_64;; aarch64) echo aarch64;; *) echo $(uname -m);; esac)
174+
curl -sSL "https://github.com/koalaman/shellcheck/releases/download/${{ inputs.shellcheck-version }}/shellcheck-${{ inputs.shellcheck-version }}.linux.${ARCH}.tar.xz" | \
172175
sudo tar -xJ --wildcards -C /usr/local/bin/ --strip-components=1 "*/shellcheck"
173176
sudo chmod +x /usr/local/bin/shellcheck
174177
fi

.github/workflows/e2e-test.yml

Lines changed: 46 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,14 @@
1414

1515
name: E2E Tests
1616

17+
# This workflow runs end-to-end tests on both AMD64 and ARM64 architectures in parallel
18+
# to ensure compatibility across different hardware platforms.
19+
#
20+
# Configuration:
21+
# - Set RUNNER_ARCH_LARGE_AMD64 variable to override default AMD64 runner
22+
# - Set RUNNER_ARCH_LARGE_ARM64 variable to override default ARM64 runner
23+
# - Each architecture gets its own isolated cluster and test artifacts
24+
1725
on:
1826
push:
1927
branches:
@@ -40,7 +48,20 @@ jobs:
4048
uses: ./.github/workflows/prepare-environment.yml
4149

4250
e2e-test:
43-
runs-on: linux-amd64-cpu32
51+
# Run E2E tests on both AMD64 and ARM64 architectures in parallel
52+
strategy:
53+
fail-fast: false # Allow both architectures to complete even if one fails
54+
matrix:
55+
include:
56+
- arch: amd64
57+
runner: ${{ vars.RUNNER_ARCH_LARGE_AMD64 || 'linux-amd64-cpu32' }}
58+
arch_name: "AMD64"
59+
- arch: arm64
60+
runner: ${{ vars.RUNNER_ARCH_LARGE_ARM64 || 'linux-arm64-cpu32' }}
61+
arch_name: "ARM64"
62+
63+
name: "E2E Tests (${{ matrix.arch_name }})"
64+
runs-on: ${{ matrix.runner }}
4465
timeout-minutes: 90
4566
needs: prepare-environment
4667
steps:
@@ -100,9 +121,18 @@ jobs:
100121
/usr/local/bin/kind
101122
/usr/local/bin/kubectl
102123
/usr/local/bin/tilt
103-
key: ${{ runner.os }}-e2e-tools-${{ env.KIND_VERSION }}-${{ env.CTLPTL_VERSION }}-${{ env.TILT_VERSION }}
124+
key: ${{ runner.os }}-${{ runner.arch }}-e2e-tools-${{ env.KIND_VERSION }}-${{ env.CTLPTL_VERSION }}-${{ env.TILT_VERSION }}
104125
restore-keys: |
105-
${{ runner.os }}-e2e-tools-
126+
${{ runner.os }}-${{ runner.arch }}-e2e-tools-
127+
128+
- name: Detect runner architecture
129+
run: |
130+
echo "Matrix configuration: ${{ matrix.arch_name }} (${{ matrix.arch }})"
131+
echo "Runner: ${{ matrix.runner }}"
132+
echo "Runner OS: ${{ runner.os }}"
133+
echo "Runner architecture: ${{ runner.arch }}"
134+
echo "System architecture (uname -m): $(uname -m)"
135+
echo "Cache key will be: ${{ runner.os }}-${{ runner.arch }}-e2e-tools-${{ env.KIND_VERSION }}-${{ env.CTLPTL_VERSION }}-${{ env.TILT_VERSION }}"
106136
107137
- name: Install E2E testing tools
108138
run: |
@@ -112,7 +142,8 @@ jobs:
112142
echo "ctlptl v${CTLPTL_VERSION} already installed from cache"
113143
else
114144
echo "Installing ctlptl v${CTLPTL_VERSION}..."
115-
curl -fsSL https://github.com/tilt-dev/ctlptl/releases/download/v${CTLPTL_VERSION}/ctlptl.${CTLPTL_VERSION}.linux.x86_64.tar.gz | sudo tar -xzv -C /usr/local/bin ctlptl
145+
ARCH=$(case $(uname -m) in x86_64) echo x86_64;; aarch64|arm64) echo arm64;; *) echo $(uname -m);; esac)
146+
curl -fsSL https://github.com/tilt-dev/ctlptl/releases/download/v${CTLPTL_VERSION}/ctlptl.${CTLPTL_VERSION}.linux.${ARCH}.tar.gz | sudo tar -xzv -C /usr/local/bin ctlptl
116147
fi
117148
118149
# Install Kind (if not cached)
@@ -121,7 +152,8 @@ jobs:
121152
echo "Kind v${KIND_VERSION} already installed from cache"
122153
else
123154
echo "Installing Kind v${KIND_VERSION}..."
124-
curl -Lo ./kind https://kind.sigs.k8s.io/dl/v${KIND_VERSION}/kind-linux-amd64
155+
ARCH=$(case $(uname -m) in x86_64) echo amd64;; aarch64|arm64) echo arm64;; *) echo $(uname -m);; esac)
156+
curl -Lo ./kind https://kind.sigs.k8s.io/dl/v${KIND_VERSION}/kind-linux-${ARCH}
125157
chmod +x ./kind
126158
sudo mv ./kind /usr/local/bin/kind
127159
fi
@@ -131,7 +163,8 @@ jobs:
131163
echo "kubectl already installed from cache"
132164
else
133165
echo "Installing kubectl..."
134-
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
166+
ARCH=$(case $(uname -m) in x86_64) echo amd64;; aarch64|arm64) echo arm64;; *) echo $(uname -m);; esac)
167+
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl"
135168
chmod +x kubectl
136169
sudo mv ./kubectl /usr/local/bin/kubectl
137170
fi
@@ -142,8 +175,9 @@ jobs:
142175
echo "Tilt v${TILT_VERSION} already installed from cache"
143176
else
144177
echo "Installing Tilt v${TILT_VERSION}..."
178+
ARCH=$(case $(uname -m) in x86_64) echo x86_64;; aarch64|arm64) echo arm64;; *) echo $(uname -m);; esac)
145179
TEMP_DIR=$(mktemp -d)
146-
curl -fsSL https://github.com/tilt-dev/tilt/releases/download/v${TILT_VERSION}/tilt.${TILT_VERSION}.linux.x86_64.tar.gz | tar -xzv -C "$TEMP_DIR" tilt
180+
curl -fsSL https://github.com/tilt-dev/tilt/releases/download/v${TILT_VERSION}/tilt.${TILT_VERSION}.linux.${ARCH}.tar.gz | tar -xzv -C "$TEMP_DIR" tilt
147181
sudo mv "$TEMP_DIR/tilt" /usr/local/bin/
148182
rm -rf "$TEMP_DIR"
149183
fi
@@ -174,6 +208,8 @@ jobs:
174208
NVCR_CONTAINER_REPO: ${{ needs.prepare-environment.outputs.nvcr_container_repo }}
175209
NGC_ORG: ${{ needs.prepare-environment.outputs.container_org }}
176210
CTLPTL_YAML: ctlptl-config.yaml
211+
# Make cluster names unique per architecture to avoid conflicts in parallel runs
212+
CLUSTER_NAME_SUFFIX: "-${{ matrix.arch }}"
177213
run: |
178214
make cluster-create
179215
@@ -183,13 +219,15 @@ jobs:
183219
NVCR_CONTAINER_REPO: ${{ needs.prepare-environment.outputs.nvcr_container_repo }}
184220
NGC_ORG: ${{ needs.prepare-environment.outputs.container_org }}
185221
CTLPTL_YAML: ctlptl-config.yaml
222+
# Use same cluster name suffix for consistency
223+
CLUSTER_NAME_SUFFIX: "-${{ matrix.arch }}"
186224
run: |
187225
make e2e-test-ci
188226
189227
- name: Upload test results
190228
uses: ./.github/actions/upload-test-artifacts
191229
with:
192-
component-name: e2e-test
230+
component-name: e2e-test-${{ matrix.arch }}
193231
file-paths: |
194232
tests/results/
195233
tests/*.log

nvsentinel-log-collector/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
3333
sudo \
3434
&& rm -rf /var/lib/apt/lists/*
3535

36-
RUN curl -fsSL -o /usr/local/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl \
36+
RUN ARCH=$(case $(uname -m) in x86_64) echo amd64;; aarch64) echo arm64;; *) echo $(uname -m);; esac) && \
37+
curl -fsSL -o /usr/local/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/${ARCH}/kubectl \
3738
&& chmod +x /usr/local/bin/kubectl
3839

3940
RUN useradd -u 10001 -m nvsentinel

0 commit comments

Comments
 (0)