Skip to content

Commit 0ad394c

Browse files
Merge pull request #378 from NVIDIA/main
backport fixes
2 parents 50c2875 + f9958c5 commit 0ad394c

File tree

4 files changed

+26
-56
lines changed

4 files changed

+26
-56
lines changed

pkg/provisioner/templates/common.go

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -30,25 +30,23 @@ export HOLODECK_ENVIRONMENT=true
3030
echo "APT::Get::AllowUnauthenticated 1;" | sudo tee /etc/apt/apt.conf.d/99allow-unauthenticated
3131
3232
install_packages_with_retry() {
33-
local packages=("$@")
34-
local max_retries=5
35-
local retry_delay=5
36-
37-
for ((i=1; i<=$max_retries; i++)); do
38-
echo "Attempt $i to install packages: ${packages[@]}"
39-
40-
# Attempt to install packages
41-
sudo apt-get install -y --no-install-recommends "${packages[@]}"
42-
43-
# Check if the last command failed and the error is related to unsigned repository
44-
if [ $? -ne 0 ] && grep -q 'NO_PUBKEY' <<< "$(tail -n 1 /var/lib/dpkg/status 2>/dev/null)"; then
45-
echo "Error: Unsigned repository. Retrying in $retry_delay seconds..."
46-
sleep $retry_delay
47-
else
48-
# Exit loop if installation is successful or the error is not related to unsigned repository
49-
break
50-
fi
51-
done
33+
local max_retries=5 retry_delay=5
34+
local packages=("$@")
35+
36+
for ((i=1; i<=max_retries; i++)); do
37+
echo "[$i/$max_retries] apt-get update"
38+
if sudo apt-get -o Acquire::Retries=3 update; then
39+
echo "[$i/$max_retries] installing: ${packages[*]}"
40+
if sudo DEBIAN_FRONTEND=noninteractive \
41+
apt-get install -y --no-install-recommends "${packages[@]}"; then
42+
return 0 # success
43+
fi
44+
fi
45+
echo "Attempt $i failed; sleeping ${retry_delay}s" >&2
46+
sleep "$retry_delay"
47+
done
48+
echo "All ${max_retries} attempts failed" >&2
49+
return 1
5250
}
5351
5452
with_retry() {

pkg/provisioner/templates/kubernetes.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ with_retry 5 10s kubectl --kubeconfig $KUBECONFIG wait --for=condition=establish
117117
with_retry 10 15s kubectl --kubeconfig $KUBECONFIG apply -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml
118118
119119
# Wait for cluster to be ready
120-
with_retry 5 10s kubectl --kubeconfig $KUBECONFIG wait --for=condition=ready --timeout=300s nodes --all
120+
with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=ready --timeout=300s nodes --all
121121
122122
# Make single-node cluster schedulable
123123
kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule-

pkg/provisioner/templates/nv-driver.go

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -28,15 +28,8 @@ import (
2828
const NvDriverTemplate = `
2929
# Install Dependencies
3030
with_retry 3 10s sudo apt-get update
31-
install_packages_with_retry linux-headers-$(uname -r) gcc make
32-
install_packages_with_retry apt-utils build-essential \
33-
ca-certificates \
34-
curl \
35-
kmod \
36-
file \
37-
libelf-dev \
38-
libglvnd-dev \
39-
pkg-config
31+
install_packages_with_retry linux-headers-$(uname -r)
32+
install_packages_with_retry apt-utils build-essential ca-certificates curl kmod file libelf-dev libglvnd-dev pkg-config make
4033
4134
install_packages_with_retry gcc-12 g++-12 && \
4235
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 && \

pkg/provisioner/templates/nv-driver_test.go

Lines changed: 6 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -43,15 +43,8 @@ func TestNVDriverTemplate(t *testing.T) {
4343
4444
# Install Dependencies
4545
with_retry 3 10s sudo apt-get update
46-
install_packages_with_retry linux-headers-$(uname -r) gcc make
47-
install_packages_with_retry apt-utils build-essential \
48-
ca-certificates \
49-
curl \
50-
kmod \
51-
file \
52-
libelf-dev \
53-
libglvnd-dev \
54-
pkg-config
46+
install_packages_with_retry linux-headers-$(uname -r)
47+
install_packages_with_retry apt-utils build-essential ca-certificates curl kmod file libelf-dev libglvnd-dev pkg-config make
5548
5649
install_packages_with_retry gcc-12 g++-12 && \
5750
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 && \
@@ -86,15 +79,8 @@ nvidia-smi
8679
expectedOutput: `
8780
# Install Dependencies
8881
with_retry 3 10s sudo apt-get update
89-
install_packages_with_retry linux-headers-$(uname -r) gcc make
90-
install_packages_with_retry apt-utils build-essential \
91-
ca-certificates \
92-
curl \
93-
kmod \
94-
file \
95-
libelf-dev \
96-
libglvnd-dev \
97-
pkg-config
82+
install_packages_with_retry linux-headers-$(uname -r)
83+
install_packages_with_retry apt-utils build-essential ca-certificates curl kmod file libelf-dev libglvnd-dev pkg-config make
9884
9985
install_packages_with_retry gcc-12 g++-12 && \
10086
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 && \
@@ -130,15 +116,8 @@ nvidia-smi
130116
expectedOutput: `
131117
# Install Dependencies
132118
with_retry 3 10s sudo apt-get update
133-
install_packages_with_retry linux-headers-$(uname -r) gcc make
134-
install_packages_with_retry apt-utils build-essential \
135-
ca-certificates \
136-
curl \
137-
kmod \
138-
file \
139-
libelf-dev \
140-
libglvnd-dev \
141-
pkg-config
119+
install_packages_with_retry linux-headers-$(uname -r)
120+
install_packages_with_retry apt-utils build-essential ca-certificates curl kmod file libelf-dev libglvnd-dev pkg-config make
142121
143122
install_packages_with_retry gcc-12 g++-12 && \
144123
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 && \

0 commit comments

Comments (0)