Skip to content

Commit 55626a2

Browse files
mselim00 and ndbaker1 authored
feat: add neuron and efa device plugin updater (#643)
* feat: add neuron and efa device plugin updater * Update hack/update-image-tags.sh Co-authored-by: Nick Baker <[email protected]> --------- Co-authored-by: Nick Baker <[email protected]>
1 parent a816f72 commit 55626a2

File tree

4 files changed

+60
-7
lines changed

4 files changed

+60
-7
lines changed

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,9 @@ include ${BGO_MAKEFILE}
33
pre-release::
44
go test -c -tags=e2e ./test/... -o $(GOBIN)
55
go install sigs.k8s.io/kubetest2/...@latest
6+
7+
# update-deps: run every ./hack/update-*.sh script, in glob order.
# The target names a command rather than a file, so it is declared phony;
# otherwise a file called "update-deps" would make the target look up to date.
.PHONY: update-deps
update-deps:
	for SCRIPT in ./hack/update-*.sh; do \
		"$$SCRIPT" || exit 1 ; \
	done
# "|| exit 1" stops at the first failing script. With a bare ";" the loop's
# status is that of the last script only, so earlier failures went unnoticed.
11+

hack/update-image-tags.sh

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,61 @@ set -o nounset
44
set -o errexit
55
set -o pipefail
66

7+
ECR_PUBLIC_REGISTRY="public.ecr.aws"
8+
EKS_CONTAINER_REGISTRY="602401143452.dkr.ecr.us-west-2.amazonaws.com"
9+
10+
# get_ecr_image_tags <REGISTRY> <REPOSITORY>
# e.g. get_ecr_image_tags $ECR_PUBLIC_REGISTRY amazonlinux/amazonlinux
#
# Prints the repository's tag list (the ".tags" field of the registry's
# /v2/<REPOSITORY>/tags/list response) as JSON on stdout.
# Returns non-zero when the authorization token cannot be fetched, when the
# registry answers with an HTTP error, or when the response carries no tag list.
# Requires the aws CLI, curl and jq, and reads the global ECR_PUBLIC_REGISTRY.
get_ecr_image_tags() {
  # Re-enable errexit locally: the callers in this script invoke the function
  # through $(...) after "set +e", and a failed token lookup must abort here.
  set -e
  local REGISTRY=$1
  local REPOSITORY=$2
  local TOKEN
  local AUTHORIZATION_TYPE

  # Get ECR public token if image is from a public registry, otherwise use a private token
  # An authorization token is required for every ECR HTTP request
  if [ "$REGISTRY" = "$ECR_PUBLIC_REGISTRY" ]; then
    TOKEN=$(aws ecr-public get-authorization-token --region us-east-1 --output=text --query 'authorizationData.authorizationToken')
    AUTHORIZATION_TYPE="Bearer"
  else
    # NOTE(review): no --region is passed, so the CLI's default region is used;
    # confirm it matches the region embedded in the private registry hostname.
    TOKEN=$(aws ecr get-authorization-token --output text --query 'authorizationData[].authorizationToken')
    AUTHORIZATION_TYPE="Basic"
  fi

  # --fail turns an HTTP error status into a non-zero curl exit instead of
  # piping the error body to jq; -S keeps the error message visible despite -s.
  # jq -e exits non-zero when ".tags" is null or missing (or when there is no
  # input at all), so callers checking $? never see "null" as a success.
  curl -sS --fail -H "Authorization: ${AUTHORIZATION_TYPE} ${TOKEN}" "https://${REGISTRY}/v2/${REPOSITORY}/tags/list" | jq -e '.tags'
}
30+
31+
# update_image_uris REPOSITORY IMAGE_TAG
#
# Rewrites every "image: REPOSITORY:<anything>" occurrence in the files under
# ./test/manifests so that it reads "image: REPOSITORY:IMAGE_TAG".
# Returns 1 without touching any file when REPOSITORY or IMAGE_TAG is missing,
# or when IMAGE_TAG is the literal "null" that "jq -r" prints for a missing value.
update_image_uris() {
  # ${N:-} keeps "set -o nounset" from aborting with an opaque "unbound
  # variable" error when an argument is absent; the check below reports it.
  local REPOSITORY=${1:-}
  local NEW_TAG=${2:-}

  if [ -z "$REPOSITORY" ] || [ -z "$NEW_TAG" ] || [ "$NEW_TAG" = "null" ]; then
    echo "update_image_uris: refusing to update '${REPOSITORY}' with invalid tag '${NEW_TAG}'" >&2
    return 1
  fi

  # Kept local so the helper does not overwrite a global PREFIX.
  local PREFIX="image: ${REPOSITORY}"
  # '#' is the sed delimiter because repository names contain '/'.
  find ./test/manifests -type f -exec sed -i "s#$PREFIX:.*#$PREFIX:$NEW_TAG#g" {} +
}
38+
739
# update the nvidia k8s device plugin
echo "Updating Nvidia device plugin image"
# Pick the most recently updated tag of the exact form vMAJOR.MINOR.PATCH.
# The dots are escaped so they match a literal '.' rather than any character.
# --fail makes an HTTP error from the catalog a non-zero exit instead of
# feeding an error page to jq.
NVIDIA_DEVICE_PLUGIN_TAG=$(curl -sS --fail 'https://catalog.ngc.nvidia.com/api/containers/images?orgName=nvidia&name=k8s-device-plugin&isPublic=true' | jq -r '.images | sort_by(.updatedDate) | reverse | map(select(.tag | test("^v[0-9]+\\.[0-9]+\\.[0-9]+$"))) | first | .tag')
# jq -r prints the literal "null" when no tag matched; never write that into the manifests.
if [ -z "$NVIDIA_DEVICE_PLUGIN_TAG" ] || [ "$NVIDIA_DEVICE_PLUGIN_TAG" = "null" ]; then
  echo "Could not determine the latest Nvidia device plugin tag" >&2
  exit 1
fi
update_image_uris nvcr.io/nvidia/k8s-device-plugin "$NVIDIA_DEVICE_PLUGIN_TAG"
843

9-
NVIDIA_DEVICE_PLUGIN=$(curl -s 'https://catalog.ngc.nvidia.com/api/containers/images?orgName=nvidia&name=k8s-device-plugin&isPublic=true' | jq -r '.images | sort_by(.updatedDate) | reverse | map(select(.tag | test("^v[0-9]+.[0-9]+.[0-9]+$"))) | first | .tag')
10-
PREFIX="image: nvcr.io/nvidia/k8s-device-plugin"
11-
find ./test/manifests -type f -exec sed -i "s#$PREFIX:.*#$PREFIX:$NVIDIA_DEVICE_PLUGIN#g" {} +
44+
# below updates require authentication and should not exit early with a failure.
45+
# TODO: remove this once the aws credentials are setup and the paths are expected to succeed.
46+
set +e
1247

1348
# update the neuron k8s device plugin
14-
# TODO
49+
echo "Updating Neuron device plugin image"
NEURON_DEVICE_PLUGIN_REPOSITORY_NAME="neuron/neuron-device-plugin"
NEURON_DEVICE_PLUGIN_TAGS=$(get_ecr_image_tags "$ECR_PUBLIC_REGISTRY" "$NEURON_DEVICE_PLUGIN_REPOSITORY_NAME")
# $? is the exit status of the command substitution above.
if [ $? -eq 0 ]; then
  # Only purely numeric dotted tags (e.g. 2.26.26.0) are compared; anything
  # else would make map(tonumber) abort the whole jq program.
  # "// empty" prints nothing, rather than "null", when no tag qualifies.
  LATEST_NEURON_DEVICE_PLUGIN_TAG=$(echo "$NEURON_DEVICE_PLUGIN_TAGS" | jq -r 'map(select(test("^[0-9]+(\\.[0-9]+)*$"))) | max_by(split(".") | map(tonumber)) // empty')
  if [ -n "$LATEST_NEURON_DEVICE_PLUGIN_TAG" ]; then
    update_image_uris "${ECR_PUBLIC_REGISTRY}/${NEURON_DEVICE_PLUGIN_REPOSITORY_NAME}" "$LATEST_NEURON_DEVICE_PLUGIN_TAG"
  else
    echo "No usable Neuron device plugin tag found; manifests left unchanged" >&2
  fi
fi
1556

1657
# update the efa k8s device plugin
17-
# TODO
58+
echo "Updating EFA device plugin image"
EFA_DEVICE_PLUGIN_REPOSITORY_NAME="eks/aws-efa-k8s-device-plugin"
EFA_DEVICE_PLUGIN_TAGS=$(get_ecr_image_tags "$EKS_CONTAINER_REGISTRY" "$EFA_DEVICE_PLUGIN_REPOSITORY_NAME")
# $? is the exit status of the command substitution above.
if [ $? -eq 0 ]; then
  # Drop everything after the first '-' in each tag, keep only results of the
  # form [v]N.N..., then take the numerically greatest version.
  # Non-numeric tags are filtered out first because map(tonumber) would
  # otherwise abort the whole jq program.
  # "// empty" prints nothing, rather than "null", when no tag qualifies.
  LATEST_EFA_DEVICE_PLUGIN_TAG=$(echo "$EFA_DEVICE_PLUGIN_TAGS" | jq -r 'map(split("-") | .[0]) | map(select(test("^v?[0-9]+(\\.[0-9]+)*$"))) | max_by(sub("^v"; "") | split(".") | map(tonumber)) // empty')
  if [ -n "$LATEST_EFA_DEVICE_PLUGIN_TAG" ]; then
    update_image_uris "${EKS_CONTAINER_REGISTRY}/${EFA_DEVICE_PLUGIN_REPOSITORY_NAME}" "$LATEST_EFA_DEVICE_PLUGIN_TAG"
  else
    echo "No usable EFA device plugin tag found; manifests left unchanged" >&2
  fi
fi

test/manifests/assets/efa-device-plugin.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ spec:
2929
priorityClassName: "system-node-critical"
3030
hostNetwork: true
3131
containers:
32-
- image: 602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/aws-efa-k8s-device-plugin:v0.5.6
32+
- image: 602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/aws-efa-k8s-device-plugin:v0.5.8
3333
name: aws-efa-k8s-device-plugin
3434
securityContext:
3535
allowPrivilegeEscalation: false

test/manifests/assets/k8s-neuron-device-plugin.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ spec:
5353
- trn2u.48xlarge
5454
containers:
5555
# Find all neuron-device-plugin images at https://gallery.ecr.aws/neuron/neuron-device-plugin
56-
- image: public.ecr.aws/neuron/neuron-device-plugin:2.25.24.0
56+
- image: public.ecr.aws/neuron/neuron-device-plugin:2.26.26.0
5757
imagePullPolicy: Always
5858
name: neuron-device-plugin
5959
env:

0 commit comments

Comments
 (0)