.github/workflows/build-notebooks-TEMPLATE.yaml

Switch to AIPCC Base Images and Add Perl Utility for RHEL #2

Workflow file for this run

.github/workflows/build-notebooks-TEMPLATE.yaml at b068a53

	# inspired by
Check failure on line 1 in .github/workflows/build-notebooks-TEMPLATE.yaml — View workflow run for this annotation — GitHub Actions / .github/workflows/build-notebooks-TEMPLATE.yaml — Invalid workflow file: `(Line: 285, Col: 13): Unrecognized named-value: 'secrets'. Located at position 1 within expression: secrets.AIPCC_QUAY_BOT_USERNAME != ''`
	# https://github.com/thesuperzapper/kubeflow/blob/master/.github/workflows/example_notebook_servers_publish_TEMPLATE.yaml
	---
	name: Build & Publish Notebook Servers (TEMPLATE)
	# Reusable workflow: it is only triggered through `workflow_call` and is parameterized by the inputs below.
	"on":
	  workflow_call:
	    inputs:
	      # Background reading on the values that are passed in here:
	      # https://docs.github.com/en/actions/learn-github-actions/variables#default-environment-variables
	      # https://docs.github.com/en/actions/learn-github-actions/contexts
	      target:
	        description: "make target to build"
	        type: string
	        required: true
	      python:
	        description: "python version"
	        type: string
	        required: true
	      # Passed as a string; the steps decode it with fromJson(inputs.github)
	      github:
	        description: "top workflow's `github`"
	        type: string
	        required: true
	      platform:
	        description: "platform to build, podman build --platform="
	        type: string
	        required: true
	      # Opt-in switch that gates the git-crypt and subscription steps
	      subscription:
	        description: "add RHEL subscription from github secret"
	        type: boolean
	        required: false
	        default: false

	jobs:
	  build:
	    # linux/arm64 builds get the arm runner; every other platform uses ubuntu-24.04
	    # https://docs.github.com/en/actions/how-tos/using-github-hosted-runners/using-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
	    runs-on: ${{ inputs.platform == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
	    env:
	      # Some pieces of code (image pulls, for example) in podman consult TMPDIR or default to /var/tmp
	      TMPDIR: /home/runner/.local/share/containers/tmpdir
	      # Use the rootful instance of podman for sharing images with cri-o
	      # https://podman-desktop.io/blog/sharing-podman-images-with-kubernetes-cluster#introduction
	      # https://access.redhat.com/solutions/6986565
	      CONTAINER_HOST: unix:///var/run/podman/podman.sock
	      # We don't push here when building PRs, so we can use the same IMAGE_REGISTRY in all branches of the workflow
	      IMAGE_REGISTRY: "ghcr.io/${{ github.repository }}/workbench-images"
	      # GitHub image registry used for storing $(CONTAINER_ENGINE)'s cache
	      CACHE: "ghcr.io/${{ github.repository }}/workbench-images/build-cache"
	      TRIVY_VERSION: 0.64.1
	      # Targets (and their folder) that should be scanned using FS instead of IMAGE scan due to resource constraints
	      TRIVY_SCAN_FS_JSON: '{}'
	      # Makefile variables
	      BUILD_ARCH: ${{ inputs.platform }}
	      RELEASE_PYTHON_VERSION: ${{ inputs.python }}

	    steps:

	# image repository name must be lowercase
	# (`${VAR,,}` is bash lower-casing; the results are re-exported to later steps via GITHUB_ENV)
	- name: downcase IMAGE_REGISTRY and CACHE
	  run: |
	    echo "IMAGE_REGISTRY=${IMAGE_REGISTRY,,}" >>"${GITHUB_ENV}"
	    echo "CACHE=${CACHE,,}" >>"${GITHUB_ENV}"

	# Exactly one of the two checkout steps runs, selected by the event name of the calling workflow.
	# Default checkout for everything except pull_request_target
	- if: fromJson(inputs.github).event_name != 'pull_request_target'
	  uses: actions/checkout@v5
	# we need to checkout the pr branch, not pr target (the default for pull_request_target)
	# user access check is done in calling workflow
	- if: fromJson(inputs.github).event_name == 'pull_request_target'
	  uses: actions/checkout@v5
	  with:
	    ref: "refs/pull/${{ fromJson(inputs.github).event.number }}/merge"

	# Registers binfmt handlers so s390x / ppc64le images can be built on this runner.
	# https://github.com/docker/setup-qemu-action?tab=readme-ov-file#about
	# https://www.itix.fr/blog/qemu-user-static-with-podman/
	- name: Set up QEMU for non-native architecture
	  if: contains(fromJSON('["linux/s390x", "linux/ppc64le"]'), inputs.platform)
	  env:
	    platform: ${{ inputs.platform }}
	  # ${platform#*/} strips the leading "linux/" so only the architecture name is passed to --install
	  run: docker run --rm --privileged tonistiigi/binfmt --install ${platform#*/}

	# Create the directory that the job-level TMPDIR variable points to
	- run: mkdir -p $TMPDIR

	# do this early because it's fast and why not
	# Only runs when the caller enabled the `subscription` input.
	- name: Unlock encrypted secrets with git-crypt
	  if: ${{ inputs.subscription }}
	  run: |
	    sudo apt-get update
	    # -y keeps apt-get from stopping on a confirmation prompt in CI
	    sudo apt-get install -y git-crypt
	    # the key is stored base64-encoded in the secret; decode it to a temporary file
	    echo "${GIT_CRYPT_KEY}" | base64 --decode > ./git-crypt-key
	    git-crypt unlock ./git-crypt-key
	    rm ./git-crypt-key
	  env:
	    GIT_CRYPT_KEY: ${{ secrets.GIT_CRYPT_KEY }}

	# https://console.redhat.com/insights/connector/activation-keys
	# This runs slower than storing the entitlement certificates with git-crypt,
	# but on the other hand, it's then not necessary to regularly update them in the repo.
	- name: Add subscriptions from GitHub secret
	  if: ${{ inputs.subscription }}
	  run: |
	    # https://access.redhat.com/solutions/5870841
	    # https://github.com/containers/common/issues/1735
	    mkdir entitlement
	    mkdir consumer
	    # register inside a UBI container; the bind mounts capture the resulting certificates on the host
	    docker run \
	      -v "${PWD}/entitlement:/etc/pki/entitlement:Z" \
	      -v "${PWD}/consumer:/etc/pki/consumer:Z" \
	      --rm -t registry.access.redhat.com/ubi9/ubi \
	      /usr/sbin/subscription-manager register --org="${SUBSCRIPTION_ORG}" --activationkey="${SUBSCRIPTION_ACTIVATION_KEY}"
	    # write the two host:container mount mappings to mounts.conf
	    printf '%s\n' \
	      "${PWD}/entitlement:/etc/pki/entitlement" \
	      "${PWD}/consumer:/etc/pki/consumer" \
	      | sudo tee /usr/share/containers/mounts.conf

	    mkdir -p "$HOME/.config/containers/"
	    sudo cp "${PWD}/ci/secrets/pull-secret.json" "$HOME/.config/containers/auth.json"
	  env:
	    SUBSCRIPTION_ORG: ${{ secrets.SUBSCRIPTION_ORG }}
	    SUBSCRIPTION_ACTIVATION_KEY: ${{ secrets.SUBSCRIPTION_ACTIVATION_KEY }}

	# Go toolchain, needed for bin/buildinputs in scripts/sandbox.py;
	# the action's cache is keyed on the buildinputs go.sum
	- uses: actions/setup-go@v5
	  with:
	    cache-dependency-path: scripts/buildinputs/go.sum

	# Refresh the apt package index
	- run: sudo apt-get update

	# Authenticate against ghcr.io as the triggering actor, using the workflow's GITHUB_TOKEN
	- name: Login to GitHub Container Registry
	  uses: docker/login-action@v3
	  with:
	    registry: ghcr.io
	    password: ${{ secrets.GITHUB_TOKEN }}
	    username: ${{ github.actor }}

	# region Free up disk space

	# Only runs for targets whose name contains rocm, cuda, pytorch or tensorflow.
	- name: Free up additional disk space
	  # https://docs.github.com/en/actions/learn-github-actions/expressions
	  if: "${{ contains(inputs.target, 'rocm') || contains(inputs.target, 'cuda') ||
	    contains(inputs.target, 'pytorch') || contains(inputs.target, 'tensorflow') }}"
	  run: |
	    set -x

	    df -h

	    sudo apt-get update
	    sudo apt-get remove -y '^dotnet-.*'
	    sudo apt-get remove -y '^llvm-.*'
	    sudo apt-get remove -y 'php.*'
	    sudo apt-get remove -y '^mongodb-.*'
	    sudo apt-get autoremove -y
	    sudo apt-get clean
	    # the deletions below run in the background in parallel; `wait` joins them
	    sudo rm -rf /usr/local/.ghcup &
	    sudo rm -rf /usr/local/lib/android &
	    sudo rm -rf /usr/local/share/boost &
	    sudo rm -rf /usr/local/lib/node_modules &
	    sudo rm -rf /usr/share/dotnet &
	    sudo rm -rf /opt/ghc &
	    sudo rm -rf /opt/hostedtoolcache/CodeQL &

	    sudo docker image prune --all --force &

	    wait

	    df -h

	# The outcome of this step is checked by the final compsize report step
	- id: install-compsize
	  run: sudo apt-get install -y btrfs-compsize

	# Disk and memory usage are printed before and after the overlay script for comparison
	- name: Mount lvm overlay for podman builds
	  run: |
	    df -h
	    free -h

	    bash ./ci/cached-builds/gha_lvm_overlay.sh

	    df -h
	    free -h

	# endregion

	# region Podman setup

	# Uninstall the apt-provided podman and crun
	# https://github.com/containers/buildah/issues/2521#issuecomment-884779112
	- name: "Workaround https://github.com/containers/podman/issues/22152#issuecomment-2027705598"
	  run: sudo apt-get -qq remove podman crun

	# Cache the linuxbrew prefix per runner OS and architecture; the podman install steps
	# are skipped when this step reports a cache hit.
	# https://docs.github.com/en/actions/reference/variables-reference#default-environment-variables
	# https://docs.github.com/en/actions/how-tos/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables
	- id: cached-linuxbrew
	  uses: actions/cache@v4
	  with:
	    key: linuxbrew-${{ runner.os }}-${{ runner.arch }}
	    path: /home/linuxbrew/.linuxbrew

	# Runs for amd64 and the emulated platforms, and only when the linuxbrew cache missed
	- name: Install podman (linux/amd64, or qemu-user emulation)
	  if: contains(fromJSON('["linux/amd64", "linux/s390x", "linux/ppc64le"]'), inputs.platform) && steps.cached-linuxbrew.outputs.cache-hit != 'true'
	  run: |
	    /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
	    /home/linuxbrew/.linuxbrew/bin/brew install podman

	# Homebrew prints the following on arm64 runners:
	# Warning: Your CPU architecture (arm64) is not supported. We only support
	# x86_64 CPU architectures. You will be unable to use binary packages (bottles).
	#
	# This is a Tier 2 configuration:
	# https://docs.brew.sh/Support-Tiers#tier-2
	# Do not report any issues to Homebrew/* repositories!
	# Read the above document instead before opening any issues or PRs.
	- name: Install podman (linux/arm64)
	  if: inputs.platform == 'linux/arm64' && steps.cached-linuxbrew.outputs.cache-hit != 'true'
	  # A plain `brew install podman` fails here with:
	  # Error: podman: no bottle available!
	  # If you're feeling brave, you can try to install from source with:
	  run: |
	    /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
	    /home/linuxbrew/.linuxbrew/bin/brew install --build-from-source podman

	# Appending to GITHUB_PATH makes the brew-installed binaries resolvable in all later steps
	- name: Add linuxbrew to PATH
	  run: echo "/home/linuxbrew/.linuxbrew/bin/" >> "$GITHUB_PATH"

	# Installs the repo's containers/storage/registries configs, resets podman storage,
	# and starts a podman API socket through systemd.
	- name: Configure Podman
	  run: |
	    set -Eeuxo pipefail

	    # podman running as service ignores the TMPDIR env var here, let's give it a bind-mount to /var/tmp
	    mkdir -p "$TMPDIR"
	    sudo mount --bind -o rw,noexec,nosuid,nodev,bind "$TMPDIR" /var/tmp

	    # podman from brew has its own /etc (was giving me Failed to obtain podman configuration: runroot must be set)
	    # the (default) config location is also where cri-o gets its storage defaults (that can be overriden in crio.conf)
	    sudo cp ci/cached-builds/containers.conf /etc/containers.conf
	    sudo cp ci/cached-builds/containers.conf /home/linuxbrew/.linuxbrew/opt/podman/etc/containers.conf
	    sudo cp ci/cached-builds/storage.conf /etc/containers/storage.conf
	    sudo cp ci/cached-builds/storage.conf /home/linuxbrew/.linuxbrew/opt/podman/etc/containers/storage.conf
	    sudo cp ci/cached-builds/registries.conf /etc/containers/registries.conf
	    sudo cp ci/cached-builds/registries.conf /home/linuxbrew/.linuxbrew/opt/podman/etc/containers/registries.conf

	    # should reset storage when changing storage.conf
	    mkdir -p "$HOME/.local/share/containers/storage/tmp"
	    # remote (CONTAINER_HOST) podman does not do reset (and refuses --force option)
	    sudo /home/linuxbrew/.linuxbrew/opt/podman/bin/podman system reset --force

	    # https://github.com/containers/podman/pull/25504
	    # podman 5.5.0: The podman system reset command no longer removes the user's podman.sock API socket
	    sudo rm -rf /var/run/podman

	    # https://github.com/containers/podman/blob/main/docs/tutorials/socket_activation.md
	    # since `brew services start podman` is buggy, let's do our own brew-compatible service
	    # Regarding directory paths, see https://unix.stackexchange.com/questions/224992/where-do-i-put-my-systemd-unit-file
	    sudo mkdir -p /usr/local/lib/systemd/system/
	    sudo cp ci/cached-builds/podman.service /usr/local/lib/systemd/system/podman.service
	    sudo cp ci/cached-builds/podman.socket /usr/local/lib/systemd/system/podman.socket
	    sudo systemctl daemon-reload
	    sudo systemctl unmask --now podman.service podman.socket
	    sudo systemctl start podman.socket

	    # needed (much) later for trivy
	    echo "PODMAN_SOCK=/var/run/podman/podman.sock" >> "$GITHUB_ENV"

	    # quick check podman works
	    podman ps

	# Diagnostics that run only when an earlier step failed: the journal plus the state of the podman socket
	- name: Show error logs (on failure)
	  if: ${{ failure() }}
	  run: |
	    set -Eeuxo pipefail

	    journalctl -xe
	    # the `|| echo` fallbacks keep a missing socket from aborting this step under `set -e`
	    ls -AlF /var/run/podman/podman.sock || echo "Socket /var/run/podman/podman.sock not found"
	    sudo ss -xlpn | grep 'podman.sock' || echo "No active listener found for podman.sock via ss"

	# Outputs: IMAGE_TAG, OUTPUT_IMAGE and SANITIZED_PLATFORM (consumed by later steps via steps.calculated_vars.outputs.*)
	- name: Calculate image name and tag
	  id: calculated_vars
	  run: |
	    # Need for sanitization explained in https://github.com/opendatahub-io/notebooks/issues/631
	    # For length, Docker image tags have 128-character limit, and we form them as <inputs.target>-<ref_name>_<sha>
	    # therefore since sha is 40 characters, and our target names are <40 chars, we should cut ref_name at 40
	    # NOTE: `cut` must be inside the command substitution; as a separate pipeline stage the
	    # assignment would run in a subshell and SANITIZED_REF_NAME would stay empty here.
	    SANITIZED_REF_NAME=$(echo "${REF_NAME}" | sed 's/[^a-zA-Z0-9._-]/_/g' | cut -c 1-40)
	    IMAGE_TAG="${SANITIZED_REF_NAME}_${{ github.sha }}"

	    echo "IMAGE_TAG=${IMAGE_TAG}" >> "$GITHUB_OUTPUT"
	    echo "OUTPUT_IMAGE=${{ env.IMAGE_REGISTRY}}:${{ inputs.target }}-${IMAGE_TAG}" >> "$GITHUB_OUTPUT"

	    echo "SANITIZED_PLATFORM=$(echo "${{ inputs.platform }}" | sed 's/[^a-zA-Z0-9._-]/_/g')" >> "$GITHUB_OUTPUT"
	  env:
	    # the ref name is passed through the environment rather than pasted into the script,
	    # so shell metacharacters in a branch name are never interpreted
	    REF_NAME: ${{ github.ref_name }}

	# endregion

	# region Image build

	# The `secrets` context cannot be referenced in a step-level `if:` (GitHub rejects the workflow with
	# "Unrecognized named-value: 'secrets'"), so the secrets are mapped into the step environment
	# and the presence check is done in the script itself.
	- name: Login to quay.io/aipcc (if the secret is present)
	  shell: bash
	  env:
	    AIPCC_QUAY_BOT_USERNAME: ${{ secrets.AIPCC_QUAY_BOT_USERNAME }}
	    AIPCC_QUAY_BOT_PASSWORD: ${{ secrets.AIPCC_QUAY_BOT_PASSWORD }}
	  run: |
	    if [[ -z "${AIPCC_QUAY_BOT_USERNAME}" ]]; then
	      echo "AIPCC_QUAY_BOT_USERNAME is not set, skipping login to quay.io/aipcc"
	      exit 0
	    fi
	    # the password is fed on stdin so it never appears on the command line
	    echo "${AIPCC_QUAY_BOT_PASSWORD}" | podman login quay.io/aipcc -u "${AIPCC_QUAY_BOT_USERNAME}" --password-stdin

	# Output: EXTRA_PODMAN_BUILD_ARGS (empty unless the platform is linux/s390x)
	- name: Compute extra podman build args
	  id: extra-podman-build-args
	  run: |
	    set -Eeuxo pipefail

	    EXTRA_PODMAN_BUILD_ARGS=""
	    if [[ "${{ inputs.platform }}" == "linux/s390x" ]]; then
	      # workaround for known issue https://github.com/zeromq/libzmq/pull/4486
	      # In qemu-user, CACHELINE_SIZE probe is undefined
	      EXTRA_PODMAN_BUILD_ARGS+='--env=CXXFLAGS=-Dundefined=64'
	    fi
	    echo "EXTRA_PODMAN_BUILD_ARGS=$EXTRA_PODMAN_BUILD_ARGS" >> "$GITHUB_OUTPUT"

	# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#push
	# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request

	# Build variant for push / schedule / workflow_dispatch: reads from and writes to the build cache
	- name: "push|schedule|workflow_dispatch: make ${{ inputs.target }}"
	  if: "${{ fromJson(inputs.github).event_name == 'push' ||
	    fromJson(inputs.github).event_name == 'schedule' ||
	    fromJson(inputs.github).event_name == 'workflow_dispatch' }}"
	  run: |
	    # print running stats on disk occupancy
	    (while true; do df -h | grep "${HOME}/.local/share/containers"; sleep 30; done) &

	    make ${{ inputs.target }}
	  env:
	    IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}"
	    CONTAINER_BUILD_CACHE_ARGS: "${{ steps.extra-podman-build-args.outputs.EXTRA_PODMAN_BUILD_ARGS }} --cache-from ${{ env.CACHE }} --cache-to ${{ env.CACHE }}"
	# Build variant for pull requests: reads from the build cache only (no --cache-to) and does not push
	- name: "pull_request: make ${{ inputs.target }}"
	  if: "${{ fromJson(inputs.github).event_name == 'pull_request' ||
	    fromJson(inputs.github).event_name == 'pull_request_target' }}"
	  run: |
	    # print running stats on disk occupancy
	    (while true; do df -h | grep "${HOME}/.local/share/containers"; sleep 30; done) &

	    make ${{ inputs.target }}
	  env:
	    IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}"
	    CONTAINER_BUILD_CACHE_ARGS: "${{ steps.extra-podman-build-args.outputs.EXTRA_PODMAN_BUILD_ARGS }} --cache-from ${{ env.CACHE }}"
	    # We don't have access to image registry, so disable pushing
	    PUSH_IMAGES: "no"

	# List the images in podman storage together with their digests
	- name: Show podman images information
	  run: podman images --digests

	# endregion

	# region Pytest image tests

	# Install uv, then sync the project's locked dependencies
	# https://github.com/astral-sh/setup-uv
	- name: Install the latest version of uv
	  uses: astral-sh/setup-uv@v5
	  with:
	    version: "latest"
	    python-version: "3.12"
	    pyproject-file: "pyproject.toml"
	    enable-cache: true
	    cache-dependency-glob: "uv.lock"

	- name: Check uv is installed correctly
	  run: uv version

	- name: Install deps
	  run: uv sync --locked

	# Runs the tests/containers suite against the freshly built image, excluding the
	# openshift, cuda and rocm markers; Testcontainers is pointed at the podman socket.
	- name: Run Testcontainers container tests (in PyTest)
	  run: |
	    set -Eeuxo pipefail
	    uv run pytest --capture=fd tests/containers -m 'not openshift and not cuda and not rocm' --image="${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}"
	  env:
	    DOCKER_HOST: "unix:///var/run/podman/podman.sock"
	    TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE: "/var/run/podman/podman.sock"
	    # pulling the Ryuk container from docker.io introduces CI flakiness
	    TESTCONTAINERS_RYUK_DISABLED: "true"

	# endregion Pytest image tests

	# region Makefile image tests

	# Later steps in this region are gated on this step's `tests` output being 'true'
	- id: have-tests
	  name: Check if we have tests or not
	  run: "ci/cached-builds/has_tests.py --target ${{ inputs.target }}"

	# Rewrites imagePullPolicy in every statefulset.yaml / pod.yaml under the checkout
	- name: "Change pull policy to IfNotPresent"
	  run: |
	    set -Eeuxo pipefail

	    # the escaped parentheses group the two -name tests so that `-type f -exec` applies to both
	    find . \( -name "statefulset.yaml" -o -name "pod.yaml" \) -type f -exec \
	      sed -i'' 's/imagePullPolicy: Always/imagePullPolicy: IfNotPresent/g' {} \;
	    # show what was changed
	    git diff

	# Without the symlinks created below, the deploy target fails like this:
	# [INFO] Running command (('make deploy9-runtimes-rocm-tensorflow-ubi9-python-3.11',), {'shell': True})
	# Deploying notebook from runtimes/rocm/tensorflow/ubi9-python-3.11/kustomize/base directory...
	# sed: can't read runtimes/rocm/tensorflow/ubi9-python-3.11/kustomize/base/kustomization.yaml: No such file or directory
	- name: "Fixup paths that prevent us from running rocm tests"
	  if: ${{ steps.have-tests.outputs.tests == 'true' }}
	  run: |
	    set -Eeuxo pipefail

	    mkdir -p runtimes/rocm
	    # symlink targets are relative to runtimes/rocm/, i.e. they point at runtimes/rocm-tensorflow and runtimes/rocm-pytorch
	    ln -s ../rocm-tensorflow runtimes/rocm/tensorflow
	    ln -s ../rocm-pytorch runtimes/rocm/pytorch

	# https://cri-o.io/
	# Adds the Kubernetes and cri-o apt repositories, installs version-pinned packages,
	# installs the repo's CNI bridge and crio configuration, and starts crio.
	- name: Install cri-o
	  id: install-crio
	  if: ${{ steps.have-tests.outputs.tests == 'true' }}
	  run: |
	    set -Eeuxo pipefail

	    sudo apt-get update
	    sudo apt-get install -y software-properties-common curl

	    # https://github.com/cri-o/packaging?tab=readme-ov-file#distributions-using-deb-packages

	    curl -fsSL https://pkgs.k8s.io/core:/stable:/v${KUBERNETES_VERSION}/deb/Release.key | \
	      sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg

	    echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v${KUBERNETES_VERSION}/deb/ /" | \
	      sudo tee /etc/apt/sources.list.d/kubernetes.list

	    curl -fsSL https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v${CRIO_VERSION}/deb/Release.key | \
	      sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/cri-o-apt-keyring.gpg

	    echo "deb [signed-by=/etc/apt/keyrings/cri-o-apt-keyring.gpg] https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v${CRIO_VERSION}/deb/ /" | \
	      sudo tee /etc/apt/sources.list.d/cri-o.list

	    sudo apt-get update

	    # [ERROR FileExisting-conntrack]: conntrack not found in system path
	    # see man apt-patterns for the ~name=version* syntax

	    # The following packages will be DOWNGRADED:
	    #   kubectl
	    # E: Packages were downgraded and -y was used without --allow-downgrades.

	    # every pin ends in `.*` so that any patch release of the chosen minor version matches
	    sudo apt-get install -y --allow-downgrades \
	      "cri-o=${CRIO_VERSION}.*" \
	      "kubelet=${KUBERNETES_VERSION}.*" "kubeadm=${KUBERNETES_VERSION}.*" "kubectl=${KUBERNETES_VERSION}.*" \
	      conntrack

	    # make use of /etc/cni/net.d/11-crio-ipv4-bridge.conflist so we don't
	    # need a pod network and just use the default bridge
	    sudo rm -rf /etc/cni/net.d/*
	    # cat /etc/cni/net.d/11-crio-ipv4-bridge.conflist
	    # https://github.com/containerd/containerd/blob/main/script%2Fsetup%2Finstall-cni
	    # https://www.cni.dev/plugins/current/main/bridge/
	    sudo cp ci/cached-builds/11-crio-ipv4-bridge.conflist /etc/cni/net.d/11-crio-ipv4-bridge.conflist

	    sudo cp ci/cached-builds/crio.conf /etc/crio/crio.conf.d/

	    sudo systemctl start crio.service
	  env:
	    # Versions are quoted so YAML keeps them as strings (an unquoted 1.30 would be read as the float 1.3)
	    # TODO(jdanek): install also "cri-tools=${CRIO_VERSION}.*" when updating to 1.33
	    CRIO_VERSION: "1.32"
	    # This has to be kept in sync with the packages above, otherwise
	    # [ERROR KubeletVersion]: the kubelet version is higher than the control plane version.
	    # This is not a supported version skew and may lead to a malfunctional cluster.
	    # Kubelet version: "1.33.0" Control plane version: "1.30.12"
	    KUBERNETES_VERSION: "1.33"

	# Failure-only diagnostics for the crio service
	- name: Show crio debug data (on failure)
	  if: ${{ failure() && steps.have-tests.outputs.tests == 'true' }}
	  run: |
	    set -Eeuxo pipefail

	    # `|| true` keeps a non-zero exit status from aborting this step under `set -e`
	    sudo systemctl status crio.service || true
	    sudo journalctl -xeu crio.service

	# do this early, it's a good check that cri-o is not completely broken
	- name: Show crio images information
	  if: steps.have-tests.outputs.tests == 'true'
	  run: sudo crictl images

	# Prepares host networking, runs `kubeadm init` with the repo's config,
	# and copies the admin kubeconfig to the runner user's home.
	- name: Install Kubernetes cluster
	  if: ${{ steps.have-tests.outputs.tests == 'true' }}
	  run: |
	    set -Eeuxo pipefail

	    sudo swapoff -a
	    sudo modprobe br_netfilter
	    sudo sysctl -w net.ipv4.ip_forward=1

	    # Was getting strange DNS resolution errors from pods that don't seem to want to go away sometimes:
	    #   Resolving raw.githubusercontent.com (raw.githubusercontent.com)... failed: Name or service not known.
	    #   wget: unable to resolve host address ‘raw.githubusercontent.com’
	    # Here's what helped:
	    #   https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/#known-issues
	    #   https://github.com/kubernetes/kubernetes/blob/e4c1f980b76fecece30c2f77885a7117192170a6/CHANGELOG/CHANGELOG-1.30.md?plain=1#L1454
	    #   https://github.com/canonical/microk8s/issues/68#issuecomment-404923563
	    sudo ufw allow in on cni0
	    sudo ufw allow out on cni0
	    sudo ufw default allow routed
	    sudo iptables -P FORWARD ACCEPT
	    sudo iptables -t nat -A POSTROUTING -s 10.85.0.0/16 -o eth0 -j MASQUERADE

	    # https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm
	    sudo kubeadm init --config=ci/cached-builds/kubeadm.yaml

	    mkdir -p $HOME/.kube
	    sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
	    # the copy was made with sudo; hand the file to the current user
	    sudo chown $(id -u):$(id -g) $HOME/.kube/config

	# Failure-only diagnostics; requires that the cri-o install step itself succeeded
	- name: Show kubelet debug data (on failure)
	  if: ${{ failure() && steps.have-tests.outputs.tests == 'true' && steps.install-crio.outcome == 'success' }}
	  run: |
	    set -Eeuxo pipefail

	    sudo systemctl status kubelet || true
	    sudo journalctl -xeu kubelet

	    # Here is one example how you may list all running Kubernetes containers by using crictl:
	    sudo crictl --runtime-endpoint unix:///var/run/crio/crio.sock ps -a | grep kube | grep -v pause
	    # Once you have found the failing container, you can inspect its logs with:
	    # crictl --runtime-endpoint unix:///var/run/crio/crio.sock logs CONTAINERID

	- name: Show nodes status and wait for readiness
	  if: ${{ steps.have-tests.outputs.tests == 'true' }}
	  run: |
	    kubectl describe nodes
	    # on timeout, describe the nodes once more for diagnostics and then fail the step
	    kubectl wait --for=condition=Ready nodes --all --timeout=100s || (kubectl describe nodes && false)

	# Blocks until every deployment is Available and every pod is Ready, in all namespaces (100s timeout each)
	- name: Wait for pods to be running
	  if: ${{ steps.have-tests.outputs.tests == 'true' }}
	  run: |
	    set -Eeuxo pipefail
	    kubectl wait deployments --all --all-namespaces --for=condition=Available --timeout=100s
	    kubectl wait pods --all --all-namespaces --for=condition=Ready --timeout=100s

	# Deploys local-path-provisioner v0.0.31 and marks its storage class as the cluster default
	- name: "Install local-path provisioner"
	  if: ${{ steps.have-tests.outputs.tests == 'true' }}
	  run: |
	    set -Eeuxo pipefail
	    kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.31/deploy/local-path-storage.yaml
	    kubectl wait deployments --all --namespace=local-path-storage --for=condition=Available --timeout=100s
	    # https://kubernetes.io/docs/tasks/administer-cluster/change-default-storage-class/
	    kubectl get storageclass
	    kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'

	- name: Run image tests
	  # skip on s390x because we are unable to install requirements-elyra.txt that's installed by runtime image tests
	  # https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/etc/generic/requirements-elyra.txt
	  if: ${{ steps.have-tests.outputs.tests == 'true' && !contains(fromJSON('["linux/s390x"]'), inputs.platform) }}
	  env:
	    IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}"
	    # for make deploy, mandatory to specify for the more exotic cases
	    NOTEBOOK_TAG: "${{ inputs.target }}-${{ steps.calculated_vars.outputs.IMAGE_TAG }}"
	  run: python3 ci/cached-builds/make_test.py --target ${{ inputs.target }}

	# endregion

	# Runs the tests/containers tests carrying the `openshift` marker (cuda and rocm excluded)
	- name: Run OpenShift container tests (in PyTest)
	  if: ${{ steps.have-tests.outputs.tests == 'true' }}
	  run: |
	    set -Eeuxo pipefail
	    uv run pytest --capture=fd tests/containers -m 'openshift and not cuda and not rocm' --image="${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}"
	  env:
	    # TODO(jdanek): this Testcontainers stuff should not be necessary but currently it has to be there
	    DOCKER_HOST: "unix:///var/run/podman/podman.sock"
	    TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE: "/var/run/podman/podman.sock"
	    # pulling the Ryuk container from docker.io introduces CI flakiness
	    TESTCONTAINERS_RYUK_DISABLED: "true"

	# region Trivy vulnerability scan

	# Decides whether Trivy runs and against what. Outputs `target` and `type` ("fs" or "image");
	# both are left unset when no scan should run, which skips the scanner step.
	- name: "pull_request|schedule: resolve target if Trivy scan should run"
	  id: resolve-target
	  if: ${{ fromJson(inputs.github).event_name == 'pull_request' || fromJson(inputs.github).event_name == 'schedule' }}
	  env:
	    EVENT_NAME: ${{ fromJson(inputs.github).event_name }}
	    HAS_TRIVY_LABEL: ${{ contains(fromJson(inputs.github).event.pull_request.labels.*.name, 'trivy-scan') }}
	    # folder configured for this target in TRIVY_SCAN_FS_JSON; empty when the target has no entry
	    FS_SCAN_FOLDER: ${{ fromJson(env.TRIVY_SCAN_FS_JSON)[inputs.target] }}
	  run: |
	    TARGET=""
	    TYPE=""

	    # scan on scheduled runs, and on pull requests carrying the trivy-scan label
	    if [[ ( "$EVENT_NAME" == "pull_request" && "$HAS_TRIVY_LABEL" == "true" ) || "$EVENT_NAME" == "schedule" ]]; then
	      if [[ -n "$FS_SCAN_FOLDER" ]]; then
	        TARGET="$FS_SCAN_FOLDER"
	        TYPE="fs"
	      else
	        TARGET="${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}"
	        TYPE="image"
	      fi
	    fi

	    if [[ -n "$TARGET" ]]; then
	      echo "target=$TARGET" >> "$GITHUB_OUTPUT"
	      echo "type=$TYPE" >> "$GITHUB_OUTPUT"
	      echo "Trivy scan will run on $TARGET ($TYPE)"
	    else
	      echo "Trivy scan won't run"
	    fi

	# Runs Trivy in a container against the resolved target and appends the markdown report to the step summary
	- name: Run Trivy vulnerability scanner
	  if: ${{ steps.resolve-target.outputs.target }}
	  run: |
	    REPORT_FOLDER=${{ github.workspace }}/report
	    REPORT_FILE=trivy-report.md
	    REPORT_TEMPLATE=trivy-markdown.tpl

	    mkdir -p $REPORT_FOLDER
	    cp ci/$REPORT_TEMPLATE $REPORT_FOLDER

	    SCAN_TARGET=${{ steps.resolve-target.outputs.target }}
	    SCAN_TYPE=${{ steps.resolve-target.outputs.type }}
	    echo "Scanning $SCAN_TARGET ($SCAN_TYPE)"

	    # both stay empty when the scan type needs no extra arguments
	    SCAN_ARGS=""
	    PODMAN_ARGS=""
	    if [[ "$SCAN_TYPE" == "image" ]]; then
	      SCAN_ARGS="--image-src podman --podman-host /var/run/podman/podman.sock"
	      PODMAN_ARGS="-v ${PODMAN_SOCK}:/var/run/podman/podman.sock"
	    elif [[ "$SCAN_TYPE" == "fs" ]]; then
	      WORKSPACE_FOLDER="/workspace"
	      SCAN_TARGET="$WORKSPACE_FOLDER/$SCAN_TARGET"
	      PODMAN_ARGS="-v ${{ github.workspace }}:$WORKSPACE_FOLDER"
	    fi

	    # have trivy access podman socket,
	    # https://github.com/aquasecurity/trivy/issues/580#issuecomment-666423279
	    # $PODMAN_ARGS and $SCAN_ARGS are intentionally unquoted so they split into separate arguments
	    podman run --rm \
	      $PODMAN_ARGS \
	      -v ${REPORT_FOLDER}:/report \
	      docker.io/aquasec/trivy:$TRIVY_VERSION \
	        $SCAN_TYPE \
	        $SCAN_ARGS \
	        --scanners vuln --ignore-unfixed \
	        --exit-code 0 --timeout 30m \
	        --format template --template "@/report/$REPORT_TEMPLATE" -o /report/$REPORT_FILE \
	        $SCAN_TARGET

	    cat $REPORT_FOLDER/$REPORT_FILE >> $GITHUB_STEP_SUMMARY

	# endregion

	# region check-payload for FIPS compliance

	# Output: GOPATH, a workspace-local Go path shared by the two check-payload steps that follow
	- id: check-payload-vars
	  run: |
	    echo "GOPATH=${{ github.workspace }}/go-check-payload" >> "$GITHUB_OUTPUT"
	  working-directory: scripts/check-payload

	# for https://github.com/openshift/check-payload to cache the built binary
	- uses: actions/setup-go@v5
	  with:
	    cache-dependency-path: "scripts/check-payload/go.sum"
	  env:
	    GOPATH: ${{ steps.check-payload-vars.outputs.GOPATH }}

	# check-payload needs `oc` on the PATH, otherwise it fails with:
	# F0512 15:43:03.219076   21568 main.go:294] Error: exec: "oc": executable file not found in $PATH
	- name: Install oc client
	  run: |
	    # Install the oc client
	    # $(uname -m) selects the download directory matching the runner's CPU architecture
	    curl -L https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/clients/ocp/stable/openshift-client-linux.tar.gz -o /tmp/openshift-client-linux.tar.gz
	    # extract only the `oc` binary from the archive
	    tar -xzvf /tmp/openshift-client-linux.tar.gz oc
	    rm -f /tmp/openshift-client-linux.tar.gz
	    sudo mv ./oc /usr/local/bin

	# perform `podman image mount` ourselves, and then follow the scenario from
	# https://github.com/openshift/check-payload/pull/154, that is because
	# `check-payload scan image --spec` insists on pulling the image, even if already present,
	# that causes trouble when checking PRs (image not pushed) and requires `podman login` as root
	# (we run podman as root in the GHA to reuse container storage in Kubernetes)
	# use sudo to avoid
	#   podman error (args=[image mount ghcr.io/...])
	#   (stderr=Error: cannot use command "podman image mount" with the remote podman client
	# and use --preserve-env=PATH to avoid
	#   F0512 16:31:58.425584    9911 main.go:294] Error: exec: "podman": executable file not found in $PATH
	- name: Check image with check-payload for FIPS compliance
	  run: |
	    set -Eeuxo pipefail
	    # resolve podman under current user, not under sudo/root
	    PODMAN="$(which podman)"
	    # mount the image
	    IMAGE_MOUNT_DIR=$(sudo "${PODMAN}" image mount "${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}")
	    # run the check-payload scan
	    sudo --preserve-env=PATH go tool github.com/openshift/check-payload scan local --path "${IMAGE_MOUNT_DIR}"
	    # unmount the image
	    sudo "${PODMAN}" image unmount --all
	  working-directory: scripts/check-payload
	  env:
	    GOPATH: ${{ steps.check-payload-vars.outputs.GOPATH }}

	# endregion

	# region Typescript (browser) image tests

	# https://playwright.dev/docs/ci
	# https://playwright.dev/docs/docker
	# we leave little free disk space after we mount LVM for podman storage
	# not enough to install playwright; running playwright in podman uses the space we have
	- name: Run Playwright tests
	  if: ${{ contains(inputs.target, 'codeserver') }}
	  # --ipc=host because Microsoft says so in Playwright docs
	  # --net=host because testcontainers connects to the Reaper container's exposed port
	  # we need to pass through the relevant environment variables
	  #   DEBUG configures Node.js debuggers, sets different verbosity as needed
	  #   CI=true is set on every CI nowadays
	  #   PODMAN_SOCK should be mounted to /var/run/docker.sock, other likely mounting locations may not exist (mkdir -p)
	  #   TEST_TARGET is the workbench image the test will run
	  # --volume(s) let us access docker socket and not clobber host's node_modules
	  # The heredoc below is fed to /bin/bash inside the container on stdin (hence --interactive).
	  run: |
	    podman run \
	      --interactive --rm \
	      --ipc=host \
	      --net=host \
	      --env "CI=true" \
	      --env "NPM_CONFIG_fund=false" \
	      --env "DEBUG=testcontainers:*" \
	      --env "PODMAN_SOCK=/var/run/docker.sock" \
	      --env "TEST_TARGET" \
	      --volume ${PODMAN_SOCK}:/var/run/docker.sock \
	      --volume ${PWD}:/mnt \
	      --volume /mnt/node_modules \
	      mcr.microsoft.com/playwright:v1.53.1-noble \
	        /bin/bash <<EOF
	    set -Eeuxo pipefail
	    cd /mnt
	    npm install -g pnpm && pnpm install
	    pnpm exec playwright test
	    exit 0
	    EOF
	  working-directory: tests/browser
	  env:
	    TEST_TARGET: "${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}"
	# Upload the Playwright report for codeserver pull-request builds unless the run was cancelled
	- uses: actions/upload-artifact@v4
	  if: ${{ !cancelled() && fromJson(inputs.github).event_name == 'pull_request' && contains(inputs.target, 'codeserver') }}
	  with:
	    name: "${{ inputs.target }}_${{ steps.calculated_vars.outputs.SANITIZED_PLATFORM }}_playwright-report"
	    path: tests/browser/playwright-report/
	    retention-days: 30

	# endregion

	# Closing disk-usage reports: they run even after a failed step and are skipped only on cancellation
	- if: "${{ !cancelled() }}"
	  run: df -h

	# compsize is only invoked when its installation step succeeded
	- if: "${{ !cancelled() && steps.install-compsize.outcome == 'success' }}"
	  run: sudo compsize -x "${HOME}/.local/share/containers"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Switch to AIPCC Base Images and Add Perl Utility for RHEL #2

Workflow file

Switch to AIPCC Base Images and Add Perl Utility for RHEL #2

Uh oh!

Jobs

Run details

Workflow file for this run

GitHub Actions / .github/workflows/build-notebooks-TEMPLATE.yaml