Skip to content

Switch to AIPCC Base Images and Add Perl Utility for RHEL #2

Switch to AIPCC Base Images and Add Perl Utility for RHEL

Switch to AIPCC Base Images and Add Perl Utility for RHEL #2

# inspired by

Check failure on line 1 in .github/workflows/build-notebooks-TEMPLATE.yaml

View workflow run for this annotation

GitHub Actions / .github/workflows/build-notebooks-TEMPLATE.yaml

Invalid workflow file

(Line: 285, Col: 13): Unrecognized named-value: 'secrets'. Located at position 1 within expression: secrets.AIPCC_QUAY_BOT_USERNAME != ''
# https://github.com/thesuperzapper/kubeflow/blob/master/.github/workflows/example_notebook_servers_publish_TEMPLATE.yaml
---
# Reusable workflow: it is started by other workflows through `workflow_call`
# and configured only through the inputs declared below.
name: Build & Publish Notebook Servers (TEMPLATE)
"on":
  workflow_call:
    inputs:
      # https://docs.github.com/en/actions/learn-github-actions/variables#default-environment-variables
      # https://docs.github.com/en/actions/learn-github-actions/contexts
      target:
        description: "make target to build"
        type: string
        required: true
      python:
        description: "python version"
        type: string
        required: true
      # the steps below decode this value with fromJson(), so it carries JSON text
      github:
        description: "top workflow's `github`"
        type: string
        required: true
      platform:
        description: "platform to build, podman build --platform="
        type: string
        required: true
      subscription:
        description: "add RHEL subscription from github secret"
        type: boolean
        required: false
        default: false
jobs:
  build:
    # arm64 builds run on the native arm runner, every other platform on the x86_64 runner
    # https://docs.github.com/en/actions/how-tos/using-github-hosted-runners/using-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
    runs-on: "${{ inputs.platform == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}"
    env:
      # Some pieces of code (image pulls, for example) in podman consult TMPDIR or default to /var/tmp
      TMPDIR: "/home/runner/.local/share/containers/tmpdir"
      # Use the rootful instance of podman for sharing images with cri-o
      # https://podman-desktop.io/blog/sharing-podman-images-with-kubernetes-cluster#introduction
      # https://access.redhat.com/solutions/6986565
      CONTAINER_HOST: "unix:///var/run/podman/podman.sock"
      # We don't push here when building PRs, so we can use the same IMAGE_REGISTRY in all branches of the workflow
      IMAGE_REGISTRY: "ghcr.io/${{ github.repository }}/workbench-images"
      # GitHub image registry used for storing $(CONTAINER_ENGINE)'s cache
      CACHE: "ghcr.io/${{ github.repository }}/workbench-images/build-cache"
      TRIVY_VERSION: "0.64.1"
      # Targets (and their folder) that should be scanned using FS instead of IMAGE scan due to resource constraints
      TRIVY_SCAN_FS_JSON: "{}"
      # Makefile variables
      BUILD_ARCH: "${{ inputs.platform }}"
      RELEASE_PYTHON_VERSION: "${{ inputs.python }}"
steps:
# image repository name must be lowercase
# (${VAR,,} is the bash lowercase expansion; the results replace the job-level values for later steps)
- name: downcase IMAGE_REGISTRY and CACHE
  run: |
    {
      echo "IMAGE_REGISTRY=${IMAGE_REGISTRY,,}"
      echo "CACHE=${CACHE,,}"
    } >> "${GITHUB_ENV}"
# Exactly one of the two checkout steps runs, selected by the caller's event name.
- uses: actions/checkout@v5
  if: fromJson(inputs.github).event_name != 'pull_request_target'
# we need to checkout the pr branch, not pr target (the default for pull_request_target)
# user access check is done in calling workflow
- uses: actions/checkout@v5
  if: fromJson(inputs.github).event_name == 'pull_request_target'
  with:
    ref: 'refs/pull/${{ fromJson(inputs.github).event.number }}/merge'
# https://github.com/docker/setup-qemu-action?tab=readme-ov-file#about
# https://www.itix.fr/blog/qemu-user-static-with-podman/
- name: Set up QEMU for non-native architecture
  if: contains(fromJSON('["linux/s390x", "linux/ppc64le"]'), inputs.platform)
  env:
    platform: ${{ inputs.platform }}
  # ${platform#*/} drops everything up to the first slash, leaving only the architecture name
  run: |
    docker run --rm --privileged tonistiigi/binfmt --install ${platform#*/}
- run: |
    mkdir -p $TMPDIR
# do this early because it's fast and why not
# Decodes the base64 key from the GIT_CRYPT_KEY secret into a temporary file,
# unlocks the checkout with it and removes the file again.
- name: Unlock encrypted secrets with git-crypt
  if: ${{ inputs.subscription }}
  env:
    GIT_CRYPT_KEY: ${{ secrets.GIT_CRYPT_KEY }}
  run: |
    # no `-x` here: the script handles key material
    set -Eeuo pipefail
    sudo apt-get update
    # -y: never stop on a confirmation prompt, the runner has no terminal to answer it
    sudo apt-get install -y git-crypt
    # remove the decoded key on every exit path, including a failed unlock
    trap 'rm -f ./git-crypt-key' EXIT
    echo "${GIT_CRYPT_KEY}" | base64 --decode > ./git-crypt-key
    git-crypt unlock ./git-crypt-key
# https://console.redhat.com/insights/connector/activation-keys
# This runs slower than storing the entitlement certificates with git-crypt,
# but on the other hand, it's then not necessary to regularly update them in the repo.
#
# Registers a throw-away UBI9 container with subscription-manager so that the
# certificates land in ./entitlement and ./consumer, then lists both directories
# in mounts.conf and installs the pull secret as the user's containers auth file.
- name: Add subscriptions from GitHub secret
  if: ${{ inputs.subscription }}
  env:
    SUBSCRIPTION_ORG: ${{ secrets.SUBSCRIPTION_ORG }}
    SUBSCRIPTION_ACTIVATION_KEY: ${{ secrets.SUBSCRIPTION_ACTIVATION_KEY }}
  run: |
    # https://access.redhat.com/solutions/5870841
    # https://github.com/containers/common/issues/1735
    mkdir entitlement
    mkdir consumer
    # every expansion is quoted so that a value containing spaces or glob
    # characters is passed through as a single, literal argument
    docker run \
      -v "${PWD}/entitlement:/etc/pki/entitlement:Z" \
      -v "${PWD}/consumer:/etc/pki/consumer:Z" \
      --rm -t registry.access.redhat.com/ubi9/ubi \
      /usr/sbin/subscription-manager register --org="${SUBSCRIPTION_ORG}" --activationkey="${SUBSCRIPTION_ACTIVATION_KEY}"
    # paths are printf arguments, not part of the format string, so a `%` or `\` in $PWD stays literal
    printf '%s\n' \
      "${PWD}/entitlement:/etc/pki/entitlement" \
      "${PWD}/consumer:/etc/pki/consumer" | sudo tee /usr/share/containers/mounts.conf
    mkdir -p "$HOME/.config/containers/"
    sudo cp "${PWD}/ci/secrets/pull-secret.json" "$HOME/.config/containers/auth.json"
# for bin/buildinputs in scripts/sandbox.py
- uses: actions/setup-go@v5
  with:
    cache-dependency-path: scripts/buildinputs/go.sum
- run: |
    sudo apt-get update
# authenticate against ghcr.io with the workflow's own token
- name: Login to GitHub Container Registry
  uses: docker/login-action@v3
  with:
    registry: "ghcr.io"
    username: "${{ github.actor }}"
    password: "${{ secrets.GITHUB_TOKEN }}"
# region Free up disk space
# Only targets whose name mentions rocm, cuda, pytorch or tensorflow get the cleanup.
- name: Free up additional disk space
  # https://docs.github.com/en/actions/learn-github-actions/expressions
  if: >-
    ${{ contains(inputs.target, 'rocm') || contains(inputs.target, 'cuda') ||
    contains(inputs.target, 'pytorch') || contains(inputs.target, 'tensorflow') }}
  run: |
    set -x
    df -h
    sudo apt-get update
    for package_pattern in '^dotnet-.*' '^llvm-.*' 'php.*' '^mongodb-.*'; do
      sudo apt-get remove -y "${package_pattern}"
    done
    sudo apt-get autoremove -y
    sudo apt-get clean
    # the deletions run as background jobs in parallel; `wait` joins them all
    for unused_dir in \
      /usr/local/.ghcup \
      /usr/local/lib/android \
      /usr/local/share/boost \
      /usr/local/lib/node_modules \
      /usr/share/dotnet \
      /opt/ghc \
      /opt/hostedtoolcache/CodeQL
    do
      sudo rm -rf "${unused_dir}" &
    done
    sudo docker image prune --all --force &
    wait
    df -h
# the outcome of this step is checked again by the very last step of the job
- id: install-compsize
  run: |
    sudo apt-get install -y btrfs-compsize
- name: Mount lvm overlay for podman builds
  run: |
    # disk and memory usage is printed before and after the overlay script
    show_usage() {
      df -h
      free -h
    }
    show_usage
    bash ./ci/cached-builds/gha_lvm_overlay.sh
    show_usage
# endregion
# region Podman setup
# https://github.com/containers/buildah/issues/2521#issuecomment-884779112
- name: Workaround https://github.com/containers/podman/issues/22152#issuecomment-2027705598
  run: |
    sudo apt-get -qq remove podman crun
# https://docs.github.com/en/actions/reference/variables-reference#default-environment-variables
# https://docs.github.com/en/actions/how-tos/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables
# The whole linuxbrew prefix is cached per runner OS and architecture;
# the install steps that follow are skipped on a cache hit.
- id: cached-linuxbrew
  uses: actions/cache@v4
  with:
    key: "linuxbrew-${{ runner.os }}-${{ runner.arch }}"
    path: "/home/linuxbrew/.linuxbrew"
# Both install variants run only when the linuxbrew cache was not restored.
- name: Install podman (linux/amd64, or qemu-user emulation)
  if: >-
    contains(fromJSON('["linux/amd64", "linux/s390x", "linux/ppc64le"]'), inputs.platform)
    && steps.cached-linuxbrew.outputs.cache-hit != 'true'
  run: |
    /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
    /home/linuxbrew/.linuxbrew/bin/brew install podman
# Warning: Your CPU architecture (arm64) is not supported. We only support
# x86_64 CPU architectures. You will be unable to use binary packages (bottles).
#
# This is a Tier 2 configuration:
# https://docs.brew.sh/Support-Tiers#tier-2
# Do not report any issues to Homebrew/* repositories!
# Read the above document instead before opening any issues or PRs.
- name: Install podman (linux/arm64)
  if: >-
    inputs.platform == 'linux/arm64'
    && steps.cached-linuxbrew.outputs.cache-hit != 'true'
  # Error: podman: no bottle available!
  # If you're feeling brave, you can try to install from source with:
  run: |
    /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
    /home/linuxbrew/.linuxbrew/bin/brew install --build-from-source podman
- name: Add linuxbrew to PATH
  run: |
    echo "/home/linuxbrew/.linuxbrew/bin/" >> "${GITHUB_PATH}"
# Installs the repository's containers.conf / storage.conf / registries.conf for both
# the system locations and the brew-installed podman, resets podman storage, then
# installs and starts the podman.service / podman.socket systemd units copied from
# ci/cached-builds. PODMAN_SOCK is exported through GITHUB_ENV for later steps.
# The statement order matters: config files are copied before the reset, and the
# stale /var/run/podman directory is removed before the socket unit is started.
- name: Configure Podman
run: |
set -Eeuxo pipefail
# podman running as service ignores the TMPDIR env var here, let's give it a bind-mount to /var/tmp
mkdir -p $TMPDIR
sudo mount --bind -o rw,noexec,nosuid,nodev,bind $TMPDIR /var/tmp
# podman from brew has its own /etc (was giving me Failed to obtain podman configuration: runroot must be set)
# the (default) config location is also where cri-o gets its storage defaults (that can be overriden in crio.conf)
sudo cp ci/cached-builds/containers.conf /etc/containers.conf
sudo cp ci/cached-builds/containers.conf /home/linuxbrew/.linuxbrew/opt/podman/etc/containers.conf
sudo cp ci/cached-builds/storage.conf /etc/containers/storage.conf
sudo cp ci/cached-builds/storage.conf /home/linuxbrew/.linuxbrew/opt/podman/etc/containers/storage.conf
sudo cp ci/cached-builds/registries.conf /etc/containers/registries.conf
sudo cp ci/cached-builds/registries.conf /home/linuxbrew/.linuxbrew/opt/podman/etc/containers/registries.conf
# should reset storage when changing storage.conf
mkdir -p $HOME/.local/share/containers/storage/tmp
# remote (CONTAINER_HOST) podman does not do reset (and refuses --force option)
sudo /home/linuxbrew/.linuxbrew/opt/podman/bin/podman system reset --force
# https://github.com/containers/podman/pull/25504
# podman 5.5.0: The podman system reset command no longer removes the user's podman.sock API socket
sudo rm -rf /var/run/podman
# https://github.com/containers/podman/blob/main/docs/tutorials/socket_activation.md
# since `brew services start podman` is buggy, let's do our own brew-compatible service
# Regarding directory paths, see https://unix.stackexchange.com/questions/224992/where-do-i-put-my-systemd-unit-file
sudo mkdir -p /usr/local/lib/systemd/system/
sudo cp ci/cached-builds/podman.service /usr/local/lib/systemd/system/podman.service
sudo cp ci/cached-builds/podman.socket /usr/local/lib/systemd/system/podman.socket
sudo systemctl daemon-reload
sudo systemctl unmask --now podman.service podman.socket
sudo systemctl start podman.socket
# needed (much) later for trivy
echo "PODMAN_SOCK=/var/run/podman/podman.sock" >> $GITHUB_ENV
# quick check podman works
podman ps
# Diagnostics for the podman setup; executed only when an earlier step has failed.
- name: Show error logs (on failure)
  if: failure()
  run: |
    set -Eeuxo pipefail
    journalctl -xe
    # a missing socket or listener is reported, it does not fail this step
    if ! ls -AlF /var/run/podman/podman.sock; then
      echo "Socket /var/run/podman/podman.sock not found"
    fi
    if ! sudo ss -xlpn | grep 'podman.sock'; then
      echo "No active listener found for podman.sock via ss"
    fi
# Publishes three step outputs:
#   IMAGE_TAG          - <sanitized ref name, at most 40 chars>_<commit sha>
#   OUTPUT_IMAGE       - <IMAGE_REGISTRY>:<target>-<IMAGE_TAG>
#   SANITIZED_PLATFORM - the platform with every character outside [a-zA-Z0-9._-] replaced by "_"
- name: Calculate image name and tag
  id: calculated_vars
  env:
    # expression values reach the script as environment variables instead of being
    # pasted into the script text, so a crafted ref name cannot inject shell code
    REF_NAME: ${{ github.ref_name }}
    COMMIT_SHA: ${{ github.sha }}
    TARGET: ${{ inputs.target }}
    PLATFORM: ${{ inputs.platform }}
  run: |
    set -Eeuo pipefail
    # replace everything that is not allowed in an image tag with an underscore
    sanitize() { sed 's/[^a-zA-Z0-9._-]/_/g'; }
    # Need for sanitization explained in https://github.com/opendatahub-io/notebooks/issues/631
    # For length, Docker image tags have 128-character limit, and we form them as <inputs.target>-<ref_name>_<sha>
    # therefore since sha is 40 characters, and our target names are <40 chars, we should cut ref_name at 40
    # `cut` has to stay inside the command substitution: an assignment that is itself
    # part of a pipeline runs in a subshell and leaves the variable empty
    SANITIZED_REF_NAME=$(printf '%s\n' "${REF_NAME}" | sanitize | cut -c 1-40)
    IMAGE_TAG="${SANITIZED_REF_NAME}_${COMMIT_SHA}"
    {
      echo "IMAGE_TAG=${IMAGE_TAG}"
      echo "OUTPUT_IMAGE=${IMAGE_REGISTRY}:${TARGET}-${IMAGE_TAG}"
      echo "SANITIZED_PLATFORM=$(printf '%s\n' "${PLATFORM}" | sanitize)"
    } >> "$GITHUB_OUTPUT"
# endregion
# region Image build
# The `secrets` context cannot be used in a step-level `if:` (the workflow is rejected
# with "Unrecognized named-value: 'secrets'"), so the secrets are handed over as
# environment variables and the presence check is done by the script itself.
- name: Login to quay.io/aipcc (if the secret is present)
  shell: bash
  env:
    AIPCC_QUAY_BOT_USERNAME: ${{ secrets.AIPCC_QUAY_BOT_USERNAME }}
    AIPCC_QUAY_BOT_PASSWORD: ${{ secrets.AIPCC_QUAY_BOT_PASSWORD }}
  run: |
    # an unset secret arrives as an empty string: skip the login, do not fail the job
    if [[ -z "${AIPCC_QUAY_BOT_USERNAME}" ]]; then
      echo "AIPCC quay.io credentials are not available, skipping login"
      exit 0
    fi
    echo "${AIPCC_QUAY_BOT_PASSWORD}" | podman login quay.io/aipcc -u "${AIPCC_QUAY_BOT_USERNAME}" --password-stdin
# Output EXTRA_PODMAN_BUILD_ARGS is empty for every platform except linux/s390x.
- name: Compute extra podman build args
  id: extra-podman-build-args
  run: |
    set -Eeuxo pipefail
    case "${{ inputs.platform }}" in
      linux/s390x)
        # workaround for known issue https://github.com/zeromq/libzmq/pull/4486
        # In qemu-user, CACHELINE_SIZE probe is undefined
        EXTRA_PODMAN_BUILD_ARGS='--env=CXXFLAGS=-Dundefined=64'
        ;;
      *)
        EXTRA_PODMAN_BUILD_ARGS=""
        ;;
    esac
    echo "EXTRA_PODMAN_BUILD_ARGS=$EXTRA_PODMAN_BUILD_ARGS" >> "${GITHUB_OUTPUT}"
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#push
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request
# The two `make` steps are mutually exclusive. This variant reads and writes the build cache.
- name: "push|schedule|workflow_dispatch: make ${{ inputs.target }}"
  if: >-
    ${{ fromJson(inputs.github).event_name == 'push' ||
    fromJson(inputs.github).event_name == 'schedule' ||
    fromJson(inputs.github).event_name == 'workflow_dispatch' }}
  env:
    IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}"
    CONTAINER_BUILD_CACHE_ARGS: "${{ steps.extra-podman-build-args.outputs.EXTRA_PODMAN_BUILD_ARGS }} --cache-from ${{ env.CACHE }} --cache-to ${{ env.CACHE }}"
  run: |
    # print running stats on disk occupancy
    (while true; do df -h | grep "${HOME}/.local/share/containers"; sleep 30; done) &
    make ${{ inputs.target }}
# The pull request variant only reads the build cache and switches pushing off.
- name: "pull_request: make ${{ inputs.target }}"
  if: >-
    ${{ fromJson(inputs.github).event_name == 'pull_request' ||
    fromJson(inputs.github).event_name == 'pull_request_target' }}
  env:
    IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}"
    CONTAINER_BUILD_CACHE_ARGS: "${{ steps.extra-podman-build-args.outputs.EXTRA_PODMAN_BUILD_ARGS }} --cache-from ${{ env.CACHE }}"
    # We don't have access to image registry, so disable pushing
    PUSH_IMAGES: "no"
  run: |
    # print running stats on disk occupancy
    (while true; do df -h | grep "${HOME}/.local/share/containers"; sleep 30; done) &
    make ${{ inputs.target }}
- name: "Show podman images information"
  run: |
    podman images --digests
# endregion
# region Pytest image tests
# https://github.com/astral-sh/setup-uv
- name: Install the latest version of uv
  uses: astral-sh/setup-uv@v5
  with:
    version: "latest"
    python-version: "3.12"
    pyproject-file: "pyproject.toml"
    enable-cache: true
    cache-dependency-glob: "uv.lock"
- name: Check uv is installed correctly
  run: |
    uv version
- name: Install deps
  run: |
    uv sync --locked
# Runs the tests that carry none of the openshift, cuda or rocm markers against the image just built.
- name: Run Testcontainers container tests (in PyTest)
  env:
    DOCKER_HOST: "unix:///var/run/podman/podman.sock"
    TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE: "/var/run/podman/podman.sock"
    # pulling the Ryuk container from docker.io introduces CI flakiness
    TESTCONTAINERS_RYUK_DISABLED: "true"
  run: |
    set -Eeuxo pipefail
    uv run pytest --capture=fd tests/containers \
      -m 'not openshift and not cuda and not rocm' \
      --image="${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}"
# endregion Pytest image tests
# region Makefile image tests
# Later steps of this region are gated on the `tests` output of this step.
- name: "Check if we have tests or not"
  id: have-tests
  run: |
    ci/cached-builds/has_tests.py --target ${{ inputs.target }}
- name: "Change pull policy to IfNotPresent"
  run: |
    set -Eeuxo pipefail
    find . \( -name "statefulset.yaml" -o -name "pod.yaml" \) -type f -exec \
      sed -i'' 's/imagePullPolicy: Always/imagePullPolicy: IfNotPresent/g' {} \;
    git diff
# [INFO] Running command (('make deploy9-runtimes-rocm-tensorflow-ubi9-python-3.11',), {'shell': True})
# Deploying notebook from runtimes/rocm/tensorflow/ubi9-python-3.11/kustomize/base directory...
# sed: can't read runtimes/rocm/tensorflow/ubi9-python-3.11/kustomize/base/kustomization.yaml: No such file or directory
- name: "Fixup paths that prevent us from running rocm tests"
  if: steps.have-tests.outputs.tests == 'true'
  run: |
    set -Eeuxo pipefail
    mkdir -p runtimes/rocm
    # runtimes/rocm/<framework> becomes a symlink to ../rocm-<framework>
    for framework in tensorflow pytorch; do
      ln -s "../rocm-${framework}" "runtimes/rocm/${framework}"
    done
# https://cri-o.io/
# Adds the Kubernetes and CRI-O apt repositories for the versions given in `env`,
# installs cri-o, kubelet, kubeadm, kubectl and conntrack, replaces the CNI
# configuration with the one from ci/cached-builds, and starts crio.service.
- name: Install cri-o
  id: install-crio
  if: ${{ steps.have-tests.outputs.tests == 'true' }}
  env:
    # The versions are quoted on purpose: an unquoted 1.30 is a YAML float and would be read as 1.3
    # TODO(jdanek): install also "cri-tools=${CRIO_VERSION}.*" when updating to 1.33
    CRIO_VERSION: "1.32"
    # This has to be kept in sync with the packages above, otherwise
    # [ERROR KubeletVersion]: the kubelet version is higher than the control plane version.
    # This is not a supported version skew and may lead to a malfunctional cluster.
    # Kubelet version: "1.33.0" Control plane version: "1.30.12"
    KUBERNETES_VERSION: "1.33"
  run: |
    set -Eeuxo pipefail
    sudo apt-get update
    sudo apt-get install -y software-properties-common curl
    # gpg -o below does not create its target directory
    sudo mkdir -p /etc/apt/keyrings
    # https://github.com/cri-o/packaging?tab=readme-ov-file#distributions-using-deb-packages
    curl -fsSL https://pkgs.k8s.io/core:/stable:/v${KUBERNETES_VERSION}/deb/Release.key | \
      sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
    echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v${KUBERNETES_VERSION}/deb/ /" | \
      sudo tee /etc/apt/sources.list.d/kubernetes.list
    curl -fsSL https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v${CRIO_VERSION}/deb/Release.key | \
      sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/cri-o-apt-keyring.gpg
    echo "deb [signed-by=/etc/apt/keyrings/cri-o-apt-keyring.gpg] https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v${CRIO_VERSION}/deb/ /" | \
      sudo tee /etc/apt/sources.list.d/cri-o.list
    sudo apt-get update
    # [ERROR FileExisting-conntrack]: conntrack not found in system path
    # see man apt-patterns for the ~name=version* syntax
    # The following packages will be DOWNGRADED:
    # kubectl
    # E: Packages were downgraded and -y was used without --allow-downgrades.
    sudo apt-get install -y --allow-downgrades \
      "cri-o=${CRIO_VERSION}.*" \
      "kubelet=${KUBERNETES_VERSION}.*" "kubeadm=${KUBERNETES_VERSION}.*" "kubectl=${KUBERNETES_VERSION}.*" \
      conntrack
    # make use of /etc/cni/net.d/11-crio-ipv4-bridge.conflist so we don't
    # need a pod network and just use the default bridge
    sudo rm -rf /etc/cni/net.d/*
    # cat /etc/cni/net.d/11-crio-ipv4-bridge.conflist
    # https://github.com/containerd/containerd/blob/main/script%2Fsetup%2Finstall-cni
    # https://www.cni.dev/plugins/current/main/bridge/
    sudo cp ci/cached-builds/11-crio-ipv4-bridge.conflist /etc/cni/net.d/11-crio-ipv4-bridge.conflist
    sudo cp ci/cached-builds/crio.conf /etc/crio/crio.conf.d/
    sudo systemctl start crio.service
# Runs only after a failure; the status query itself is not allowed to fail the step.
- name: Show crio debug data (on failure)
  if: failure() && steps.have-tests.outputs.tests == 'true'
  run: |
    set -Eeuxo pipefail
    sudo systemctl status crio.service || true
    sudo journalctl -xeu crio.service
# do this early, it's a good check that cri-o is not completely broken
- name: "Show crio images information"
  if: steps.have-tests.outputs.tests == 'true'
  run: |
    sudo crictl images
# Prepares the host (swap off, br_netfilter module, IPv4 forwarding, firewall and NAT
# rules for the cni0 bridge), initializes the cluster with kubeadm from
# ci/cached-builds/kubeadm.yaml, and copies the admin kubeconfig to $HOME/.kube/config
# owned by the current user.
# NOTE(review): the MASQUERADE rule hard-codes 10.85.0.0/16 and eth0 - presumably the
# pod subnet from the bridge conflist and the runner's uplink interface; confirm.
- name: Install Kubernetes cluster
if: ${{ steps.have-tests.outputs.tests == 'true' }}
run: |
set -Eeuxo pipefail
sudo swapoff -a
sudo modprobe br_netfilter
sudo sysctl -w net.ipv4.ip_forward=1
# Was getting strange DNS resolution errors from pods that don't seem to want to go away sometimes:
# Resolving raw.githubusercontent.com (raw.githubusercontent.com)... failed: Name or service not known.
# wget: unable to resolve host address ‘raw.githubusercontent.com’
# Here's what helped:
# https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/#known-issues
# https://github.com/kubernetes/kubernetes/blob/e4c1f980b76fecece30c2f77885a7117192170a6/CHANGELOG/CHANGELOG-1.30.md?plain=1#L1454
# https://github.com/canonical/microk8s/issues/68#issuecomment-404923563
sudo ufw allow in on cni0
sudo ufw allow out on cni0
sudo ufw default allow routed
sudo iptables -P FORWARD ACCEPT
sudo iptables -t nat -A POSTROUTING -s 10.85.0.0/16 -o eth0 -j MASQUERADE
# https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm
sudo kubeadm init --config=ci/cached-builds/kubeadm.yaml
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# Diagnostics for the kubeadm/kubelet setup; runs only after a failure and only when
# the cri-o installation itself succeeded.
- name: Show kubelet debug data (on failure)
  if: ${{ failure() && steps.have-tests.outputs.tests == 'true' && steps.install-crio.outcome == 'success' }}
  run: |
    set -Eeuxo pipefail
    sudo systemctl status kubelet || true
    sudo journalctl -xeu kubelet
    # Here is one example how you may list all running Kubernetes containers by using crictl:
    # (an empty match is reported instead of failing this diagnostics step under `pipefail`)
    sudo crictl --runtime-endpoint unix:///var/run/crio/crio.sock ps -a | grep kube | grep -v pause \
      || echo "No Kubernetes containers found via crictl"
    # Once you have found the failing container, you can inspect its logs with:
    # crictl --runtime-endpoint unix:///var/run/crio/crio.sock logs CONTAINERID
# Each wait below is capped at 100 seconds.
- name: Show nodes status and wait for readiness
  if: steps.have-tests.outputs.tests == 'true'
  run: |
    kubectl describe nodes
    # on timeout the nodes are described once more before the step fails
    kubectl wait --for=condition=Ready nodes --all --timeout=100s || (kubectl describe nodes && false)
- name: Wait for pods to be running
  if: steps.have-tests.outputs.tests == 'true'
  run: |
    set -Eeuxo pipefail
    kubectl wait deployments --all --all-namespaces --for=condition=Available --timeout=100s
    kubectl wait pods --all --all-namespaces --for=condition=Ready --timeout=100s
- name: "Install local-path provisioner"
  if: steps.have-tests.outputs.tests == 'true'
  run: |
    set -Eeuxo pipefail
    kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.31/deploy/local-path-storage.yaml
    kubectl wait deployments --all --namespace=local-path-storage --for=condition=Available --timeout=100s
    # https://kubernetes.io/docs/tasks/administer-cluster/change-default-storage-class/
    kubectl get storageclass
    kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
- name: "Run image tests"
  # skip on s390x because we are unable to install requirements-elyra.txt that's installed by runtime image tests
  # https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/etc/generic/requirements-elyra.txt
  if: ${{ steps.have-tests.outputs.tests == 'true' && !contains(fromJSON('["linux/s390x"]'), inputs.platform) }}
  env:
    IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}"
    # for make deploy, mandatory to specify for the more exotic cases
    NOTEBOOK_TAG: "${{ inputs.target }}-${{ steps.calculated_vars.outputs.IMAGE_TAG }}"
  run: |
    python3 ci/cached-builds/make_test.py --target ${{ inputs.target }}
# endregion
# Same pytest suite as the Testcontainers step, this time selecting the `openshift` marker.
- name: Run OpenShift container tests (in PyTest)
  if: steps.have-tests.outputs.tests == 'true'
  env:
    # TODO(jdanek): this Testcontainers stuff should not be necessary but currently it has to be there
    DOCKER_HOST: "unix:///var/run/podman/podman.sock"
    TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE: "/var/run/podman/podman.sock"
    # pulling the Ryuk container from docker.io introduces CI flakiness
    TESTCONTAINERS_RYUK_DISABLED: "true"
  run: |
    set -Eeuxo pipefail
    uv run pytest --capture=fd tests/containers \
      -m 'openshift and not cuda and not rocm' \
      --image="${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}"
# region Trivy vulnerability scan
# Decides whether Trivy runs and on what: scheduled runs always scan, pull requests
# only when they carry the `trivy-scan` label. A target listed in TRIVY_SCAN_FS_JSON
# is scanned as a folder (type "fs"), anything else as the built image (type "image").
# The `target` and `type` outputs stay unset when no scan is due.
- name: "pull_request|schedule: resolve target if Trivy scan should run"
  id: resolve-target
  if: fromJson(inputs.github).event_name == 'pull_request' || fromJson(inputs.github).event_name == 'schedule'
  env:
    EVENT_NAME: ${{ fromJson(inputs.github).event_name }}
    HAS_TRIVY_LABEL: ${{ contains(fromJson(inputs.github).event.pull_request.labels.*.name, 'trivy-scan') }}
    FS_SCAN_FOLDER: ${{ fromJson(env.TRIVY_SCAN_FS_JSON)[inputs.target] }}
  run: |
    SCAN_REQUESTED="false"
    if [[ "$EVENT_NAME" == "schedule" ]]; then
      SCAN_REQUESTED="true"
    elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_TRIVY_LABEL" == "true" ]]; then
      SCAN_REQUESTED="true"
    fi

    if [[ "$SCAN_REQUESTED" == "true" ]]; then
      if [[ -n "$FS_SCAN_FOLDER" ]]; then
        TARGET="$FS_SCAN_FOLDER"
        TYPE="fs"
      else
        TARGET="${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}"
        TYPE="image"
      fi
    fi

    if [[ -z "$TARGET" ]]; then
      echo "Trivy scan won't run"
    else
      echo "target=$TARGET" >> $GITHUB_OUTPUT
      echo "type=$TYPE" >> $GITHUB_OUTPUT
      echo "Trivy scan will run on $TARGET ($TYPE)"
    fi
# Runs the Trivy container (version from TRIVY_VERSION) against the target chosen by the
# `resolve-target` step and appends the rendered Markdown report to the job summary.
# For an image scan the podman socket is mounted into the container; for a filesystem
# scan the workspace is mounted at /workspace and the target path is prefixed with it.
# NOTE(review): $PODMAN_ARGS and $SCAN_ARGS each hold several arguments in one string and
# are expanded unquoted so that they split into words - do not quote them.
- name: Run Trivy vulnerability scanner
if: ${{ steps.resolve-target.outputs.target }}
run: |
REPORT_FOLDER=${{ github.workspace }}/report
REPORT_FILE=trivy-report.md
REPORT_TEMPLATE=trivy-markdown.tpl
mkdir -p $REPORT_FOLDER
cp ci/$REPORT_TEMPLATE $REPORT_FOLDER
SCAN_TARGET=${{ steps.resolve-target.outputs.target }}
SCAN_TYPE=${{ steps.resolve-target.outputs.type }}
echo "Scanning $SCAN_TARGET ($SCAN_TYPE)"
if [[ "$SCAN_TYPE" == "image" ]]; then
SCAN_ARGS="--image-src podman --podman-host /var/run/podman/podman.sock"
PODMAN_ARGS="-v ${PODMAN_SOCK}:/var/run/podman/podman.sock"
elif [[ "$SCAN_TYPE" == "fs" ]]; then
WORKSPACE_FOLDER="/workspace"
SCAN_TARGET="$WORKSPACE_FOLDER/$SCAN_TARGET"
PODMAN_ARGS="-v ${{ github.workspace }}:$WORKSPACE_FOLDER"
fi
# have trivy access podman socket,
# https://github.com/aquasecurity/trivy/issues/580#issuecomment-666423279
podman run --rm \
$PODMAN_ARGS \
-v ${REPORT_FOLDER}:/report \
docker.io/aquasec/trivy:$TRIVY_VERSION \
$SCAN_TYPE \
$SCAN_ARGS \
--scanners vuln --ignore-unfixed \
--exit-code 0 --timeout 30m \
--format template --template "@/report/$REPORT_TEMPLATE" -o /report/$REPORT_FILE \
$SCAN_TARGET
cat $REPORT_FOLDER/$REPORT_FILE >> $GITHUB_STEP_SUMMARY
# endregion
# region check-payload for FIPS compliance
# The GOPATH output is consumed by the setup-go step and by the check-payload scan step.
- id: check-payload-vars
  working-directory: scripts/check-payload
  run: |
    echo "GOPATH=${{ github.workspace }}/go-check-payload" >> "${GITHUB_OUTPUT}"
# for https://github.com/openshift/check-payload to cache the built binary
- uses: actions/setup-go@v5
  env:
    GOPATH: ${{ steps.check-payload-vars.outputs.GOPATH }}
  with:
    cache-dependency-path: "scripts/check-payload/go.sum"
# F0512 15:43:03.219076 21568 main.go:294] Error: exec: "oc": executable file not found in $PATH
- name: Install oc client
  run: |
    # Install the oc client
    oc_archive=/tmp/openshift-client-linux.tar.gz
    curl -L https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/clients/ocp/stable/openshift-client-linux.tar.gz -o "${oc_archive}"
    # only the `oc` binary is extracted from the archive
    tar -xzvf "${oc_archive}" oc
    rm -f "${oc_archive}"
    sudo mv ./oc /usr/local/bin
# perform `podman image mount` ourselves, and then follow the scenario from
# https://github.com/openshift/check-payload/pull/154, that is because
# `check-payload scan image --spec` insists on pulling the image, even if already present,
# that causes trouble when checking PRs (image not pushed) and requires `podman login` as root
# (we run podman as root in the GHA to reuse container storage in Kubernetes)
# use sudo to avoid
# podman error (args=[image mount ghcr.io/...])
# (stderr=Error: cannot use command "podman image mount" with the remote podman client
# and use --preserve-env=PATH to avoid
# F0512 16:31:58.425584 9911 main.go:294] Error: exec: "podman": executable file not found in $PATH
- name: Check image with check-payload for FIPS compliance
  working-directory: scripts/check-payload
  env:
    GOPATH: ${{ steps.check-payload-vars.outputs.GOPATH }}
  run: |
    set -Eeuxo pipefail
    # resolve podman under current user, not under sudo/root
    podman_bin="$(which podman)"
    # mount the image
    image_rootfs=$(sudo "${podman_bin}" image mount "${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}")
    # run the check-payload scan
    sudo --preserve-env=PATH go tool github.com/openshift/check-payload scan local --path "${image_rootfs}"
    # unmount the image
    sudo "${podman_bin}" image unmount --all
# endregion
# region Typescript (browser) image tests
# https://playwright.dev/docs/ci
# https://playwright.dev/docs/docker
# we leave little free disk space after we mount LVM for podman storage
# not enough to install playwright; running playwright in podman uses the space we have
#
# Runs only for targets whose name contains 'codeserver'. The test commands are fed to
# bash inside the Playwright container through a heredoc on stdin (hence --interactive);
# tests/browser is mounted at /mnt and the image under test is passed in as TEST_TARGET.
- name: Run Playwright tests
if: ${{ contains(inputs.target, 'codeserver') }}
# --ipc=host because Microsoft says so in Playwright docs
# --net=host because testcontainers connects to the Reaper container's exposed port
# we need to pass through the relevant environment variables
# DEBUG configures Node.js debuggers, sets different verbosity as needed
# CI=true is set on every CI nowadays
# PODMAN_SOCK should be mounted to /var/run/docker.sock, other likely mounting locations may not exist (mkdir -p)
# TEST_TARGET is the workbench image the test will run
# --volume(s) let us access docker socket and not clobber host's node_modules
run: |
podman run \
--interactive --rm \
--ipc=host \
--net=host \
--env "CI=true" \
--env "NPM_CONFIG_fund=false" \
--env "DEBUG=testcontainers:*" \
--env "PODMAN_SOCK=/var/run/docker.sock" \
--env "TEST_TARGET" \
--volume ${PODMAN_SOCK}:/var/run/docker.sock \
--volume ${PWD}:/mnt \
--volume /mnt/node_modules \
mcr.microsoft.com/playwright:v1.53.1-noble \
/bin/bash <<EOF
set -Eeuxo pipefail
cd /mnt
npm install -g pnpm && pnpm install
pnpm exec playwright test
exit 0
EOF
working-directory: tests/browser
env:
TEST_TARGET: "${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}"
# The Playwright report is uploaded for pull requests on codeserver targets, even when
# an earlier step failed (but not when the run was cancelled).
- uses: actions/upload-artifact@v4
  if: ${{ !cancelled() && fromJson(inputs.github).event_name == 'pull_request' && contains(inputs.target, 'codeserver') }}
  with:
    name: "${{ inputs.target }}_${{ steps.calculated_vars.outputs.SANITIZED_PLATFORM }}_playwright-report"
    path: "tests/browser/playwright-report/"
    retention-days: 30
# endregion
# Final disk usage report; compsize is only run when its installation step succeeded.
- if: "${{ !cancelled() }}"
  run: |
    df -h
- if: "${{ !cancelled() && steps.install-compsize.outcome == 'success' }}"
  run: |
    sudo compsize -x "${HOME}/.local/share/containers"