Switch to AIPCC Base Images and Add Perl Utility for RHEL #2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # inspired by | ||
|
Check failure on line 1 in .github/workflows/build-notebooks-TEMPLATE.yaml
|
||
| # https://github.com/thesuperzapper/kubeflow/blob/master/.github/workflows/example_notebook_servers_publish_TEMPLATE.yaml | ||
| --- | ||
| name: Build & Publish Notebook Servers (TEMPLATE) | ||
| "on": | ||
| workflow_call: | ||
| inputs: | ||
| # https://docs.github.com/en/actions/learn-github-actions/variables#default-environment-variables | ||
| # https://docs.github.com/en/actions/learn-github-actions/contexts | ||
| # Passed verbatim to `make` and used to build the image name | ||
| target: | ||
| description: 'make target to build' | ||
| type: string | ||
| required: true | ||
| # Exported to the Makefile as RELEASE_PYTHON_VERSION | ||
| python: | ||
| description: 'python version' | ||
| type: string | ||
| required: true | ||
| # JSON string; steps read it with fromJson(inputs.github) | ||
| github: | ||
| description: "top workflow's `github`" | ||
| type: string | ||
| required: true | ||
| # Selects the runner and is exported to the Makefile as BUILD_ARCH | ||
| platform: | ||
| description: 'platform to build, podman build --platform=' | ||
| type: string | ||
| required: true | ||
| # Gates the git-crypt and subscription-manager steps | ||
| subscription: | ||
| description: 'add RHEL subscription from github secret' | ||
| type: boolean | ||
| required: false | ||
| default: false | ||
| jobs: | ||
| build: | ||
| # https://docs.github.com/en/actions/how-tos/using-github-hosted-runners/using-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories | ||
| # linux/arm64 gets the arm runner; every other platform runs on ubuntu-24.04 | ||
| # (s390x and ppc64le are handled through the QEMU step further down) | ||
| runs-on: ${{ inputs.platform == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} | ||
| env: | ||
| # Some pieces of code (image pulls, for example) in podman consult TMPDIR or default to /var/tmp | ||
| TMPDIR: /home/runner/.local/share/containers/tmpdir | ||
| # Use the rootful instance of podman for sharing images with cri-o | ||
| # https://podman-desktop.io/blog/sharing-podman-images-with-kubernetes-cluster#introduction | ||
| # https://access.redhat.com/solutions/6986565 | ||
| CONTAINER_HOST: unix:///var/run/podman/podman.sock | ||
| # We don't push here when building PRs, so we can use the same IMAGE_REGISTRY in all branches of the workflow | ||
| IMAGE_REGISTRY: "ghcr.io/${{ github.repository }}/workbench-images" | ||
| # GitHub image registry used for storing $(CONTAINER_ENGINE)'s cache | ||
| CACHE: "ghcr.io/${{ github.repository }}/workbench-images/build-cache" | ||
| # Tag of the docker.io/aquasec/trivy image run by the Trivy scan step | ||
| TRIVY_VERSION: 0.64.1 | ||
| # Targets (and their folder) that should be scanned using FS instead of IMAGE scan due to resource constraints | ||
| # JSON object indexed by inputs.target; currently empty, so every target gets an image scan | ||
| TRIVY_SCAN_FS_JSON: '{}' | ||
| # Makefile variables | ||
| BUILD_ARCH: ${{ inputs.platform }} | ||
| RELEASE_PYTHON_VERSION: ${{ inputs.python }} | ||
| steps: | ||
| # image repository name must be lowercase | ||
| # the lowercased values are appended to GITHUB_ENV for use by the following steps | ||
| - name: downcase IMAGE_REGISTRY and CACHE | ||
| run: | | ||
| echo "IMAGE_REGISTRY=${IMAGE_REGISTRY,,}" >>${GITHUB_ENV} | ||
| echo "CACHE=${CACHE,,}" >>${GITHUB_ENV} | ||
| # The two checkout steps have complementary conditions on the caller's event name, so exactly one of them runs | ||
| - uses: actions/checkout@v5 | ||
| if: ${{ fromJson(inputs.github).event_name != 'pull_request_target' }} | ||
| # we need to checkout the pr branch, not pr target (the default for pull_request_target) | ||
| # user access check is done in calling workflow | ||
| - uses: actions/checkout@v5 | ||
| if: ${{ fromJson(inputs.github).event_name == 'pull_request_target' }} | ||
| with: | ||
| ref: "refs/pull/${{ fromJson(inputs.github).event.number }}/merge" | ||
| # https://github.com/docker/setup-qemu-action?tab=readme-ov-file#about | ||
| # https://www.itix.fr/blog/qemu-user-static-with-podman/ | ||
| # ${platform#*/} drops everything up to the first slash, so only the architecture name is passed to binfmt | ||
| - name: Set up QEMU for non-native architecture | ||
| if: ${{ contains(fromJSON('["linux/s390x", "linux/ppc64le"]'), inputs.platform) }} | ||
| run: docker run --rm --privileged tonistiigi/binfmt --install ${platform#*/} | ||
| env: | ||
| platform: ${{ inputs.platform }} | ||
| # create the directory that the job-level TMPDIR variable points to | ||
| - run: mkdir -p $TMPDIR | ||
| # do this early because it's fast and why not | ||
| # Runs only when the caller requests a RHEL subscription. The base64-encoded key from the | ||
| # GIT_CRYPT_KEY secret is written to a temporary file, used to unlock the checkout, and removed again. | ||
| - name: Unlock encrypted secrets with git-crypt | ||
| if: ${{ inputs.subscription }} | ||
| run: | | ||
| sudo apt-get update | ||
| # -y: never stop at a confirmation prompt on the non-interactive runner | ||
| sudo apt-get install -y git-crypt | ||
| echo "${GIT_CRYPT_KEY}" | base64 --decode > ./git-crypt-key | ||
| git-crypt unlock ./git-crypt-key | ||
| rm ./git-crypt-key | ||
| env: | ||
| GIT_CRYPT_KEY: ${{ secrets.GIT_CRYPT_KEY }} | ||
| # https://console.redhat.com/insights/connector/activation-keys | ||
| # This runs slower than storing the entitlement certificates with git-crypt, | ||
| # but on the other hand, it's then not necessary to regularly update them in the repo. | ||
| # Registers inside a UBI9 container with ./entitlement and ./consumer mounted over /etc/pki/*, | ||
| # lists both directories in /usr/share/containers/mounts.conf, and installs the pull secret as auth.json. | ||
| - name: Add subscriptions from GitHub secret | ||
| if: ${{ inputs.subscription }} | ||
| run: | | ||
| # https://access.redhat.com/solutions/5870841 | ||
| # https://github.com/containers/common/issues/1735 | ||
| # -p: do not fail when the directories already exist | ||
| mkdir -p entitlement consumer | ||
| # all expansions are quoted so that paths and secret values are never word-split or globbed | ||
| docker run \ | ||
| -v "${PWD}/entitlement:/etc/pki/entitlement:Z" \ | ||
| -v "${PWD}/consumer:/etc/pki/consumer:Z" \ | ||
| --rm -t registry.access.redhat.com/ubi9/ubi \ | ||
| /usr/sbin/subscription-manager register --org="${SUBSCRIPTION_ORG}" --activationkey="${SUBSCRIPTION_ACTIVATION_KEY}" | ||
| # paths go in as printf arguments, not as part of the format string | ||
| printf '%s\n' "${PWD}/entitlement:/etc/pki/entitlement" "${PWD}/consumer:/etc/pki/consumer" | sudo tee /usr/share/containers/mounts.conf | ||
| mkdir -p "$HOME/.config/containers/" | ||
| sudo cp "${PWD}/ci/secrets/pull-secret.json" "$HOME/.config/containers/auth.json" | ||
| env: | ||
| SUBSCRIPTION_ORG: ${{ secrets.SUBSCRIPTION_ORG }} | ||
| SUBSCRIPTION_ACTIVATION_KEY: ${{ secrets.SUBSCRIPTION_ACTIVATION_KEY }} | ||
| # for bin/buildinputs in scripts/sandbox.py | ||
| - uses: actions/setup-go@v5 | ||
| with: | ||
| cache-dependency-path: "scripts/buildinputs/go.sum" | ||
| # refresh the apt package index ahead of the apt-get install/remove steps that follow | ||
| - run: sudo apt-get update | ||
| # ghcr.io hosts both IMAGE_REGISTRY and CACHE (see the job-level env) | ||
| - name: Login to GitHub Container Registry | ||
| uses: docker/login-action@v3 | ||
| with: | ||
| registry: ghcr.io | ||
| username: ${{ github.actor }} | ||
| password: ${{ secrets.GITHUB_TOKEN }} | ||
| # region Free up disk space | ||
| # Runs only for targets whose name contains rocm, cuda, pytorch or tensorflow. | ||
| # The rm and image-prune commands are started as background jobs; `wait` blocks until all of them are done. | ||
| # `df -h` before and after shows how much space was reclaimed. | ||
| - name: Free up additional disk space | ||
| # https://docs.github.com/en/actions/learn-github-actions/expressions | ||
| if: "${{ contains(inputs.target, 'rocm') || contains(inputs.target, 'cuda') || | ||
| contains(inputs.target, 'pytorch') || contains(inputs.target, 'tensorflow') }}" | ||
| run: | | ||
| set -x | ||
| df -h | ||
| sudo apt-get update | ||
| sudo apt-get remove -y '^dotnet-.*' | ||
| sudo apt-get remove -y '^llvm-.*' | ||
| sudo apt-get remove -y 'php.*' | ||
| sudo apt-get remove -y '^mongodb-.*' | ||
| sudo apt-get autoremove -y | ||
| sudo apt-get clean | ||
| sudo rm -rf /usr/local/.ghcup & | ||
| sudo rm -rf /usr/local/lib/android & | ||
| sudo rm -rf /usr/local/share/boost & | ||
| sudo rm -rf /usr/local/lib/node_modules & | ||
| sudo rm -rf /usr/share/dotnet & | ||
| sudo rm -rf /opt/ghc & | ||
| sudo rm -rf /opt/hostedtoolcache/CodeQL & | ||
| sudo docker image prune --all --force & | ||
| wait | ||
| df -h | ||
| # compsize is run by the last step of the job; that step checks this step's outcome through the id | ||
| - id: install-compsize | ||
| run: sudo apt-get install -y btrfs-compsize | ||
| # disk (df) and memory (free) figures are printed before and after the overlay script for comparison | ||
| - name: Mount lvm overlay for podman builds | ||
| run: | | ||
| df -h | ||
| free -h | ||
| bash ./ci/cached-builds/gha_lvm_overlay.sh | ||
| df -h | ||
| free -h | ||
| # endregion | ||
| # region Podman setup | ||
| # https://github.com/containers/buildah/issues/2521#issuecomment-884779112 | ||
| # removes the apt-installed podman and crun; podman is installed from Homebrew in the steps below | ||
| - name: Workaround https://github.com/containers/podman/issues/22152#issuecomment-2027705598 | ||
| run: sudo apt-get -qq remove podman crun | ||
| # Caches the Homebrew prefix per runner OS and architecture; | ||
| # both "Install podman" steps are skipped when this step reports a cache hit | ||
| - uses: actions/cache@v4 | ||
| # https://docs.github.com/en/actions/reference/variables-reference#default-environment-variables | ||
| # https://docs.github.com/en/actions/how-tos/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables | ||
| id: cached-linuxbrew | ||
| with: | ||
| path: /home/linuxbrew/.linuxbrew | ||
| key: linuxbrew-${{ runner.os }}-${{ runner.arch }} | ||
| # Runs for amd64, s390x and ppc64le platforms, and only when the linuxbrew cache missed | ||
| - name: Install podman (linux/amd64, or qemu-user emulation) | ||
| if: contains(fromJSON('["linux/amd64", "linux/s390x", "linux/ppc64le"]'), inputs.platform) && steps.cached-linuxbrew.outputs.cache-hit != 'true' | ||
| run: | | ||
| /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" | ||
| /home/linuxbrew/.linuxbrew/bin/brew install podman | ||
| # Warning: Your CPU architecture (arm64) is not supported. We only support | ||
| # x86_64 CPU architectures. You will be unable to use binary packages (bottles). | ||
| # | ||
| # This is a Tier 2 configuration: | ||
| # https://docs.brew.sh/Support-Tiers#tier-2 | ||
| # Do not report any issues to Homebrew/* repositories! | ||
| # Read the above document instead before opening any issues or PRs. | ||
| # arm64 variant of the step above: same installer, but podman is built from source (see the quoted brew messages) | ||
| - name: Install podman (linux/arm64) | ||
| if: inputs.platform == 'linux/arm64' && steps.cached-linuxbrew.outputs.cache-hit != 'true' | ||
| # Error: podman: no bottle available! | ||
| # If you're feeling brave, you can try to install from source with: | ||
| run: | | ||
| /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" | ||
| /home/linuxbrew/.linuxbrew/bin/brew install --build-from-source podman | ||
| # runs unconditionally, so the brew binaries are on PATH for later steps on a cache hit as well | ||
| - name: Add linuxbrew to PATH | ||
| run: echo "/home/linuxbrew/.linuxbrew/bin/" >> $GITHUB_PATH | ||
| # Sets up the rootful podman service that the rest of the job talks to through CONTAINER_HOST: | ||
| # 1. bind-mounts TMPDIR over /var/tmp, | ||
| # 2. copies containers.conf, storage.conf and registries.conf to both /etc and the brew prefix, | ||
| # 3. resets podman storage and removes the stale /var/run/podman directory, | ||
| # 4. installs and starts the repository's own podman.service / podman.socket systemd units, | ||
| # 5. exports PODMAN_SOCK for later steps and checks that `podman ps` works. | ||
| - name: Configure Podman | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| # podman running as service ignores the TMPDIR env var here, let's give it a bind-mount to /var/tmp | ||
| mkdir -p $TMPDIR | ||
| sudo mount --bind -o rw,noexec,nosuid,nodev,bind $TMPDIR /var/tmp | ||
| # podman from brew has its own /etc (was giving me Failed to obtain podman configuration: runroot must be set) | ||
| # the (default) config location is also where cri-o gets its storage defaults (that can be overriden in crio.conf) | ||
| sudo cp ci/cached-builds/containers.conf /etc/containers.conf | ||
| sudo cp ci/cached-builds/containers.conf /home/linuxbrew/.linuxbrew/opt/podman/etc/containers.conf | ||
| sudo cp ci/cached-builds/storage.conf /etc/containers/storage.conf | ||
| sudo cp ci/cached-builds/storage.conf /home/linuxbrew/.linuxbrew/opt/podman/etc/containers/storage.conf | ||
| sudo cp ci/cached-builds/registries.conf /etc/containers/registries.conf | ||
| sudo cp ci/cached-builds/registries.conf /home/linuxbrew/.linuxbrew/opt/podman/etc/containers/registries.conf | ||
| # should reset storage when changing storage.conf | ||
| mkdir -p $HOME/.local/share/containers/storage/tmp | ||
| # remote (CONTAINER_HOST) podman does not do reset (and refuses --force option) | ||
| sudo /home/linuxbrew/.linuxbrew/opt/podman/bin/podman system reset --force | ||
| # https://github.com/containers/podman/pull/25504 | ||
| # podman 5.5.0: The podman system reset command no longer removes the user's podman.sock API socket | ||
| sudo rm -rf /var/run/podman | ||
| # https://github.com/containers/podman/blob/main/docs/tutorials/socket_activation.md | ||
| # since `brew services start podman` is buggy, let's do our own brew-compatible service | ||
| # Regarding directory paths, see https://unix.stackexchange.com/questions/224992/where-do-i-put-my-systemd-unit-file | ||
| sudo mkdir -p /usr/local/lib/systemd/system/ | ||
| sudo cp ci/cached-builds/podman.service /usr/local/lib/systemd/system/podman.service | ||
| sudo cp ci/cached-builds/podman.socket /usr/local/lib/systemd/system/podman.socket | ||
| sudo systemctl daemon-reload | ||
| sudo systemctl unmask --now podman.service podman.socket | ||
| sudo systemctl start podman.socket | ||
| # needed (much) later for trivy | ||
| echo "PODMAN_SOCK=/var/run/podman/podman.sock" >> $GITHUB_ENV | ||
| # quick check podman works | ||
| podman ps | ||
| # Diagnostics for a failed setup: prints the journal, then checks that the podman socket exists and has a listener. | ||
| # The `|| echo` fallbacks keep the step going when the socket or listener is missing. | ||
| - name: Show error logs (on failure) | ||
| if: ${{ failure() }} | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| journalctl -xe | ||
| ls -AlF /var/run/podman/podman.sock || echo "Socket /var/run/podman/podman.sock not found" | ||
| sudo ss -xlpn | grep 'podman.sock' || echo "No active listener found for podman.sock via ss" | ||
| # Derives the image tag and image reference used by the build, test and scan steps. | ||
| # Outputs: | ||
| #   IMAGE_TAG          <sanitized ref name, max 40 chars>_<commit sha> | ||
| #   OUTPUT_IMAGE       <IMAGE_REGISTRY>:<inputs.target>-<IMAGE_TAG> | ||
| #   SANITIZED_PLATFORM inputs.platform with every character outside [a-zA-Z0-9._-] replaced by "_" | ||
| - name: Calculate image name and tag | ||
| id: calculated_vars | ||
| env: | ||
| # expression values are handed over as environment variables instead of being spliced into the script text | ||
| REF_NAME: ${{ github.ref_name }} | ||
| COMMIT_SHA: ${{ github.sha }} | ||
| TARGET: ${{ inputs.target }} | ||
| PLATFORM: ${{ inputs.platform }} | ||
| run: | | ||
| # Need for sanitization explained in https://github.com/opendatahub-io/notebooks/issues/631 | ||
| # For length, Docker image tags have 128-character limit, and we form them as <inputs.target>-<ref_name>_<sha> | ||
| # therefore since sha is 40 characters, and our target names are <40 chars, we should cut ref_name at 40 | ||
| # `cut` must stay inside the command substitution: piping the assignment itself would run it | ||
| # in a pipeline subshell and leave SANITIZED_REF_NAME empty in this shell | ||
| SANITIZED_REF_NAME=$(echo "${REF_NAME}" | sed 's/[^a-zA-Z0-9._-]/_/g' | cut -c 1-40) | ||
| IMAGE_TAG="${SANITIZED_REF_NAME}_${COMMIT_SHA}" | ||
| echo "IMAGE_TAG=${IMAGE_TAG}" >> "$GITHUB_OUTPUT" | ||
| echo "OUTPUT_IMAGE=${{ env.IMAGE_REGISTRY}}:${TARGET}-${IMAGE_TAG}" >> "$GITHUB_OUTPUT" | ||
| echo "SANITIZED_PLATFORM=$(echo "${PLATFORM}" | sed 's/[^a-zA-Z0-9._-]/_/g')" >> "$GITHUB_OUTPUT" | ||
| # endregion | ||
| # region Image build | ||
| # Logs in to quay.io/aipcc when the bot credentials are available to this workflow; otherwise it is a no-op. | ||
| # The `secrets` context cannot be used in a step-level `if:` (the workflow fails validation), | ||
| # so the credentials are exposed as environment variables and the presence check is done in the script. | ||
| - name: Login to quay.io/aipcc (if the secret is present) | ||
| shell: bash | ||
| env: | ||
| AIPCC_QUAY_BOT_USERNAME: ${{ secrets.AIPCC_QUAY_BOT_USERNAME }} | ||
| AIPCC_QUAY_BOT_PASSWORD: ${{ secrets.AIPCC_QUAY_BOT_PASSWORD }} | ||
| run: | | ||
| if [[ -z "${AIPCC_QUAY_BOT_USERNAME}" ]]; then | ||
| echo "AIPCC quay.io credentials are not available, skipping login" | ||
| exit 0 | ||
| fi | ||
| echo "${AIPCC_QUAY_BOT_PASSWORD}" | podman login quay.io/aipcc -u "${AIPCC_QUAY_BOT_USERNAME}" --password-stdin | ||
| # Publishes EXTRA_PODMAN_BUILD_ARGS as a step output: empty for every platform except linux/s390x | ||
| - name: Compute extra podman build args | ||
| id: extra-podman-build-args | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| extra_args="" | ||
| case "${{ inputs.platform }}" in | ||
| linux/s390x) | ||
| # workaround for known issue https://github.com/zeromq/libzmq/pull/4486 | ||
| # In qemu-user, CACHELINE_SIZE probe is undefined | ||
| extra_args='--env=CXXFLAGS=-Dundefined=64' | ||
| ;; | ||
| esac | ||
| printf 'EXTRA_PODMAN_BUILD_ARGS=%s\n' "${extra_args}" >> $GITHUB_OUTPUT | ||
| # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#push | ||
| # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request | ||
| # The two build steps differ only in their env: this one both reads and writes the build cache (--cache-from and --cache-to) | ||
| - name: "push|schedule|workflow_dispatch: make ${{ inputs.target }}" | ||
| run: | | ||
| # print running stats on disk occupancy | ||
| (while true; do df -h | grep "${HOME}/.local/share/containers"; sleep 30; done) & | ||
| make ${{ inputs.target }} | ||
| if: ${{ fromJson(inputs.github).event_name == 'push' || | ||
| fromJson(inputs.github).event_name == 'schedule' || | ||
| fromJson(inputs.github).event_name == 'workflow_dispatch' }} | ||
| env: | ||
| IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}" | ||
| CONTAINER_BUILD_CACHE_ARGS: "${{ steps.extra-podman-build-args.outputs.EXTRA_PODMAN_BUILD_ARGS }} --cache-from ${{ env.CACHE }} --cache-to ${{ env.CACHE }}" | ||
| # Pull request variant: only reads the build cache (--cache-from) and sets PUSH_IMAGES to "no" | ||
| - name: "pull_request: make ${{ inputs.target }}" | ||
| run: | | ||
| # print running stats on disk occupancy | ||
| (while true; do df -h | grep "${HOME}/.local/share/containers"; sleep 30; done) & | ||
| make ${{ inputs.target }} | ||
| if: "${{ fromJson(inputs.github).event_name == 'pull_request' || | ||
| fromJson(inputs.github).event_name == 'pull_request_target' }}" | ||
| env: | ||
| IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}" | ||
| CONTAINER_BUILD_CACHE_ARGS: "${{ steps.extra-podman-build-args.outputs.EXTRA_PODMAN_BUILD_ARGS }} --cache-from ${{ env.CACHE }}" | ||
| # We don't have access to image registry, so disable pushing | ||
| PUSH_IMAGES: "no" | ||
| # lists the images present after the build, including their digests | ||
| - name: "Show podman images information" | ||
| run: podman images --digests | ||
| # endregion | ||
| # region Pytest image tests | ||
| # https://github.com/astral-sh/setup-uv | ||
| # uv drives the test dependencies (`uv sync --locked`) and the pytest runs (`uv run pytest`) further down | ||
| - name: Install the latest version of uv | ||
| uses: astral-sh/setup-uv@v5 | ||
| with: | ||
| version: "latest" | ||
| python-version: "3.12" | ||
| enable-cache: true | ||
| cache-dependency-glob: "uv.lock" | ||
| pyproject-file: "pyproject.toml" | ||
| - name: Check uv is installed correctly | ||
| run: uv version | ||
| - name: Install deps | ||
| run: uv sync --locked | ||
| # Runs tests/containers against the freshly built image, excluding tests marked openshift, cuda or rocm. | ||
| # DOCKER_HOST and the Testcontainers socket override both point at the rootful podman socket. | ||
| - name: Run Testcontainers container tests (in PyTest) | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| uv run pytest --capture=fd tests/containers -m 'not openshift and not cuda and not rocm' --image="${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}" | ||
| env: | ||
| DOCKER_HOST: "unix:///var/run/podman/podman.sock" | ||
| TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE: "/var/run/podman/podman.sock" | ||
| # pulling the Ryuk container from docker.io introduces CI flakiness | ||
| TESTCONTAINERS_RYUK_DISABLED: "true" | ||
| # endregion Pytest image tests | ||
| # region Makefile image tests | ||
| # The `tests` output of this step gates the cri-o, Kubernetes and image-test steps below | ||
| - name: "Check if we have tests or not" | ||
| id: have-tests | ||
| run: "ci/cached-builds/has_tests.py --target ${{ inputs.target }}" | ||
| # Rewrites every statefulset.yaml and pod.yaml in the checkout in place; `git diff` prints what was changed | ||
| - name: "Change pull policy to IfNotPresent" | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| find . \( -name "statefulset.yaml" -o -name "pod.yaml" \) -type f -exec \ | ||
| sed -i'' 's/imagePullPolicy: Always/imagePullPolicy: IfNotPresent/g' {} \; | ||
| git diff | ||
| # [INFO] Running command (('make deploy9-runtimes-rocm-tensorflow-ubi9-python-3.11',), {'shell': True}) | ||
| # Deploying notebook from runtimes/rocm/tensorflow/ubi9-python-3.11/kustomize/base directory... | ||
| # sed: can't read runtimes/rocm/tensorflow/ubi9-python-3.11/kustomize/base/kustomization.yaml: No such file or directory | ||
| # Creates runtimes/rocm/{tensorflow,pytorch} as symlinks to ../rocm-tensorflow and ../rocm-pytorch | ||
| - name: "Fixup paths that prevent us from running rocm tests" | ||
| if: ${{ steps.have-tests.outputs.tests == 'true' }} | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| mkdir -p runtimes/rocm | ||
| ln -s ../rocm-tensorflow runtimes/rocm/tensorflow | ||
| ln -s ../rocm-pytorch runtimes/rocm/pytorch | ||
| # https://cri-o.io/ | ||
| # Adds the Kubernetes and cri-o apt repositories for the versions given in `env`, installs | ||
| # cri-o, kubelet, kubeadm, kubectl and conntrack, replaces the CNI configuration with the | ||
| # repository's bridge conflist, drops in the repository's crio.conf and starts crio.service. | ||
| - name: Install cri-o | ||
| id: install-crio | ||
| if: ${{ steps.have-tests.outputs.tests == 'true' }} | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| sudo apt-get update | ||
| sudo apt-get install -y software-properties-common curl | ||
| # https://github.com/cri-o/packaging?tab=readme-ov-file#distributions-using-deb-packages | ||
| curl -fsSL https://pkgs.k8s.io/core:/stable:/v${KUBERNETES_VERSION}/deb/Release.key | \ | ||
| sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg | ||
| echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v${KUBERNETES_VERSION}/deb/ /" | \ | ||
| sudo tee /etc/apt/sources.list.d/kubernetes.list | ||
| curl -fsSL https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v${CRIO_VERSION}/deb/Release.key | \ | ||
| sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/cri-o-apt-keyring.gpg | ||
| echo "deb [signed-by=/etc/apt/keyrings/cri-o-apt-keyring.gpg] https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v${CRIO_VERSION}/deb/ /" | \ | ||
| sudo tee /etc/apt/sources.list.d/cri-o.list | ||
| sudo apt-get update | ||
| # [ERROR FileExisting-conntrack]: conntrack not found in system path | ||
| # see man apt-patterns for the ~name=version* syntax | ||
| # The following packages will be DOWNGRADED: | ||
| # kubectl | ||
| # E: Packages were downgraded and -y was used without --allow-downgrades. | ||
| sudo apt-get install -y --allow-downgrades \ | ||
| "cri-o=${CRIO_VERSION}.*" \ | ||
| "kubelet=${KUBERNETES_VERSION}.*" "kubeadm=${KUBERNETES_VERSION}.*" "kubectl=${KUBERNETES_VERSION}.*" \ | ||
| conntrack | ||
| # make use of /etc/cni/net.d/11-crio-ipv4-bridge.conflist so we don't | ||
| # need a pod network and just use the default bridge | ||
| sudo rm -rf /etc/cni/net.d/* | ||
| # cat /etc/cni/net.d/11-crio-ipv4-bridge.conflist | ||
| # https://github.com/containerd/containerd/blob/main/script%2Fsetup%2Finstall-cni | ||
| # https://www.cni.dev/plugins/current/main/bridge/ | ||
| sudo cp ci/cached-builds/11-crio-ipv4-bridge.conflist /etc/cni/net.d/11-crio-ipv4-bridge.conflist | ||
| sudo cp ci/cached-builds/crio.conf /etc/crio/crio.conf.d/ | ||
| sudo systemctl start crio.service | ||
| env: | ||
| # Versions are quoted so they stay strings: an unquoted value such as 1.30 is a YAML float and would turn into 1.3 | ||
| # TODO(jdanek): install also "cri-tools=${CRIO_VERSION}.*" when updating to 1.33 | ||
| CRIO_VERSION: "1.32" | ||
| # This has to be kept in sync with the packages above, otherwise | ||
| # [ERROR KubeletVersion]: the kubelet version is higher than the control plane version. | ||
| # This is not a supported version skew and may lead to a malfunctional cluster. | ||
| # Kubelet version: "1.33.0" Control plane version: "1.30.12" | ||
| KUBERNETES_VERSION: "1.33" | ||
| # Runs only after a failure and only when the target has tests; `|| true` keeps a non-zero | ||
| # `systemctl status` from aborting the step before the journal is printed | ||
| - name: Show crio debug data (on failure) | ||
| if: ${{ failure() && steps.have-tests.outputs.tests == 'true' }} | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| sudo systemctl status crio.service || true | ||
| sudo journalctl -xeu crio.service | ||
| # do this early, it's a good check that cri-o is not completely broken | ||
| - name: "Show crio images information" | ||
| if: ${{ steps.have-tests.outputs.tests == 'true' }} | ||
| run: sudo crictl images | ||
| # Prepares the host (swap off, br_netfilter, IP forwarding, firewall and NAT rules for the cni0 bridge), | ||
| # initialises the cluster with kubeadm from ci/cached-builds/kubeadm.yaml, | ||
| # and copies the admin kubeconfig to $HOME/.kube/config owned by the current user. | ||
| - name: Install Kubernetes cluster | ||
| if: ${{ steps.have-tests.outputs.tests == 'true' }} | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| sudo swapoff -a | ||
| sudo modprobe br_netfilter | ||
| sudo sysctl -w net.ipv4.ip_forward=1 | ||
| # Was getting strange DNS resolution errors from pods that don't seem to want to go away sometimes: | ||
| # Resolving raw.githubusercontent.com (raw.githubusercontent.com)... failed: Name or service not known. | ||
| # wget: unable to resolve host address ‘raw.githubusercontent.com’ | ||
| # Here's what helped: | ||
| # https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/#known-issues | ||
| # https://github.com/kubernetes/kubernetes/blob/e4c1f980b76fecece30c2f77885a7117192170a6/CHANGELOG/CHANGELOG-1.30.md?plain=1#L1454 | ||
| # https://github.com/canonical/microk8s/issues/68#issuecomment-404923563 | ||
| sudo ufw allow in on cni0 | ||
| sudo ufw allow out on cni0 | ||
| sudo ufw default allow routed | ||
| sudo iptables -P FORWARD ACCEPT | ||
| sudo iptables -t nat -A POSTROUTING -s 10.85.0.0/16 -o eth0 -j MASQUERADE | ||
| # https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm | ||
| sudo kubeadm init --config=ci/cached-builds/kubeadm.yaml | ||
| mkdir -p $HOME/.kube | ||
| sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config | ||
| sudo chown $(id -u):$(id -g) $HOME/.kube/config | ||
| # Runs only after a failure, and only if the cri-o install step itself succeeded | ||
| - name: Show kubelet debug data (on failure) | ||
| if: ${{ failure() && steps.have-tests.outputs.tests == 'true' && steps.install-crio.outcome == 'success' }} | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| sudo systemctl status kubelet || true | ||
| sudo journalctl -xeu kubelet | ||
| # Here is one example how you may list all running Kubernetes containers by using crictl: | ||
| sudo crictl --runtime-endpoint unix:///var/run/crio/crio.sock ps -a | grep kube | grep -v pause | ||
| # Once you have found the failing container, you can inspect its logs with: | ||
| # crictl --runtime-endpoint unix:///var/run/crio/crio.sock logs CONTAINERID | ||
| # On a wait timeout the nodes are described once more and the step is failed explicitly (`&& false`) | ||
| - name: Show nodes status and wait for readiness | ||
| if: ${{ steps.have-tests.outputs.tests == 'true' }} | ||
| run: | | ||
| kubectl describe nodes | ||
| kubectl wait --for=condition=Ready nodes --all --timeout=100s || (kubectl describe nodes && false) | ||
| # Waits up to 100s each for all deployments to be Available and all pods to be Ready, across all namespaces | ||
| - name: Wait for pods to be running | ||
| if: ${{ steps.have-tests.outputs.tests == 'true' }} | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| kubectl wait deployments --all --all-namespaces --for=condition=Available --timeout=100s | ||
| kubectl wait pods --all --all-namespaces --for=condition=Ready --timeout=100s | ||
| # Applies the local-path-provisioner v0.0.31 manifest, waits for its deployment, | ||
| # then annotates the local-path storage class as the cluster default | ||
| - name: "Install local-path provisioner" | ||
| if: ${{ steps.have-tests.outputs.tests == 'true' }} | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.31/deploy/local-path-storage.yaml | ||
| kubectl wait deployments --all --namespace=local-path-storage --for=condition=Available --timeout=100s | ||
| # https://kubernetes.io/docs/tasks/administer-cluster/change-default-storage-class/ | ||
| kubectl get storageclass | ||
| kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' | ||
| # Runs ci/cached-builds/make_test.py for the target; IMAGE_TAG and NOTEBOOK_TAG are passed through the environment | ||
| - name: "Run image tests" | ||
| # skip on s390x because we are unable to install requirements-elyra.txt that's installed by runtime image tests | ||
| # https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/etc/generic/requirements-elyra.txt | ||
| if: ${{ steps.have-tests.outputs.tests == 'true' && !contains(fromJSON('["linux/s390x"]'), inputs.platform) }} | ||
| run: python3 ci/cached-builds/make_test.py --target ${{ inputs.target }} | ||
| env: | ||
| IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}" | ||
| # for make deploy, mandatory to specify for the more exotic cases | ||
| NOTEBOOK_TAG: "${{ inputs.target }}-${{ steps.calculated_vars.outputs.IMAGE_TAG }}" | ||
| # endregion | ||
| # Same test suite as the Testcontainers step, but selecting the tests marked `openshift` | ||
| # (still excluding cuda and rocm); runs only when the target has tests | ||
| - name: Run OpenShift container tests (in PyTest) | ||
| if: ${{ steps.have-tests.outputs.tests == 'true' }} | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| uv run pytest --capture=fd tests/containers -m 'openshift and not cuda and not rocm' --image="${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}" | ||
| env: | ||
| # TODO(jdanek): this Testcontainers stuff should not be necessary but currently it has to be there | ||
| DOCKER_HOST: "unix:///var/run/podman/podman.sock" | ||
| TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE: "/var/run/podman/podman.sock" | ||
| # pulling the Ryuk container from docker.io introduces CI flakiness | ||
| TESTCONTAINERS_RYUK_DISABLED: "true" | ||
| # region Trivy vulnerability scan | ||
| # Decides whether Trivy runs and on what. A scan is requested for every scheduled run and for | ||
| # pull requests carrying the `trivy-scan` label. Outputs `target` and `type` (fs or image) | ||
| # are only written when a scan will run; the Trivy step keys off `target`. | ||
| - name: "pull_request|schedule: resolve target if Trivy scan should run" | ||
| id: resolve-target | ||
| if: ${{ fromJson(inputs.github).event_name == 'pull_request' || fromJson(inputs.github).event_name == 'schedule' }} | ||
| env: | ||
| EVENT_NAME: ${{ fromJson(inputs.github).event_name }} | ||
| HAS_TRIVY_LABEL: ${{ contains(fromJson(inputs.github).event.pull_request.labels.*.name, 'trivy-scan') }} | ||
| FS_SCAN_FOLDER: ${{ fromJson(env.TRIVY_SCAN_FS_JSON)[inputs.target] }} | ||
| run: | | ||
| scan_target="" | ||
| scan_type="" | ||
| if [[ "$EVENT_NAME" == "schedule" || ( "$EVENT_NAME" == "pull_request" && "$HAS_TRIVY_LABEL" == "true" ) ]]; then | ||
| # a folder configured in TRIVY_SCAN_FS_JSON wins over scanning the built image | ||
| if [[ -n "$FS_SCAN_FOLDER" ]]; then | ||
| scan_target="$FS_SCAN_FOLDER" | ||
| scan_type="fs" | ||
| else | ||
| scan_target="${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}" | ||
| scan_type="image" | ||
| fi | ||
| fi | ||
| if [[ -z "$scan_target" ]]; then | ||
| echo "Trivy scan won't run" | ||
| else | ||
| echo "target=$scan_target" >> $GITHUB_OUTPUT | ||
| echo "type=$scan_type" >> $GITHUB_OUTPUT | ||
| echo "Trivy scan will run on $scan_target ($scan_type)" | ||
| fi | ||
| # Runs the Trivy container against the target resolved by the previous step and appends the | ||
| # markdown report to the job summary. Image scans mount the podman socket into the container; | ||
| # fs scans mount the workspace at /workspace. `--exit-code 0` means findings do not fail the step. | ||
| - name: Run Trivy vulnerability scanner | ||
| if: ${{ steps.resolve-target.outputs.target }} | ||
| run: | | ||
| REPORT_FOLDER=${{ github.workspace }}/report | ||
| REPORT_FILE=trivy-report.md | ||
| REPORT_TEMPLATE=trivy-markdown.tpl | ||
| mkdir -p $REPORT_FOLDER | ||
| cp ci/$REPORT_TEMPLATE $REPORT_FOLDER | ||
| SCAN_TARGET=${{ steps.resolve-target.outputs.target }} | ||
| SCAN_TYPE=${{ steps.resolve-target.outputs.type }} | ||
| echo "Scanning $SCAN_TARGET ($SCAN_TYPE)" | ||
| if [[ "$SCAN_TYPE" == "image" ]]; then | ||
| SCAN_ARGS="--image-src podman --podman-host /var/run/podman/podman.sock" | ||
| PODMAN_ARGS="-v ${PODMAN_SOCK}:/var/run/podman/podman.sock" | ||
| elif [[ "$SCAN_TYPE" == "fs" ]]; then | ||
| WORKSPACE_FOLDER="/workspace" | ||
| SCAN_TARGET="$WORKSPACE_FOLDER/$SCAN_TARGET" | ||
| PODMAN_ARGS="-v ${{ github.workspace }}:$WORKSPACE_FOLDER" | ||
| fi | ||
| # have trivy access podman socket, | ||
| # https://github.com/aquasecurity/trivy/issues/580#issuecomment-666423279 | ||
| podman run --rm \ | ||
| $PODMAN_ARGS \ | ||
| -v ${REPORT_FOLDER}:/report \ | ||
| docker.io/aquasec/trivy:$TRIVY_VERSION \ | ||
| $SCAN_TYPE \ | ||
| $SCAN_ARGS \ | ||
| --scanners vuln --ignore-unfixed \ | ||
| --exit-code 0 --timeout 30m \ | ||
| --format template --template "@/report/$REPORT_TEMPLATE" -o /report/$REPORT_FILE \ | ||
| $SCAN_TARGET | ||
| cat $REPORT_FOLDER/$REPORT_FILE >> $GITHUB_STEP_SUMMARY | ||
| # endregion | ||
| # region check-payload for FIPS compliance | ||
| # Publishes a dedicated GOPATH (under the workspace) as a step output; the setup-go and scan steps below read it | ||
| - id: check-payload-vars | ||
| run: | | ||
| echo "GOPATH=${{ github.workspace }}/go-check-payload" >> "$GITHUB_OUTPUT" | ||
| working-directory: scripts/check-payload | ||
| # for https://github.com/openshift/check-payload to cache the built binary | ||
| - uses: actions/setup-go@v5 | ||
| with: | ||
| cache-dependency-path: "scripts/check-payload/go.sum" | ||
| env: | ||
| GOPATH: ${{ steps.check-payload-vars.outputs.GOPATH }} | ||
| # F0512 15:43:03.219076 21568 main.go:294] Error: exec: "oc": executable file not found in $PATH | ||
| # Downloads the stable oc client tarball for the runner's architecture (`uname -m`), | ||
| # extracts only the `oc` binary and moves it to /usr/local/bin. | ||
| - name: Install oc client | ||
| run: | | ||
| # Install the oc client | ||
| # --fail: stop on an HTTP error instead of saving the error page and failing later in tar | ||
| curl --fail -L https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/clients/ocp/stable/openshift-client-linux.tar.gz -o /tmp/openshift-client-linux.tar.gz | ||
| tar -xzvf /tmp/openshift-client-linux.tar.gz oc | ||
| rm -f /tmp/openshift-client-linux.tar.gz | ||
| sudo mv ./oc /usr/local/bin | ||
| # perform `podman image mount` ourselves, and then follow the scenario from | ||
| # https://github.com/openshift/check-payload/pull/154, that is because | ||
| # `check-payload scan image --spec` insists on pulling the image, even if already present, | ||
| # that causes trouble when checking PRs (image not pushed) and requires `podman login` as root | ||
| # (we run podman as root in the GHA to reuse container storage in Kubernetes) | ||
| # use sudo to avoid | ||
| # podman error (args=[image mount ghcr.io/...]) | ||
| # (stderr=Error: cannot use command "podman image mount" with the remote podman client | ||
| # and use --preserve-env=PATH to avoid | ||
| # F0512 16:31:58.425584 9911 main.go:294] Error: exec: "podman": executable file not found in $PATH | ||
| # Sequence: mount the built image, scan the mounted directory with check-payload, unmount all images. | ||
| # With `set -e`, a failing scan ends the step before the unmount is reached. | ||
| - name: Check image with check-payload for FIPS compliance | ||
| run: | | ||
| set -Eeuxo pipefail | ||
| # resolve podman under current user, not under sudo/root | ||
| PODMAN="$(which podman)" | ||
| # mount the image | ||
| IMAGE_MOUNT_DIR=$(sudo "${PODMAN}" image mount "${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}") | ||
| # run the check-payload scan | ||
| sudo --preserve-env=PATH go tool github.com/openshift/check-payload scan local --path "${IMAGE_MOUNT_DIR}" | ||
| # unmount the image | ||
| sudo "${PODMAN}" image unmount --all | ||
| working-directory: scripts/check-payload | ||
| env: | ||
| GOPATH: ${{ steps.check-payload-vars.outputs.GOPATH }} | ||
| # endregion | ||
| # region Typescript (browser) image tests | ||
| # https://playwright.dev/docs/ci | ||
| # https://playwright.dev/docs/docker | ||
| # we leave little free disk space after we mount LVM for podman storage | ||
| # not enough to install playwright; running playwright in podman uses the space we have | ||
| # Runs only for targets whose name contains `codeserver`. The script between <<EOF and EOF is fed | ||
| # to /bin/bash inside the Playwright container, with tests/browser mounted at /mnt. | ||
| - name: Run Playwright tests | ||
| if: ${{ contains(inputs.target, 'codeserver') }} | ||
| # --ipc=host because Microsoft says so in Playwright docs | ||
| # --net=host because testcontainers connects to the Reaper container's exposed port | ||
| # we need to pass through the relevant environment variables | ||
| # DEBUG configures Node.js debuggers, sets different verbosity as needed | ||
| # CI=true is set on every CI nowadays | ||
| # PODMAN_SOCK should be mounted to /var/run/docker.sock, other likely mounting locations may not exist (mkdir -p) | ||
| # TEST_TARGET is the workbench image the test will run | ||
| # --volume(s) let us access docker socket and not clobber host's node_modules | ||
| run: | | ||
| podman run \ | ||
| --interactive --rm \ | ||
| --ipc=host \ | ||
| --net=host \ | ||
| --env "CI=true" \ | ||
| --env "NPM_CONFIG_fund=false" \ | ||
| --env "DEBUG=testcontainers:*" \ | ||
| --env "PODMAN_SOCK=/var/run/docker.sock" \ | ||
| --env "TEST_TARGET" \ | ||
| --volume ${PODMAN_SOCK}:/var/run/docker.sock \ | ||
| --volume ${PWD}:/mnt \ | ||
| --volume /mnt/node_modules \ | ||
| mcr.microsoft.com/playwright:v1.53.1-noble \ | ||
| /bin/bash <<EOF | ||
| set -Eeuxo pipefail | ||
| cd /mnt | ||
| npm install -g pnpm && pnpm install | ||
| pnpm exec playwright test | ||
| exit 0 | ||
| EOF | ||
| working-directory: tests/browser | ||
| env: | ||
| TEST_TARGET: "${{ steps.calculated_vars.outputs.OUTPUT_IMAGE }}" | ||
| # Uploads the Playwright report for codeserver targets on pull_request runs, unless the run was cancelled; | ||
| # the artifact name combines the target with the sanitized platform | ||
| - uses: actions/upload-artifact@v4 | ||
| if: ${{ !cancelled() && fromJson(inputs.github).event_name == 'pull_request' && contains(inputs.target, 'codeserver') }} | ||
| with: | ||
| name: "${{ inputs.target }}_${{ steps.calculated_vars.outputs.SANITIZED_PLATFORM }}_playwright-report" | ||
| path: tests/browser/playwright-report/ | ||
| retention-days: 30 | ||
| # endregion | ||
| # Final disk usage report; both steps run unless the run was cancelled, | ||
| # and compsize only if its install step succeeded | ||
| - run: df -h | ||
| if: "${{ !cancelled() }}" | ||
| - run: sudo compsize -x "${HOME}/.local/share/containers" | ||
| if: "${{ !cancelled() && steps.install-compsize.outcome == 'success' }}" | ||