Skip to content

increase nofile ulimit if using sccache-dist #1

increase nofile ulimit if using sccache-dist

increase nofile ulimit if using sccache-dist #1

Workflow file for this run

# Reusable (workflow_call) workflow that builds RAPIDS wheels across a
# CUDA / Python / arch / Linux matrix.
name: Build RAPIDS wheels

Check failure on line 1 in .github/workflows/wheels-build.yaml

View workflow run for this annotation

GitHub Actions / .github/workflows/wheels-build.yaml

Invalid workflow file

(Line: 86, Col: 9): Required property is missing: type
on:
  workflow_call:
    inputs:
      branch:
        description: |
          Git branch the workflow run targets.
          This is required even when 'sha' is provided because it is also used for organizing artifacts.
        type: string
      date:
        description: "Date (YYYY-MM-DD) this run is for. Used to organize artifacts produced by nightly builds"
        type: string
      sha:
        description: "Full git commit SHA to check out"
        type: string
      repo:
        description: "Git repo to check out, in '{org}/{repo}' form, e.g. 'rapidsai/cudf'"
        type: string
      build_type:
        description: "One of: [branch, nightly, pull-request]"
        required: true
        type: string
      script:
        required: true
        type: string
        description: "Shell code to be executed in a step. Ideally this should just invoke a script managed in the repo the workflow runs from, like 'ci/build_wheel.sh'."
      package-name:
        required: true
        type: string
        description: "Distribution name, without any other qualifiers (e.g. 'pylibcudf', not 'pylibcudf-cu12-cp311-manylinux_2_24_aarch64')"
      package-type:
        description: "One of: [cpp, python]"
        required: true
        type: string
      pure-wheel:
        required: false
        type: boolean
        default: false
        description: "One of [true, false], true if the wheel is not dependent on operating system, Python minor version, or CPU architecture"
      append-cuda-suffix:
        required: false
        type: boolean
        default: true
        description: "One of [true, false] to indicate if CUDA version should be appended to the wheel name"
      # allow a bigger runner instance
      node_type:
        description: |
          Suffix, without leading '-', indicating the type of machine to run jobs on (e.g., 'cpu4' or 'gpu-l4-latest-1').
          Runner labels are of the form '{operating_system}-{arch}-{node_type}'.
          See https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md for a list
          of valid values.
        required: false
        type: string
        default: "cpu16"
      # general settings
      matrix_filter:
        description: |
          jq expression which modifies the matrix.
          For example, 'map(select(.ARCH == "amd64"))' to achieve "only run amd64 jobs".
        type: string
        default: "."
      upload-artifacts:
        type: boolean
        default: true
        required: false
        description: "One of [true, false], true if artifacts should be uploaded to GitHub's artifact store"
      extra-repo:
        required: false
        type: string
        default: ''
        description: "Extra repository that will be cloned into the project directory."
      extra-repo-sha:
        required: false
        type: string
        default: ''
        description: "Commit SHA in 'extra-repo' to clone."
      extra-repo-deploy-key:
        required: false
        type: string
        default: ''
        description: "The _name_ of a secret containing a deploy key for 'extra-repo' (not the key itself)."
      sccache-dist-request-timeout:
        # 'type' is mandatory for every workflow_call input; omitting it made
        # the whole workflow invalid ("Required property is missing: type").
        type: number
        required: false
        default: 7140
        description: |
          The maximum time (in seconds) the sccache client should wait for a distributed compilation to complete.
      sccache-dist-token-secret-name:
        type: string
        required: false
        description: |
          The name of the secret that contains the token used to authenticate with the RAPIDS Build Engineering sccache-dist build cluster.
      alternative-gh-token-secret-name:
        type: string
        required: false
        description: |
          If provided, should contain the name of a secret in the repo which holds a GitHub API token.
          When this is non-empty, that secret's value is used in place of the default repo-level token
          anywhere that environment variable GH_TOKEN is set. This is especially useful for downloading
          artifacts from other private repos, which repo tokens do not have access to.
# Run every 'run:' step under bash (individual steps may still override,
# e.g. the build step uses a login shell).
defaults:
  run:
    shell: bash
# Least-privilege token: read-only where needed, everything unused set to
# 'none'. 'id-token: write' is needed for OIDC federation by the
# aws-actions/configure-aws-credentials step below.
permissions:
  actions: read
  checks: none
  contents: read
  deployments: none
  discussions: none
  id-token: write
  issues: none
  packages: read
  pages: none
  pull-requests: read
  repository-projects: none
  security-events: none
  statuses: none
jobs:
  # Emit the build matrix as JSON: a hard-coded YAML list is converted with
  # yq, then filtered/wrapped into '{include: [...]}' by the caller-supplied
  # jq expression. An empty post-filter matrix is a hard error.
  compute-matrix:
    runs-on: ubuntu-latest
    outputs:
      MATRIX: ${{ steps.compute-matrix.outputs.MATRIX }}
    steps:
      - name: Compute Build Matrix
        id: compute-matrix
        env:
          MATRIX_FILTER: ${{ inputs.matrix_filter }}
        run: |
          set -eo pipefail

          # please keep the matrices sorted in ascending order by the following:
          #
          #     [ARCH, PY_VER, CUDA_VER, LINUX_VER]
          #
          export MATRIX="
          # amd64
          - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.1', LINUX_VER: 'rockylinux8' }
          # arm64
          - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.0.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.0.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.0.1', LINUX_VER: 'rockylinux8' }
          "
          MATRIX="$(
            yq -n -o json 'env(MATRIX)' | \
              jq -c "${MATRIX_FILTER} | if (. | length) > 0 then {include: .} else \"Error: Empty matrix\n\" | halt_error(1) end"
          )"
          echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}"
  # One wheel build per matrix entry, inside the rapidsai/ci-wheel container.
  build:
    name: ${{ matrix.CUDA_VER }}, ${{ matrix.PY_VER }}, ${{ matrix.ARCH }}, ${{ matrix.LINUX_VER }}
    needs: [compute-matrix]
    strategy:
      matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
    runs-on: "linux-${{ matrix.ARCH }}-${{ inputs.node_type }}"
    env:
      RAPIDS_ARTIFACTS_DIR: ${{ github.workspace }}/artifacts
    container:
      image: "rapidsai/ci-wheel:25.12-cuda${{ matrix.CUDA_VER }}-${{ matrix.LINUX_VER }}-py${{ matrix.PY_VER }}"
      env:
        RAPIDS_BUILD_TYPE: ${{ inputs.build_type }}
    steps:
      # Assume the AWS role via OIDC (requires 'id-token: write' above).
      - uses: aws-actions/configure-aws-credentials@00943011d9042930efac3dcd3a170e4273319bc8 # v5.1.0
        with:
          role-to-assume: ${{ vars.AWS_ROLE_ARN }}
          aws-region: ${{ vars.AWS_REGION }}
          role-duration-seconds: 43200 # 12h
      - name: checkout code repo
        uses: actions/checkout@v5
        with:
          repository: ${{ inputs.repo }}
          ref: ${{ inputs.sha }}
          fetch-depth: 0 # unshallow fetch for setuptools-scm
          persist-credentials: false
      - name: Standardize repository information
        uses: rapidsai/shared-actions/rapids-github-info@main
        with:
          repo: ${{ inputs.repo }}
          branch: ${{ inputs.branch }}
          date: ${{ inputs.date }}
          sha: ${{ inputs.sha }}
      # Derive the checkout path ('{repo}' part of '{org}/{repo}') for the
      # optional extra repo.
      - name: Preprocess extra repos
        id: preprocess-extras
        if: ${{ inputs.extra-repo != '' }}
        env:
          EXTRA_REPO: ${{ inputs.extra-repo }}
        run: |
          EXTRA_REPO_PATH=$(echo "$EXTRA_REPO" | cut -d "/" -f 2)
          echo "EXTRA_REPO_PATH=${EXTRA_REPO_PATH}" >> "${GITHUB_OUTPUT}"
      - name: checkout extra repos
        uses: actions/checkout@v5
        if: ${{ inputs.extra-repo != '' }}
        with:
          repository: ${{ inputs.extra-repo }}
          ref: ${{ inputs.extra-repo-sha }}
          path: "./${{ steps.preprocess-extras.outputs.EXTRA_REPO_PATH }}"
          ssh-key: ${{ secrets[inputs.extra-repo-deploy-key] }} # zizmor: ignore[overprovisioned-secrets]
          persist-credentials: false
      # Best-effort steps below use continue-on-error so cache/telemetry
      # outages never fail the build.
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        continue-on-error: true
      - name: Telemetry setup
        uses: rapidsai/shared-actions/telemetry-dispatch-setup@main
        continue-on-error: true
        if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
        env:
          # DOES NOT NEED alternative-gh-token-secret-name - github.token is enough and more limited
          GH_TOKEN: ${{ github.token }}
        with:
          extra_attributes: "rapids.PACKAGER=wheel,rapids.CUDA_VER=${{ matrix.CUDA_VER }},rapids.PY_VER=${{ matrix.PY_VER }},rapids.ARCH=${{ matrix.ARCH }},rapids.LINUX_VER=${{ matrix.LINUX_VER }}"
      # Install latest rapidsai/sccache client and configure sccache-dist
      - name: Setup sccache-dist
        uses: rapidsai/shared-actions/setup-sccache-dist@fea/setup-sccache-dist
        if: ${{ inputs.sccache-dist-token-secret-name != '' }}
        env:
          AWS_REGION: "${{env.AWS_REGION}}"
          AWS_ACCESS_KEY_ID: "${{env.AWS_ACCESS_KEY_ID}}"
          AWS_SECRET_ACCESS_KEY: "${{env.AWS_SECRET_ACCESS_KEY}}"
        with:
          auth: "${{ secrets[inputs.sccache-dist-token-secret-name] }}" # zizmor: ignore[overprovisioned-secrets]
          cache-slug: "conda-py${{matrix.PY_VER}}-cuda${{matrix.CUDA_VER}}-${{matrix.ARCH}}"
          log-file: "${{ env.RAPIDS_ARTIFACTS_DIR }}/sccache.log"
          request-timeout: ${{ inputs.sccache-dist-request-timeout }}
      # Per the docs at https://docs.github.com/en/rest/rate-limit/rate-limit?apiVersion=2022-11-28#get-rate-limit-status-for-the-authenticated-user,
      # checking '/rate_limit | jq .' should not itself count against any rate limits.
      #
      # gh CLI is pre-installed on Github-hosted runners, but may not be on self-hosted runners.
      - name: Check GitHub API rate limits
        run: |
          if ! type gh >/dev/null; then
            echo "'gh' CLI is not installed... skipping rate-limits check"
          else
            gh api /rate_limit | jq .
          fi
        env:
          # NEEDS alternative-gh-token-secret-name - API limits need to be for whatever token is used for upload/download. Repo token may be a different pool for rate limits.
          GH_TOKEN: ${{ inputs.alternative-gh-token-secret-name && secrets[inputs.alternative-gh-token-secret-name] || github.token }} # zizmor: ignore[overprovisioned-secrets]
      # Raise the open-file soft limit to the hard limit when sccache-dist is
      # in use (the distributed client can hold many sockets/files open).
      - name: Build and repair the wheel
        run: |
          if test -n "${SCCACHE_DIST_TOKEN_NAME:+x}"; then ulimit -n "$(ulimit -Hn)"; fi
          $INPUTS_SCRIPT
        env:
          INPUTS_SCRIPT: "${{ inputs.script }}"
          SCCACHE_DIST_TOKEN_NAME: "${{ inputs.sccache-dist-token-secret-name }}"
          # NEEDS alternative-gh-token-secret-name - may require a token with more permissions
          GH_TOKEN: ${{ inputs.alternative-gh-token-secret-name && secrets[inputs.alternative-gh-token-secret-name] || github.token }} # zizmor: ignore[overprovisioned-secrets]
        # Use a shell that loads the rc file so that we get the compiler settings
        shell: bash -leo pipefail {0}
      # Compute the artifact name (optionally CUDA-suffixed, optionally pure)
      # and the wheel output directory for the upload steps below.
      - name: Get package name
        if: ${{ inputs.upload-artifacts }}
        env:
          APPEND_CUDA_SUFFIX: ${{ inputs.append-cuda-suffix }}
          PACKAGE_NAME: ${{ inputs.package-name }}
          PACKAGE_TYPE: ${{ inputs.package-type }}
          PURE_WHEEL: ${{ inputs.pure-wheel }}
        run: |
          if [ -z "${PACKAGE_NAME}" ]; then
            PACKAGE_NAME="${RAPIDS_REPOSITORY#*/}"
          fi
          export "RAPIDS_PY_CUDA_SUFFIX=$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
          if [ "${APPEND_CUDA_SUFFIX}" = "true" ]; then
            export "RAPIDS_PY_WHEEL_NAME=${PACKAGE_NAME}_${RAPIDS_PY_CUDA_SUFFIX}"
          else
            export "RAPIDS_PY_WHEEL_NAME=${PACKAGE_NAME}"
          fi
          if [ "${PURE_WHEEL}" = "true" ]; then
            export "RAPIDS_PY_WHEEL_PURE=1"
          fi
          echo "RAPIDS_PACKAGE_NAME=$(RAPIDS_NO_PKG_EXTENSION=true rapids-package-name "wheel_${PACKAGE_TYPE}")" >> "${GITHUB_OUTPUT}"
          echo "WHEEL_OUTPUT_DIR=${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" >> "${GITHUB_OUTPUT}"
        id: package-name
      - name: Show files to be uploaded
        if: ${{ inputs.upload-artifacts }}
        env:
          WHEEL_OUTPUT_DIR: ${{ steps.package-name.outputs.WHEEL_OUTPUT_DIR }}
        run: |
          echo "Contents of directory to be uploaded:"
          ls -R "$WHEEL_OUTPUT_DIR"
      - uses: actions/upload-artifact@v4
        if: ${{ inputs.upload-artifacts }}
        with:
          if-no-files-found: 'error'
          name: ${{ steps.package-name.outputs.RAPIDS_PACKAGE_NAME }}
          path: ${{ steps.package-name.outputs.WHEEL_OUTPUT_DIR }}
      # Upload logs etc. even when the build failed (but not when cancelled).
      - name: Upload additional artifacts
        if: "!cancelled()"
        run: rapids-upload-artifacts-dir "cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch)_py${RAPIDS_PY_VERSION//.}"
      - name: Telemetry upload attributes
        if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
        env:
          # DOES NOT NEED alternative-gh-token-secret-name - github.token is enough and more limited
          GH_TOKEN: ${{ github.token }}
        uses: rapidsai/shared-actions/telemetry-dispatch-stash-job-artifacts@main
        continue-on-error: true