increase nofile ulimit if using sccache-dist #2
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Reusable workflow (invoked via 'workflow_call') that computes a test matrix and then runs | ||
| # the caller-supplied 'script' input once per matrix entry inside a 'rapidsai/citestwheel' container. | ||
| name: Test RAPIDS wheels | ||
| on: | ||
| workflow_call: | ||
| inputs: | ||
| # branch, date, sha and repo are forwarded to the 'Standardize repository information' step; | ||
| # repo and sha additionally select what the 'checkout code repo' step checks out. | ||
| branch: | ||
| description: | | ||
| Git branch the workflow run targets. | ||
| This is required even when 'sha' is provided because it is also used for organizing artifacts. | ||
| type: string | ||
| date: | ||
| description: "Date (YYYY-MM-DD) this run is for. Used to organize artifacts produced by nightly builds" | ||
| type: string | ||
| sha: | ||
| description: "Full git commit SHA to check out" | ||
| type: string | ||
| repo: | ||
| description: "Git repo to check out, in '{org}/{repo}' form, e.g. 'rapidsai/cudf'" | ||
| type: string | ||
| # The only accepted values are checked by the 'Validate Inputs' step of the 'compute-matrix' job. | ||
| build_type: | ||
| description: "One of: [branch, nightly, pull-request]" | ||
| required: true | ||
| type: string | ||
| # 'auto' resolves to 'nightly' when build_type is 'branch', otherwise to the value of build_type | ||
| # (see the 'Compute test matrix' step). | ||
| matrix_type: | ||
| description: "One of: [auto, nightly, pull-request]. 'auto' means 'choose a value based on what's provided via build_type'." | ||
| required: false | ||
| type: string | ||
| default: "auto" | ||
| # Executed by the 'Run tests' step through the INPUTS_SCRIPT environment variable. | ||
| script: | ||
| type: string | ||
| required: true | ||
| description: "Shell code to be executed in a step. Ideally this should just invoke a script managed in the repo the workflow runs from, like 'ci/test_wheel.sh'." | ||
| # Applied with jq to the selected matrix in the 'Compute test matrix' step; an empty result fails that step. | ||
| matrix_filter: | ||
| description: | | ||
| jq expression which modifies the matrix. | ||
| For example, 'map(select(.ARCH == "amd64"))' to achieve "only run amd64 jobs". | ||
| type: string | ||
| default: "." | ||
| # Used verbatim as the 'options' of the 'test' job's container. | ||
| # NOTE(review): the default '-e _NOOP' looks like a harmless placeholder so the value is never empty - confirm. | ||
| container-options: | ||
| description: | | ||
| Command-line arguments passed to 'docker run' when starting the container this workflow runs in. | ||
| This should be provided as a single string to be inlined into 'docker run', not an array. | ||
| For example, '--quiet --ulimit nofile=2048'. | ||
| required: false | ||
| type: string | ||
| default: "-e _NOOP" | ||
| # Forwarded as the 'show' option of the 'Generate test report' step. | ||
| test_summary_show: | ||
| description: | | ||
| Sets the 'show:' input to the test-summary/action third-party action. | ||
| The default, 'fail', means "only show failing tests in the summary in the GitHub UI". | ||
| See https://github.com/test-summary/action?tab=readme-ov-file#options for a list of | ||
| available options. | ||
| required: false | ||
| type: string | ||
| default: "fail" | ||
| # Looked up as secrets[<name>] in the 'Run tests' step; an empty name yields an empty RAPIDS_AUX_SECRET_1. | ||
| rapids-aux-secret-1: | ||
| required: false | ||
| type: string | ||
| default: '' | ||
| description: | | ||
| The NAME (not value) of a GitHub secret in the calling repo. | ||
| This allows callers of the workflow to make a single secret available in the job's | ||
| environment, via environment variable `RAPIDS_AUX_SECRET_1`. | ||
| build_workflow_name: | ||
| # Forwarded to the 'Standardize repository information' step. | ||
| description: | | ||
| Name of a workflow file that produced artifacts to be downloaded in this run. | ||
| If not set (the default), artifact-handling scripts use RAPIDS-conventional defaults (like "build.yaml" when "build_type == nightly"). | ||
| required: false | ||
| type: string | ||
| sccache-dist-request-timeout: | ||
| # 'type' is mandatory for every workflow_call input; 'number' matches the numeric default below. | ||
| # The value is forwarded as 'request-timeout' to the 'Setup sccache-dist' step. | ||
| type: number | ||
| required: false | ||
| default: 7140 | ||
| description: | | ||
| The maximum time (in seconds) the sccache client should wait for a distributed compilation to complete. | ||
| sccache-dist-token-secret-name: | ||
| # When non-empty, enables the 'Setup sccache-dist' step and the open-file limit increase in 'Run tests'. | ||
| type: string | ||
| required: false | ||
| description: | | ||
| The name of the secret that contains the token used to authenticate with the RAPIDS Build Engineering sccache-dist build cluster. | ||
| alternative-gh-token-secret-name: | ||
| # Consumed by the 'Check GitHub API rate limits' and 'Run tests' steps, which fall back to github.token when it is empty. | ||
| type: string | ||
| required: false | ||
| description: | | ||
| If provided, should contain the name of a secret in the repo which holds a GitHub API token. | ||
| When this is non-empty, that secret's value is used in place of the default repo-level token | ||
| anywhere that environment variable GH_TOKEN is set. This is especially useful for downloading | ||
| artifacts from other private repos, which repo tokens do not have access to. | ||
| defaults: | ||
| # Every 'run' step in this workflow is executed with bash. | ||
| run: | ||
| shell: bash | ||
| # Token permissions for this workflow, listed scope by scope; scopes set to 'none' are explicitly disabled. | ||
| permissions: | ||
| actions: read | ||
| checks: none | ||
| contents: read | ||
| deployments: none | ||
| discussions: none | ||
| # NOTE(review): presumably needed for the OIDC-based role assumption done by | ||
| # 'aws-actions/configure-aws-credentials' in the 'test' job - confirm. | ||
| id-token: write | ||
| issues: none | ||
| packages: read | ||
| pages: none | ||
| pull-requests: read | ||
| repository-projects: none | ||
| security-events: none | ||
| statuses: none | ||
| jobs: | ||
| # Validates the inputs and publishes the job matrix (as the 'MATRIX' output) consumed by the 'test' job. | ||
| compute-matrix: | ||
| runs-on: ubuntu-latest | ||
| # Inputs are exposed as environment variables rather than interpolated into the scripts below. | ||
| env: | ||
| BUILD_TYPE: ${{ inputs.build_type }} | ||
| MATRIX_TYPE: ${{ inputs.matrix_type }} | ||
| outputs: | ||
| MATRIX: ${{ steps.compute-matrix.outputs.MATRIX }} | ||
| steps: | ||
| # Fails the job with exit code 1 when build_type or matrix_type is not one of the accepted values. | ||
| - name: Validate Inputs | ||
| run: | | ||
| if [[ "$BUILD_TYPE" != "branch" ]] && [[ "$BUILD_TYPE" != "nightly" ]] && [[ "$BUILD_TYPE" != "pull-request" ]]; then | ||
| echo "Invalid build_type! Must be one of 'branch', 'nightly', or 'pull-request'." | ||
| exit 1 | ||
| fi | ||
| if [[ "$MATRIX_TYPE" != "auto" ]] && [[ "$MATRIX_TYPE" != "nightly" ]] && [[ "$MATRIX_TYPE" != "pull-request" ]]; then | ||
| echo "Invalid matrix_type! Must be one of 'auto', 'nightly', or 'pull-request'." | ||
| exit 1 | ||
| fi | ||
| - name: Compute test matrix | ||
| # Picks the 'pull-request' or 'nightly' list from the embedded MATRICES document | ||
| # ('auto' maps a 'branch' build to 'nightly', otherwise to BUILD_TYPE), converts it to JSON with yq, | ||
| # applies MATRIX_FILTER with jq, wraps the result as {include: [...]} and appends it to GITHUB_OUTPUT. | ||
| # An empty filtered matrix aborts the step via jq's halt_error(1). | ||
| # NOTE(review): the nightly arm64 row with CUDA_VER '12.2.2' is the only entry that is not | ||
| # '12.9.1' or '13.0.1' - confirm it is intentional. | ||
| id: compute-matrix | ||
| env: | ||
| MATRIX_FILTER: ${{ inputs.matrix_filter }} | ||
| run: | | ||
| set -eo pipefail | ||
| # please keep the matrices sorted in ascending order by the following: | ||
| # | ||
| # [ARCH, PY_VER, CUDA_VER, LINUX_VER, GPU, DRIVER, DEPENDENCIES] | ||
| # | ||
| export MATRICES=" | ||
| pull-request: | ||
| # amd64 | ||
| - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LINUX_VER: 'ubuntu22.04', GPU: 'l4', DRIVER: 'latest', DEPENDENCIES: 'oldest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.1', LINUX_VER: 'ubuntu24.04', GPU: 'l4', DRIVER: 'latest', DEPENDENCIES: 'latest' } | ||
| # arm64 | ||
| - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '12.9.1', LINUX_VER: 'ubuntu22.04', GPU: 'a100', DRIVER: 'latest', DEPENDENCIES: 'latest' } | ||
| - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.0.1', LINUX_VER: 'ubuntu22.04', GPU: 'l4', DRIVER: 'latest', DEPENDENCIES: 'latest' } | ||
| nightly: | ||
| # amd64 | ||
| - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8', GPU: 'l4', DRIVER: 'latest', DEPENDENCIES: 'oldest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LINUX_VER: 'ubuntu22.04', GPU: 'l4', DRIVER: 'earliest', DEPENDENCIES: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LINUX_VER: 'ubuntu24.04', GPU: 'h100', DRIVER: 'latest', DEPENDENCIES: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.1', LINUX_VER: 'ubuntu24.04', GPU: 'h100', DRIVER: 'latest', DEPENDENCIES: 'latest' } | ||
| # arm64 | ||
| - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8', GPU: 'a100', DRIVER: 'latest', DEPENDENCIES: 'oldest' } | ||
| - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.2.2', LINUX_VER: 'ubuntu22.04', GPU: 'l4', DRIVER: 'latest', DEPENDENCIES: 'latest' } | ||
| - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.0.1', LINUX_VER: 'ubuntu24.04', GPU: 'l4', DRIVER: 'latest', DEPENDENCIES: 'latest' } | ||
| " | ||
| # only overwrite MATRIX_TYPE if it was set to 'auto' | ||
| if [[ "${MATRIX_TYPE}" == "auto" ]]; then | ||
| if [[ "${BUILD_TYPE}" == "branch" ]]; then | ||
| # Use the nightly matrix for branch tests | ||
| MATRIX_TYPE="nightly" | ||
| else | ||
| MATRIX_TYPE="${BUILD_TYPE}" | ||
| fi | ||
| fi | ||
| export MATRIX_TYPE | ||
| TEST_MATRIX=$(yq -n 'env(MATRICES) | .[strenv(MATRIX_TYPE)]') | ||
| export TEST_MATRIX | ||
| MATRIX="$( | ||
| yq -n -o json 'env(TEST_MATRIX)' | \ | ||
| jq -c "${MATRIX_FILTER} | if (. | length) > 0 then {include: .} else \"Error: Empty matrix\n\" | halt_error(1) end" | ||
| )" | ||
| echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" | ||
| test: | ||
| # One job per entry of the matrix produced by 'compute-matrix'; the job name lists that entry's fields. | ||
| name: ${{ matrix.CUDA_VER }}, ${{ matrix.PY_VER }}, ${{ matrix.ARCH }}, ${{ matrix.LINUX_VER }}, ${{ matrix.GPU }}, ${{ matrix.DRIVER }}-driver, ${{ matrix.DEPENDENCIES }}-deps | ||
| needs: compute-matrix | ||
| env: | ||
| RAPIDS_ARTIFACTS_DIR: ${{ github.workspace }}/artifacts | ||
| RAPIDS_DEPENDENCIES: ${{ matrix.DEPENDENCIES }} | ||
| RAPIDS_TESTS_DIR: ${{ github.workspace }}/test-results | ||
| strategy: | ||
| # A failing matrix entry does not cancel the remaining entries. | ||
| fail-fast: false | ||
| matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }} | ||
| # The runner label is assembled from the matrix entry's ARCH, GPU and DRIVER. | ||
| runs-on: "linux-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1" | ||
| container: | ||
| # The image tag is assembled from the matrix entry's CUDA_VER, LINUX_VER and PY_VER. | ||
| image: "rapidsai/citestwheel:25.12-cuda${{ matrix.CUDA_VER }}-${{ matrix.LINUX_VER }}-py${{ matrix.PY_VER }}" | ||
| options: ${{ inputs.container-options }} | ||
| env: | ||
| NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} # GPU jobs must set this container env variable | ||
| RAPIDS_BUILD_TYPE: ${{ inputs.build_type }} | ||
| steps: | ||
| # Assume the AWS role named by vars.AWS_ROLE_ARN; the session is requested for 12 hours. | ||
| - uses: aws-actions/configure-aws-credentials@00943011d9042930efac3dcd3a170e4273319bc8 # v5.1.0 | ||
| with: | ||
| role-to-assume: ${{ vars.AWS_ROLE_ARN }} | ||
| aws-region: ${{ vars.AWS_REGION }} | ||
| role-duration-seconds: 43200 # 12h | ||
| - name: Run nvidia-smi to make sure GPU is working | ||
| run: nvidia-smi | ||
| # Check out the caller-selected repo at the requested SHA with full history and without persisting credentials. | ||
| # NOTE(review): this action is referenced by tag, unlike the SHA-pinned third-party actions in this job - consider pinning. | ||
| - name: checkout code repo | ||
| uses: actions/checkout@v5 | ||
| with: | ||
| repository: ${{ inputs.repo }} | ||
| ref: ${{ inputs.sha }} | ||
| fetch-depth: 0 # unshallow fetch for setuptools-scm | ||
| persist-credentials: false | ||
| # Best-effort (continue-on-error) and only run when the TELEMETRY_ENABLED variable is 'true'. | ||
| - name: Telemetry setup | ||
| uses: rapidsai/shared-actions/telemetry-dispatch-setup@main | ||
| continue-on-error: true | ||
| if: ${{ vars.TELEMETRY_ENABLED == 'true' }} | ||
| env: | ||
| # DOES NOT NEED alternative-gh-token-secret-name - github.token is enough and more limited | ||
| GH_TOKEN: ${{ github.token }} | ||
| with: | ||
| extra_attributes: "rapids.PACKAGER=wheel,rapids.CUDA_VER=${{ matrix.CUDA_VER }},rapids.PY_VER=${{ matrix.PY_VER }},rapids.ARCH=${{ matrix.ARCH }},rapids.LINUX_VER=${{ matrix.LINUX_VER }},rapids.GPU=${{ matrix.GPU }},rapids.DRIVER=${{ matrix.DRIVER }},rapids.DEPENDENCIES=${{ matrix.DEPENDENCIES }}" | ||
| # Receives the repo/branch/date/sha/build_workflow_name inputs of this workflow unchanged. | ||
| - name: Standardize repository information | ||
| uses: rapidsai/shared-actions/rapids-github-info@main | ||
| with: | ||
| repo: ${{ inputs.repo }} | ||
| branch: ${{ inputs.branch }} | ||
| date: ${{ inputs.date }} | ||
| sha: ${{ inputs.sha }} | ||
| build_workflow_name: ${{ inputs.build_workflow_name }} | ||
| # Best-effort: a failure of this step does not fail the job. | ||
| - name: Setup proxy cache | ||
| uses: nv-gha-runners/setup-proxy-cache@main | ||
| continue-on-error: true | ||
| # Install latest rapidsai/sccache client and configure sccache-dist | ||
| # Runs only when the 'sccache-dist-token-secret-name' input is non-empty. | ||
| # NOTE(review): the action is referenced from the 'fea/setup-sccache-dist' branch rather than a release ref, | ||
| # and the cache-slug starts with 'conda-' although this workflow tests wheels - confirm both are intended. | ||
| - name: Setup sccache-dist | ||
| uses: rapidsai/shared-actions/setup-sccache-dist@fea/setup-sccache-dist | ||
| if: ${{ inputs.sccache-dist-token-secret-name != '' }} | ||
| env: | ||
| AWS_REGION: "${{env.AWS_REGION}}" | ||
| AWS_ACCESS_KEY_ID: "${{env.AWS_ACCESS_KEY_ID}}" | ||
| AWS_SECRET_ACCESS_KEY: "${{env.AWS_SECRET_ACCESS_KEY}}" | ||
| with: | ||
| auth: "${{ secrets[inputs.sccache-dist-token-secret-name] }}" # zizmor: ignore[overprovisioned-secrets] | ||
| cache-slug: "conda-py${{matrix.PY_VER}}-cuda${{matrix.CUDA_VER}}-${{matrix.ARCH}}" | ||
| log-file: "${{ env.RAPIDS_ARTIFACTS_DIR }}/sccache.log" | ||
| request-timeout: ${{ inputs.sccache-dist-request-timeout }} | ||
| # Per the docs at https://docs.github.com/en/rest/rate-limit/rate-limit?apiVersion=2022-11-28#get-rate-limit-status-for-the-authenticated-user, | ||
| # checking '/rate_limit | jq .' should not itself count against any rate limits. | ||
| # | ||
| # gh CLI is pre-installed on Github-hosted runners, but may not be on self-hosted runners. | ||
| # When 'gh' is missing the step only prints a notice and succeeds. | ||
| - name: Check GitHub API rate limits | ||
| run: | | ||
| if ! type gh >/dev/null; then | ||
| echo "'gh' CLI is not installed... skipping rate-limits check" | ||
| else | ||
| gh api /rate_limit | jq . | ||
| fi | ||
| env: | ||
| # NEEDS alternative-gh-token-secret-name - API limits need to be for whatever token is used for upload/download. Repo token may be a different pool for rate limits. | ||
| GH_TOKEN: ${{ inputs.alternative-gh-token-secret-name && secrets[inputs.alternative-gh-token-secret-name] || github.token }} # zizmor: ignore[overprovisioned-secrets] | ||
| # When sccache-dist is in use (SCCACHE_DIST_TOKEN_NAME is non-empty), the open-file limit is raised to the | ||
| # current hard limit before the caller's script runs. $INPUTS_SCRIPT is expanded unquoted, so the shell | ||
| # word-splits it into a command and its arguments. | ||
| - name: Run tests | ||
| run: | | ||
| if test -n "${SCCACHE_DIST_TOKEN_NAME:+x}"; then ulimit -n "$(ulimit -Hn)"; fi | ||
| $INPUTS_SCRIPT | ||
| env: | ||
| INPUTS_SCRIPT: "${{ inputs.script }}" | ||
| SCCACHE_DIST_TOKEN_NAME: "${{ inputs.sccache-dist-token-secret-name }}" | ||
| # NEEDS alternative-gh-token-secret-name - may require a token with more permissions | ||
| GH_TOKEN: ${{ inputs.alternative-gh-token-secret-name && secrets[inputs.alternative-gh-token-secret-name] || github.token }} # zizmor: ignore[overprovisioned-secrets] | ||
| RAPIDS_AUX_SECRET_1: ${{ inputs.rapids-aux-secret-1 != '' && secrets[inputs.rapids-aux-secret-1] || '' }} # zizmor: ignore[overprovisioned-secrets] | ||
| # Runs even when earlier steps failed (always()); reads the *.xml files under RAPIDS_TESTS_DIR. | ||
| - name: Generate test report | ||
| uses: test-summary/action@31493c76ec9e7aa675f1585d3ed6f1da69269a86 # v2.4 | ||
| with: | ||
| paths: "${{ env.RAPIDS_TESTS_DIR }}/*.xml" | ||
| show: ${{ inputs.test_summary_show }} | ||
| if: always() | ||
| # Skipped only when the run was cancelled; the upload name combines the CUDA major version, | ||
| # the machine architecture and the Python version with its dots removed. | ||
| - name: Upload additional artifacts | ||
| if: "!cancelled()" | ||
| run: rapids-upload-artifacts-dir "cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch)_py${RAPIDS_PY_VERSION//.}" | ||
| # Best-effort (continue-on-error) and only run when the TELEMETRY_ENABLED variable is 'true'. | ||
| - name: Telemetry upload attributes | ||
| uses: rapidsai/shared-actions/telemetry-dispatch-stash-job-artifacts@main | ||
| continue-on-error: true | ||
| if: ${{ vars.TELEMETRY_ENABLED == 'true' }} | ||
| env: | ||
| # DOES NOT NEED alternative-gh-token-secret-name - github.token is enough and more limited | ||
| GH_TOKEN: ${{ github.token }} | ||