# Updated for the new Transformer Engine (TE) version — see PR #1127.
name: "BioNeMo Recipes CI"

# Triggers: copybara-style pull-request refs and dependabot branches on push,
# merge-queue validation, and a daily scheduled full run.
on:
  push:
    branches:
      - "pull-request/[0-9]+"
      - "dependabot/**"
  merge_group:
    types: [checks_requested]
  schedule:
    - cron: "0 9 * * *" # Runs at 9 AM UTC daily (2 AM MST)

# All run steps: trace commands (-x), fail on errors (-e), undefined vars (-u),
# and pipeline failures (pipefail).
defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

# Cancel superseded runs of the same PR (or ref) to free runners.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  # Detect which recipe/model directories changed relative to main and emit
  # the test matrix (directory + docker image) for the unit-tests job.
  changed-dirs:
    runs-on: ubuntu-latest
    outputs:
      any_changed: ${{ steps.changed-files.outputs.any_changed }}
      all_changed_files: ${{ steps.changed-files.outputs.all_changed_files }}
      dirs: ${{ steps.set-dirs.outputs.dirs }}
    steps:
      # PR metadata (labels) is only available when building a pull-request/* ref.
      - id: get-pr-info
        if: ${{ startsWith(github.ref_name, 'pull-request/') }}
        uses: nv-gha-runners/get-pr-info@main
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # full history so merge-base against origin/main resolves
      - name: Get merge-base commit
        id: merge-base
        run: |
          # Get the merge-base between current branch and main
          MERGE_BASE=$(git merge-base HEAD origin/main)
          echo "merge-base=$MERGE_BASE" >> "$GITHUB_OUTPUT"
          echo "Merge-base commit: $MERGE_BASE"
      - name: Get changed files
        id: changed-files
        uses: step-security/changed-files@v46
        with:
          json: true
          matrix: true
          base_sha: ${{ steps.merge-base.outputs.merge-base }}
          dir_names: true
          dir_names_max_depth: 3
          files: |
            bionemo-recipes/models/**
            bionemo-recipes/recipes/**
      - id: set-dirs
        name: Determine which directories to run
        env:
          EVENT_NAME: ${{ github.event_name }}
          PR_INFO: ${{ steps.get-pr-info.outputs.pr-info }}
          CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          # Get all recipe and model directories as a compact JSON array of paths.
          ALL_DIRS=$(ls -d bionemo-recipes/models/*/ bionemo-recipes/recipes/*/ 2>/dev/null | jq -R -s -c 'split("\n")[:-1] | map(rtrimstr("/"))')
          # Determine which directories to run: all for schedule, filtered for other events
          if [[ "$EVENT_NAME" == "schedule" ]]; then
            DIRS="$ALL_DIRS"
          else
            # Check if "ciflow:all-recipes" label is present
            HAS_INCLUDE_ALL_LABEL=false
            if [[ "$PR_INFO" != "null" && "$PR_INFO" != "" ]]; then
              if echo "$PR_INFO" | jq -e '.labels[]? | select(.name == "ciflow:all-recipes")' > /dev/null 2>&1; then
                HAS_INCLUDE_ALL_LABEL=true
                echo "Found 'ciflow:all-recipes' label - running all directories"
              fi
            fi
            if [[ "$HAS_INCLUDE_ALL_LABEL" == "true" ]]; then
              DIRS="$ALL_DIRS"
            else
              # Filter directories to only those that have changed files
              DIRS=$(echo "$ALL_DIRS" | jq -c --argjson changed "$CHANGED_FILES" '
                map(select(. as $dir | $changed | index($dir) != null))
              ')
            fi
          fi
          # Assign Docker images to the selected directories
          # Currently, AMPLIFY is the only folder that needs a custom base image, since we have to support both TE and
          # xformers-based models for golden value testing. The rest of the models use the default pytorch image.
          # This uses a squashed version of the pytorch:25.10-py3 image, generated with `docker-squash
          # nvcr.io/nvidia/pytorch:25.10-py3 -t svcbionemo023/bionemo-framework:pytorch25.10-py3-squashed --output
          # type=registry,compression=zstd,force-compression=true,oci-mediatypes=true,compression-level=15` and pushed
          # to the dockerhub registry. Our github actions are able to cache image pulls from dockerhub but not nvcr, so
          # hopefully this cuts down slightly on CI time at the expense of having a slightly in-directed image location.
          DIRS_WITH_IMAGES=$(echo "$DIRS" | jq -c '
            map({
              dir: .,
              name: (. | sub("^bionemo-recipes/"; "")),
              image: (
                if . == "bionemo-recipes/models/amplify" then
                  "svcbionemo023/bionemo-framework:amplify-model-devcontainer-082025"
                else
                  # "nvcr.io/nvidia/pytorch:25.10-py3"
                  "svcbionemo023/bionemo-framework:pytorch25.10-py3-squashed"
                end
              )
            })
          ')
          echo "dirs=$DIRS_WITH_IMAGES" >> "$GITHUB_OUTPUT"
      # Debugging aid: print the computed change set and matrix.
      - name: Show output
        shell: bash
        run: |
          echo "=== Changed Files Analysis ==="
          echo "Current branch: ${{ github.ref_name }}"
          echo "Merge-base commit: ${{ steps.merge-base.outputs.merge-base }}"
          echo "Changed files compared to merge-base:"
          echo '${{ steps.changed-files.outputs.all_changed_files }}' | jq -r '.[]' | sed 's/^/ - /'
          echo "Total changed files: $(echo '${{ steps.changed-files.outputs.all_changed_files }}' | jq '. | length')"
          echo '${{ toJSON(steps.changed-files.outputs) }}'
          echo '${{ toJSON(steps.set-dirs.outputs) }}'
unit-tests:
needs: changed-dirs
runs-on: linux-amd64-gpu-l4-latest-1
if: ${{ needs.changed-dirs.outputs.dirs != '[]' }}
name: "unit-tests (${{ matrix.recipe.name }})"
container:
image: ${{ matrix.recipe.image }}
options: --shm-size=16G
env:
CI: true
HF_TOKEN: ${{ secrets.HF_TOKEN }}
strategy:
matrix:
recipe: ${{ fromJson(needs.changed-dirs.outputs.dirs) }}
fail-fast: false
steps:
- name: Show GPU info
run: nvidia-smi
- name: Setup proxy cache
uses: nv-gha-runners/setup-proxy-cache@main
- name: Checkout repository
uses: actions/checkout@v4
with:
sparse-checkout: "${{ matrix.recipe.dir }}"
sparse-checkout-cone-mode: false
- name: Install dependencies
working-directory: ${{ matrix.recipe.dir }}
run: |
if [ -f pyproject.toml ] || [ -f setup.py ]; then
PIP_CONSTRAINT= pip install -e .
echo "Installed ${{ matrix.recipe.dir }} as editable package"
elif [ -f requirements.txt ]; then
PIP_CONSTRAINT= pip install -r requirements.txt
echo "Installed ${{ matrix.recipe.dir }} from requirements.txt"
else
echo "No pyproject.toml, setup.py, or requirements.txt found in ${{ matrix.recipe.dir }}"
exit 1
fi
- name: Run tests
working-directory: ${{ matrix.recipe.dir }}
run: pytest -v .
verify-recipe-tests:
# This job checks the status of the unit-tests matrix and fails if any matrix job failed or was cancelled.
# Use this job as the required check for PRs.
needs: unit-tests
runs-on: ubuntu-latest
if: always()
steps:
- name: Check unit-tests matrix status
run: |
if [[ "${{ needs.unit-tests.result }}" == "failure" || "${{ needs.unit-tests.result }}" == "cancelled" ]]; then
echo "Some unit-tests matrix jobs have failed or been cancelled!"
exit 1
else
echo "All unit-tests matrix jobs have completed successfully or were skipped!"
exit 0
fi