Skip to content

Commit 8be4731

Browse files
nipung90facebook-github-bot
authored andcommitted
Call torchrec cpu tests from fbgemm test gha (pytorch#4424)
Summary: Pull Request resolved: pytorch#4424 This diff kicks off the torchrec cpu unittests along with fbgemm's cpu tests when there is a pull request on fbgemm Reviewed By: spcyppt Differential Revision: D77598015
1 parent 7a67041 commit 8be4731

File tree

2 files changed

+317
-0
lines changed

2 files changed

+317
-0
lines changed
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
# Exit on failure
9+
set -e
10+
11+
verbose=0
12+
python_version=""
13+
14+
# Print command-line help for this script to stdout.
#
# Only the options that the getopts loop in this script actually handles are
# advertised: -p and -b (both required), plus the optional -v and -h.
# Takes no arguments and does not exit; callers choose the exit status.
usage () {
  local script_name
  script_name="$(basename "${BASH_SOURCE[0]}")"

  echo "Usage: bash ${script_name} -p PYTHON_VERSION -b BUILD_ENV [-v] [-h]"
  echo "-v : verbose"
  echo "-h : help"
  echo "PYTHON_VERSION : Python version (e.g., 3.10)"
  echo "BUILD_ENV : build environment name (e.g., build_env)"
  echo "Example: Run torchrec tests with Python 3.10 in the conda environment build_env"
  echo "    bash ${script_name} -v -p 3.10 -b build_env"
}
25+
26+
# Parse command-line options.
#   -v        enable verbose tracing
#   -p VALUE  Python version (required)
#   -b VALUE  name of the build environment (required)
#   -h        print the usage text and exit 0
# Any other option prints the usage text and exits with status 1.

# Start from an empty value so that a `build_env` variable inherited from the
# caller's environment cannot satisfy the required -b option.
build_env=""

# The optstring lists only options that have a case arm below.
while getopts "vhp:b:" flag; do
  case "$flag" in
    v) verbose="1" ;;
    p) python_version="${OPTARG}" ;;
    b) build_env="${OPTARG}" ;;
    h)
      usage
      exit 0
      ;;
    *)
      usage
      exit 1
      ;;
  esac
done

# Both -p and -b are mandatory.
if [[ -z "$python_version" || -z "$build_env" ]]; then
  usage
  exit 1
fi
43+
44+
env_name=$build_env
45+
python_tag="${python_version//\./}"
46+
47+
if [ "$verbose" == "1" ]; then
48+
# Print each line verbosely
49+
set -x -e
50+
fi
51+
52+
################################################################################
53+
echo "## 0. Minimal check"
54+
################################################################################
55+
56+
if [ ! -d "torchrec" ]; then
57+
echo "Error: this script must be executed in torchrec/"
58+
exit 1
59+
fi
60+
61+
# Install PyTorch
62+
conda run -n "$env_name" pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu
63+
conda run -n "$env_name" python -c "import torch"
64+
65+
# Import torch.distributed
66+
conda run -n "$env_name" python -c "import torch.distributed"
67+
68+
# Import fbgemm_gpu
69+
70+
echo "Importing FBGEMM-GPU..."
71+
conda run -n "$env_name" python -c "import fbgemm_gpu"
72+
73+
################################################################################
74+
echo "## 1. Install TorchRec Requirements"
75+
################################################################################
76+
# Comment out FBGEMM_GPU since we should pre-install it from the downloaded wheel file
77+
sed -i 's/fbgemm-gpu/#fbgemm-gpu/g' requirements.txt
78+
conda run -n "$env_name" python -m pip install -r requirements.txt
79+
80+
81+
################################################################################
82+
echo "## 2. Build TorchRec"
83+
################################################################################
84+
85+
rm -rf dist
86+
conda run -n "$env_name" python setup.py bdist_wheel --python-tag="py${python_tag}"
87+
88+
################################################################################
89+
echo "## 3. Import TorchRec"
90+
################################################################################
91+
92+
conda run -n "$env_name" python -c "import torchrec"
93+
94+
echo "Test succeeded"
95+
96+
################################################################################
97+
echo "## 4. Run TorchRec tests"
98+
################################################################################
99+
100+
conda install -n "$env_name" -y pytest
101+
# Build the pytest -k expression that deselects tests.
#
# Each line of tests_to_skip.txt that is not blank (or whitespace-only) and is
# not a comment becomes a "not <pattern>" term; terms are joined with " and ".
skip_expression=$(
  awk '!/^[[:space:]]*($|#)/ { printf "%snot %s", sep, $0; sep = " and " }' \
    ./.github/scripts/tests_to_skip.txt
)

# test_dlrm_inference_package is always skipped. The " and " joiner is only
# inserted when the file contributed at least one term; otherwise the
# expression would begin with a dangling "and", which pytest rejects.
skip_expression="${skip_expression:+${skip_expression} and }not test_dlrm_inference_package"
112+
113+
# Run the TorchRec tests inside the build environment, deselecting everything
# named in skip_expression. The ignore glob is quoted so the shell hands it to
# pytest verbatim instead of attempting pathname expansion on it.
conda run -n "$env_name" \
  python -m pytest torchrec -v -s \
  -W ignore::pytest.PytestCollectionWarning \
  --continue-on-collection-errors \
  --ignore-glob='**/test_utils/' \
  -k "$skip_expression"
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
# This workflow builds FBGEMM_GPU-CPU wheels against PyTorch-CPU Nightly and
7+
# then runs the TorchRec CPU tests against those wheels.
8+
name: FBGEMM_GPU_TORCHREC-CPU CI
9+
10+
on:
11+
# PR Trigger (enabled for regression checks and debugging)
12+
#
13+
pull_request:
14+
branches:
15+
- main
16+
17+
# Push Trigger (enable to catch errors coming out of multiple merges)
18+
#
19+
push:
20+
branches:
21+
- main
22+
23+
# Cron Trigger (UTC)
24+
#
25+
# Based on the Conda page for PyTorch-nightly, the CPU nightly releases appear
26+
# around 00:30 PST every day
27+
#
28+
schedule:
29+
- cron: '45 12 * * *'
30+
31+
# Manual Trigger
32+
#
33+
workflow_dispatch:
34+
inputs:
35+
pytorch_channel_version:
36+
description: Package Channel + Version to Use for PyTorch Installation, in `<channel>[/<version>]` Format
37+
type: string
38+
required: false
39+
default: ""
40+
publish_to_pypi:
41+
description: Publish Artifact to PyPI
42+
type: boolean
43+
required: false
44+
default: false
45+
46+
concurrency:
47+
# Cancel previous runs in the PR if a new commit is pushed
48+
# https://stackoverflow.com/questions/66335225/how-to-cancel-previous-runs-in-the-pr-when-you-push-new-commitsupdate-the-curre
49+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
50+
cancel-in-progress: true
51+
52+
jobs:
53+
# Build on CPU hosts, run tests, and upload to GHA
54+
build_artifact:
55+
if: ${{ github.repository_owner == 'pytorch' }}
56+
runs-on: ${{ matrix.host-machine.instance }}
57+
container:
58+
image: amazonlinux:2023
59+
options: --user root
60+
defaults:
61+
run:
62+
shell: bash
63+
env:
64+
PRELUDE: .github/scripts/setup_env.bash
65+
BUILD_ENV: build_binary
66+
BUILD_TARGET: ${{ matrix.build-target }}
67+
BUILD_VARIANT: cpu
68+
continue-on-error: true
69+
strategy:
70+
# Don't fast-fail all the other builds if one of them fails
71+
fail-fast: false
72+
matrix:
73+
host-machine: [
74+
{ arch: x86, instance: "linux.4xlarge" },
75+
{ arch: arm, instance: "linux.arm64.m7g.4xlarge" },
76+
]
77+
build-target: [ "default" ]
78+
python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
79+
compiler: [ "gcc", "clang" ]
80+
81+
steps:
82+
- name: Setup Build Container
83+
run: yum update -y; yum install -y binutils findutils git pciutils sudo wget which
84+
85+
- name: Checkout the Repository
86+
uses: actions/checkout@v4
87+
with:
88+
ref: ${{ (github.event_name == 'schedule' && 'nightly') || github.ref }}
89+
90+
- name: Display System Info
91+
run: . $PRELUDE; print_system_info
92+
93+
- name: Display GPU Info
94+
run: . $PRELUDE; print_gpu_info
95+
96+
- name: Setup Miniconda
97+
run: . $PRELUDE; setup_miniconda $HOME/miniconda
98+
99+
- name: Create Conda Environment
100+
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
101+
102+
- name: Install C/C++ Compilers
103+
run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}
104+
105+
- name: Install Build Tools
106+
run: . $PRELUDE; install_build_tools $BUILD_ENV
107+
108+
- name: Install PyTorch-CPU Nightly
109+
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cpu
110+
111+
- name: Collect PyTorch Environment Info
112+
if: ${{ success() || failure() }}
113+
run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi
114+
115+
- name: Prepare FBGEMM_GPU Build
116+
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
117+
118+
- name: Build FBGEMM_GPU Wheel
119+
run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV nightly ${{ matrix.build-target }}/cpu
120+
121+
- name: Upload Built Wheel as GHA Artifact
122+
uses: actions/upload-artifact@v4
123+
with:
124+
name: fbgemm_${{ matrix.build-target }}_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cpu.whl
125+
path: fbgemm_gpu/dist/*.whl
126+
if-no-files-found: error
127+
128+
# Run torchrec CPU tests
129+
torchrec_cpu_tests:
130+
if: ${{ github.repository_owner == 'pytorch' }}
131+
runs-on: ${{ matrix.host-machine.instance }}
132+
container:
133+
image: amazonlinux:2023
134+
options: --user root
135+
defaults:
136+
run:
137+
shell: bash
138+
env:
139+
PRELUDE: .github/scripts/setup_env.bash
140+
BUILD_ENV: build_binary
141+
BUILD_TARGET: ${{ matrix.build-target }}
142+
BUILD_VARIANT: cpu
143+
strategy:
144+
fail-fast: false
145+
matrix:
146+
host-machine: [
147+
# { arch: arm, instance: "linux.arm64.2xlarge", timeout: 30 },
148+
{ arch: x86, instance: "linux.4xlarge", timeout: 20 },
149+
]
150+
build-target: [ "default" ]
151+
python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
152+
compiler: [ "gcc", "clang" ]
153+
needs: build_artifact
154+
155+
steps:
156+
- name: Setup Build Container
157+
run: yum update -y; yum install -y binutils findutils git pciutils sudo wget which
158+
159+
- name: Checkout the Repository
160+
uses: actions/checkout@v4
161+
162+
- name: Display System Info
163+
run: . $PRELUDE; print_system_info; print_ec2_info
164+
165+
- name: Display GPU Info
166+
run: . $PRELUDE; print_gpu_info
167+
168+
- name: Setup Miniconda
169+
run: . $PRELUDE; setup_miniconda $HOME/miniconda
170+
171+
- name: Create Conda Environment
172+
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
173+
174+
- name: Install C/C++ Compilers for Updated LIBGCC
175+
run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}
176+
177+
- name: Install PyTorch-CPU Nightly
178+
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cpu
179+
180+
- name: Collect PyTorch Environment Info
181+
if: ${{ success() || failure() }}
182+
run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi
183+
184+
- name: Download Wheel Artifact from GHA
185+
uses: actions/download-artifact@v4
186+
with:
187+
name: fbgemm_${{ matrix.build-target }}_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cpu.whl
188+
189+
- name: Prepare FBGEMM_GPU Build
190+
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
191+
192+
- name: Install FBGEMM_GPU Wheel
193+
run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl
194+
195+
- name: Clone torchrec
196+
uses: actions/checkout@v4
197+
with:
198+
repository: pytorch/torchrec
199+
path: torchrec
200+
201+
- name: Run torchrec CPU tests
202+
run: . $PRELUDE; cd torchrec; ../.github/scripts/torchrec_cpu_tests.bash -p ${{ matrix.python-version }} -b $BUILD_ENV -v 1

0 commit comments

Comments
 (0)