Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
8e99e73
fix axlearn tests, and remove not necessary ones
Steboss May 9, 2025
f84b130
fix axlearn test
Steboss May 9, 2025
a7b5e79
fix summary write up and fix exclude patterns
Steboss May 12, 2025
e785509
fix axlearn tests
Steboss May 12, 2025
f82bda4
add a workflow dispatch for running selective jobs + try to run axlae…
Steboss May 13, 2025
fc34220
fix error
Steboss May 13, 2025
7294422
Merge branch 'main' into sbosisio/fix_axlearn_tests
Steboss May 13, 2025
7f357ff
wrong variable
Steboss May 13, 2025
b6e5121
Merge branch 'sbosisio/fix_axlearn_tests' of github.com:NVIDIA/JAX-To…
Steboss May 13, 2025
b4c5831
Fix output directory
Steboss May 13, 2025
fb1666c
fix the copy from s3
Steboss May 14, 2025
cb3dadc
fix the aws cp command
Steboss May 14, 2025
d105d81
Fake test to run ci
Steboss May 14, 2025
9fc1724
try to revert this action in order to detect failures and successes
Steboss May 14, 2025
833ba61
revert changes to k8s checker
Steboss May 14, 2025
f13deb8
add the xla flag
Steboss May 14, 2025
e6e6e52
Merge branch 'main' into sbosisio/fix_axlearn_tests
Steboss May 15, 2025
589e06a
try with new branch for runnings tests
Steboss May 15, 2025
94579a7
Merge branch 'sbosisio/fix_axlearn_tests' of github.com:NVIDIA/JAX-To…
Steboss May 15, 2025
0febad3
back to the origins
Steboss May 15, 2025
d852bc9
fix error
Steboss May 15, 2025
3e3ed35
exclude the host_array_test and try to run everything on gpus
Steboss May 15, 2025
d871ad2
remove unnecessary tests
Steboss May 15, 2025
b971e9e
exclude unnecessary tests
Steboss May 16, 2025
2489910
test with cuda as platform
Steboss May 16, 2025
33b400f
fix tests
Steboss May 16, 2025
e6e204c
Fix tests for GPUs and devices
Steboss May 16, 2025
440fc2d
try to check what gpus capabilities we see
Steboss May 19, 2025
6bdd345
Merge branch 'main' into sbosisio/fix_axlearn_tests
Steboss May 19, 2025
8a46fce
Update .github/eks-workflow-files/axlearn/axlearn-job.yml
Steboss May 19, 2025
acce96a
run the for 8 devices test
Steboss May 19, 2025
ca165c9
Merge branch 'sbosisio/fix_axlearn_tests' of github.com:NVIDIA/JAX-To…
Steboss May 19, 2025
404c799
fix script for jobs
Steboss May 19, 2025
8b7c570
fix error in test variable
Steboss May 19, 2025
8443391
remove unnecessary cuda
Steboss May 19, 2025
d93ca0e
Merge branch 'main' into sbosisio/fix_axlearn_tests
Steboss May 20, 2025
dff5456
reset CI to standard
Steboss May 20, 2025
b53408f
Merge branch 'sbosisio/fix_axlearn_tests' of github.com:NVIDIA/JAX-To…
Steboss May 20, 2025
c6f8342
test on tests
Steboss May 20, 2025
1388e6b
Merge branch 'main' into sbosisio/fix_axlearn_tests
Steboss May 20, 2025
be90585
fix test and run 8_devices
Steboss May 20, 2025
3171727
Merge branch 'sbosisio/fix_axlearn_tests' of github.com:NVIDIA/JAX-To…
Steboss May 20, 2025
f815fa3
install missing packages
Steboss May 20, 2025
dd22aaf
reset ci axlearn
Steboss May 20, 2025
25d1c77
fix @olupton comments
Steboss May 21, 2025
8a20be6
fix @olupton comments
Steboss May 21, 2025
66e374d
reset ci
Steboss May 21, 2025
7ff0d6f
Fix whitespace
Steboss May 21, 2025
5398b7c
Merge branch 'main' into sbosisio/fix_axlearn_tests
Steboss May 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/container/Dockerfile.axlearn
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ seqio==0.0.18
protobuf==3.20.3
pytest>=7.4.3
tensorflow==2.18.1
pytest-xdist
pytest-reportlog
REQUIREMENTS
EOF

Expand Down
52 changes: 32 additions & 20 deletions .github/container/test-axlearn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ cd "$DIR" || exit 1
echo "Running tests..."

# Extra Python packages needed by the test suite. torch/torchvision are pulled
# from the PyTorch CPU wheel index given by --index-url.
pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
# Installed once; a repeated identical install would only add run time.
pip install timm transformers scikit-learn


if [ "${#TEST_FILES[@]}" -eq 0 ]; then
Expand All @@ -117,30 +117,42 @@ if [ "${#expanded_test_files[@]}" -eq 0 ]; then
exit 1
fi

# Tests that are skipped on purpose, either because of their run time or
# because they error out. Entries are compared with the discovered test files
# by file name only (basename), so the directory part is informational.
# The list is defined here directly; it is not read from an external file.
EXCLUDE_PATTERNS=(
  "/opt/axlearn/axlearn/common/array_serialization_test.py"
  "/opt/axlearn/axlearn/common/loss_test.py"
  "/opt/axlearn/axlearn/common/mixture_of_experts_test.py"
  "/opt/axlearn/axlearn/common/t5_test.py"
  "/opt/axlearn/axlearn/common/param_converter_test.py"
  "/opt/axlearn/axlearn/common/ssm_test.py"
  "/opt/axlearn/axlearn/common/adapter_torch_test.py"
  "/opt/axlearn/axlearn/common/state_builder_test.py"
  "/opt/axlearn/axlearn/common/convolution_test.py"
  "/opt/axlearn/axlearn/common/encoder_decoder_test.py"
  "/opt/axlearn/axlearn/common/decoder_test.py"
  "/opt/axlearn/axlearn/common/attention_test.py"
  "/opt/axlearn/axlearn/common/deberta_test.py"
  "/opt/axlearn/axlearn/common/trainer_test.py"
  "/opt/axlearn/axlearn/common/vision_transformer_test.py"
  "/opt/axlearn/axlearn/common/input_reading_comprehension_test.py"
  "/opt/axlearn/axlearn/common/input_t5_test.py"
  "/opt/axlearn/axlearn/common/distilbert_test.py"
  "/opt/axlearn/axlearn/common/summary_writer_test.py"
)

#######################################
# Rebuild final_test_files from expanded_test_files, dropping every entry
# whose file name equals the file name of an EXCLUDE_PATTERNS entry.
# Only the last path component is compared, so the directory an entry lives
# in does not matter.
# Globals:   expanded_test_files (read), EXCLUDE_PATTERNS (read),
#            final_test_files (written)
# Arguments: none
# Returns:   0
#######################################
filter_excluded_tests() {
  local candidate candidate_name excluded_name skip
  local -a excluded_names=()

  # Resolve the excluded file names once, instead of forking basename for
  # every (test file, pattern) pair.
  for candidate in "${EXCLUDE_PATTERNS[@]}"; do
    excluded_names+=("$(basename -- "$candidate")")
  done

  final_test_files=()
  for candidate in "${expanded_test_files[@]}"; do
    candidate_name="$(basename -- "$candidate")"
    skip=false
    for excluded_name in "${excluded_names[@]}"; do
      # Quoted right-hand side: literal comparison, not a glob match.
      if [[ "$candidate_name" == "$excluded_name" ]]; then
        skip=true
        break
      fi
    done
    if [[ "$skip" == false ]]; then
      final_test_files+=("$candidate")
    fi
  done
  return 0
}

filter_excluded_tests

# Initialize counters for test
Expand Down