diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py index c9afb73eb..ff78bffe1 100644 --- a/.github/scripts/check-ut.py +++ b/.github/scripts/check-ut.py @@ -186,14 +186,14 @@ def parse_log_file(log_file): return summary def determine_category(ut): - if ut == 'op_regression': - return 'op_regression' - elif ut == 'op_regression_dev1': - return 'op_regression_dev1' - elif ut == 'op_extended': - return 'op_extended' - elif 'op_ut' in ut: - return 'op_ut' + if ut == 'ut_regression': + return 'ut_regression' + elif ut == 'xpu_dev1': + return 'xpu_dev1' + elif ut == 'ut_extended': + return 'ut_extended' + elif 'ut_op' in ut: + return 'ut_op' else: return 'unknown' diff --git a/.github/scripts/ut_result_check.sh b/.github/scripts/ut_result_check.sh index cf1c4a1df..dd399471d 100644 --- a/.github/scripts/ut_result_check.sh +++ b/.github/scripts/ut_result_check.sh @@ -1,5 +1,5 @@ #!/bin/bash -ut_suite="${1:-op_regression}" # op_regression / op_extended / op_ut / torch_xpu +ut_suite="${1:-ut_regression}" # ut_regression / ut_extended / ut_op / ut_torch # usage # compare_and_filter_logs [output.log] @@ -101,7 +101,7 @@ check_passed_known_issues() { fi } -if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" == 'op_regression_dev1' || "${ut_suite}" == 'op_extended' || "${ut_suite}" == 'op_transformers' ]]; then +if [[ "${ut_suite}" == 'ut_regression' || "${ut_suite}" == 'xpu_dev1' || "${ut_suite}" == 'ut_extended' || "${ut_suite}" == 'ut_transformers' ]]; then grep -E "FAILED" "${ut_suite}"_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_failed.log grep -E "have failures" "${ut_suite}"_test.log | awk '{print $1}' >> ./"${ut_suite}"_failed.log grep -E "Timeout" "${ut_suite}"_test.log | grep "test" >> ./"${ut_suite}"_failed.log @@ -131,7 +131,7 @@ if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" == 'op_regression_dev1' echo -e "[PASS] UT ${ut_suite} test Pass" fi fi -if [[ "${ut_suite}" == 'op_ut' ]]; then +if [[ "${ut_suite}" == 
'ut_op' ]]; then grep -E "FAILED" op_ut_with_skip_test.log | awk '{print $1}' | grep -v "FAILED" > ./"${ut_suite}"_with_skip_test_failed.log grep -E "have failures" op_ut_with_skip_test.log | awk '{print $1}' >> ./"${ut_suite}"_with_skip_test_failed.log grep -E "Timeout" op_ut_with_skip_test.log | grep "test" >> ./"${ut_suite}"_with_skip_test_failed.log @@ -175,8 +175,8 @@ if [[ "${ut_suite}" == 'op_ut' ]]; then num_failed_with_only=$(wc -l < "./${ut_suite}_with_only_test_failed.log") fi ((num_failed=num_failed_with_skip+num_failed_with_only)) - grep "PASSED" op_ut_with_skip_test.log | awk '{print $1}' > ./"${ut_suite}"_with_skip_test_passed.log - grep "PASSED" op_ut_with_only_test.log | awk '{print $1}' > ./"${ut_suite}"_with_only_test_passed.log + grep "PASSED" ut_op_with_skip_test.log | awk '{print $1}' > ./"${ut_suite}"_with_skip_test_passed.log + grep "PASSED" ut_op_with_only_test.log | awk '{print $1}' > ./"${ut_suite}"_with_only_test_passed.log num_passed_with_skip=$(wc -l < "./${ut_suite}_with_skip_test_passed.log") num_passed_with_only=$(wc -l < "./${ut_suite}_with_only_test_passed.log") ((num_passed=num_passed_with_skip+num_passed_with_only)) @@ -187,13 +187,13 @@ if [[ "${ut_suite}" == 'op_ut' ]]; then echo -e "[PASS] UT ${ut_suite} test Pass" fi fi -if [[ "${ut_suite}" == 'torch_xpu' ]]; then +if [[ "${ut_suite}" == 'ut_torch' ]]; then echo "Pytorch XPU binary UT checking" cd ../../pytorch || exit for xpu_case in build/bin/*{xpu,sycl}*; do if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then case_name=$(basename "$xpu_case") - cd ../ut_log/torch_xpu || exit + cd ../ut_log/ut_torch || exit grep -E "FAILED|have failures" binary_ut_"${ut_suite}"_"${case_name}"_test.log | awk '{print $2}' > ./binary_ut_"${ut_suite}"_"${case_name}"_failed.log wc -l < "./binary_ut_${ut_suite}_${case_name}_failed.log" | tee -a ./binary_ut_"${ut_suite}"_failed_summary.log grep -E "PASSED|Pass" binary_ut_"${ut_suite}"_"${case_name}"_test.log | awk 
'{print $2}' > ./binary_ut_"${ut_suite}"_"${case_name}"_passed.log @@ -204,7 +204,7 @@ if [[ "${ut_suite}" == 'torch_xpu' ]]; then echo -e "=========================================================================" echo -e "Show Failed cases in ${ut_suite}" echo -e "=========================================================================" - cd ../ut_log/torch_xpu || exit + cd ../ut_log/ut_torch || exit cat "./binary_ut_${ut_suite}_${case_name}_failed.log" num_failed_binary_ut=$(awk '{sum += $1};END {print sum}' binary_ut_"${ut_suite}"_failed_summary.log) num_passed_binary_ut=$(awk '{sum += $1};END {print sum}' binary_ut_"${ut_suite}"_passed_summary.log) diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml index ad9d61b37..1eee38fef 100644 --- a/.github/workflows/_linux_op_benchmark.yml +++ b/.github/workflows/_linux_op_benchmark.yml @@ -60,6 +60,9 @@ jobs: env: GH_TOKEN: ${{ github.token }} steps: + - name: Cleanup workspace + run: | + find ./ |grep -v "^\./$" |xargs rm -rf - name: Checkout torch-xpu-ops uses: actions/checkout@v4 - name: Prepare test env on ${{ needs.runner.outputs.hostname }} diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 5a8f944dc..20732a3a4 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -69,7 +69,12 @@ jobs: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} steps: + - name: Cleanup workspace + if: ${{ contains(inputs.ut, matrix.test) }} + run: | + find ./ |grep -v "^\./$" |xargs rm -rf - name: Checkout torch-xpu-ops + if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/checkout@v4 - name: Prepare test env on ${{ needs.runner.outputs.hostname }} uses: ./.github/actions/linux-testenv @@ -168,18 +173,25 @@ jobs: if: ${{ ! 
cancelled() }} runs-on: ubuntu-24.04 timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + test: [ut_regression, ut_transformers, ut_extended, ut_op, ut_torch, ut_profiling, xpu_dev1, xpu_distributed] env: GH_TOKEN: ${{ github.token }} UT_SKIP_ISSUE: 1624 steps: - name: Checkout torch-xpu-ops + if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/checkout@v4 - name: Download XPU UT Logs + if: ${{ contains(inputs.ut, matrix.test) }} uses: actions/download-artifact@v4 with: name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ inputs.ut }} path: ${{ github.workspace }}/ut_log - name: Check UT Results + if: ${{ contains(inputs.ut, matrix.test) }} shell: bash run: | repo="${{ github.repository }}" @@ -192,7 +204,7 @@ jobs: grep -Eo 'test[^[:space:]]+( \|\| [^[:space:]]+)?' | sed 's/ *|| */ /g' | sort -u > issues_temp.log awk '$2 == "op_ut" {print $1}' issues_temp.log > issues_op_ut.log cat issues_temp.log | awk '{print $1}' >> Known_issue.log - awk -F'::' '{print $1}' issues_op_ut.log | sort -u | paste -sd ',' >> Known_issue.log + awk -F'::' '{print $1}' issues_op_ut.log | sort -u | paste -sd ',' >> Known_issue.log cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./ bash ut_result_check.sh ${{ inputs.ut }} - name: Upload Inductor XPU UT Log diff --git a/.github/workflows/_windows_ut.yml b/.github/workflows/_windows_ut.yml index 9ca7f7eb8..3c211ccfc 100644 --- a/.github/workflows/_windows_ut.yml +++ b/.github/workflows/_windows_ut.yml @@ -17,7 +17,7 @@ on: required: true type: string default: '' - description: UT scope. `op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu` Delimiter is comma + description: UT scope. 
`ut_regression,xpu_dev1,ut_extended,ut_op,ut_torch` Delimiter is comma python: required: false type: string @@ -157,7 +157,7 @@ jobs: path: 'C:\actions-runner\_work\torch-xpu-ops\pytorch\dist' - name: Run XPU OP Extended UT - if: contains(inputs.ut, 'op_extended') || github.event_name == 'schedule' + if: contains(inputs.ut, 'ut_extended') || github.event_name == 'schedule' shell: cmd run: | call "C:\ProgramData\miniforge3\Scripts\activate.bat" @@ -169,7 +169,7 @@ jobs: python run_test_with_skip_mtl.py - name: Run Test XPU UT - if: contains(inputs.ut, 'torch_xpu') || github.event_name == 'schedule' + if: contains(inputs.ut, 'ut_torch') || github.event_name == 'schedule' shell: cmd run: | call "C:\ProgramData\miniforge3\Scripts\activate.bat" diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 4b972bf53..b6ff7c4c7 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -154,7 +154,7 @@ jobs: needs: [conditions-filter, preci-lint-check] uses: ./.github/workflows/_windows_ut.yml with: - ut: op_extended,torch_xpu + ut: ut_extended,ut_torch runner: Windows_CI src_changed: ${{ needs.conditions-filter.outputs.src_changed }} has_label: ${{ needs.conditions-filter.outputs.has_label }} diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py index e7b07efc8..f8b0a4c31 100644 --- a/test/xpu/extended/run_test_with_skip.py +++ b/test/xpu/extended/run_test_with_skip.py @@ -1,6 +1,7 @@ import os import sys +import torch from skip_list_common import skip_dict from skip_list_win import skip_dict as skip_dict_win @@ -16,8 +17,20 @@ skip_options += skip_option skip_options += '"' +# pytest options +xpu_num = torch.xpu.device_count() +parallel_options = ( + " --dist worksteal " + + " ".join([f"--tx popen//env:ZE_AFFINITY_MASK={x}" for x in range(xpu_num)]) + if xpu_num > 1 + else " -n 1 " +) +test_options = f" --timeout 600 --timeout_method=thread {parallel_options} " + os.environ["PYTORCH_TEST_WITH_SLOW"] 
= "1" -test_command = "pytest --junit-xml=./op_extended.xml test_ops_xpu.py" +test_command = ( + f" pytest {test_options} --junit-xml=./ut_extended.xml test_ops_xpu.py " +) test_command += skip_options res = os.system(test_command) sys.exit(res) diff --git a/test/xpu/run_test_with_only.py b/test/xpu/run_test_with_only.py index 0c3d11b50..fccbc1c16 100644 --- a/test/xpu/run_test_with_only.py +++ b/test/xpu/run_test_with_only.py @@ -1,12 +1,25 @@ import os import sys +import torch + # Cases in the file is too slow to run all suites on CPU. So add white list. def launch_test(test_case, skip_list=None, exe_list=None): os.environ["PYTORCH_ENABLE_XPU_FALLBACK"] = "1" os.environ["PYTORCH_TEST_WITH_SLOW"] = "1" + + # pytest options + xpu_num = torch.xpu.device_count() + parallel_options = ( + " --dist worksteal " + + " ".join([f"--tx popen//env:ZE_AFFINITY_MASK={x}" for x in range(xpu_num)]) + if xpu_num > 1 + else " -n 1 " + ) + test_options = f" --timeout 600 --timeout_method=thread {parallel_options} " + if skip_list is not None: skip_options = ' -k "not ' + skip_list[0] for skip_case in skip_list[1:]: @@ -14,7 +27,9 @@ def launch_test(test_case, skip_list=None, exe_list=None): skip_options += skip_option skip_options += '"' test_command = ( - "pytest --junit-xml=./op_ut_with_only.xml " + test_case + skip_options + f" pytest {test_options} --junit-xml=./ut_op_with_only.xml " + + test_case + + skip_options ) return os.system(test_command) elif exe_list is not None: @@ -24,11 +39,15 @@ def launch_test(test_case, skip_list=None, exe_list=None): exe_options += exe_option exe_options += '"' test_command = ( - "pytest --junit-xml=./op_ut_with_only.xml " + test_case + exe_options + f" pytest {test_options} --junit-xml=./ut_op_with_only.xml " + + test_case + + exe_options ) return os.system(test_command) else: - test_command = "pytest --junit-xml=./op_ut_with_only.xml " + test_case + test_command = ( + f" pytest {test_options} --junit-xml=./ut_op_with_only.xml " + test_case + ) 
return os.system(test_command) diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py index f0118b0ed..f7e629d87 100644 --- a/test/xpu/xpu_test_utils.py +++ b/test/xpu/xpu_test_utils.py @@ -1163,6 +1163,17 @@ def copy_tests( def launch_test(test_case, skip_list=None, exe_list=None): os.environ["PYTORCH_ENABLE_XPU_FALLBACK"] = "1" os.environ["PYTORCH_TEST_WITH_SLOW"] = "1" + + # pytest options + xpu_num = torch.xpu.device_count() + parallel_options = ( + " --dist worksteal " + + " ".join([f"--tx popen//env:ZE_AFFINITY_MASK={x}" for x in range(xpu_num)]) + if xpu_num > 1 + else " -n 1 " + ) + test_options = f" --timeout 600 --timeout_method=thread {parallel_options} " + if skip_list is not None: skip_options = ' -k "not ' + skip_list[0] for skip_case in skip_list[1:]: @@ -1170,7 +1181,8 @@ def launch_test(test_case, skip_list=None, exe_list=None): skip_options += skip_option skip_options += '"' test_command = ( - f"pytest --junit-xml=./op_ut_with_skip_{test_case}.xml " + test_case + f" pytest {test_options} --junit-xml=./ut_op_with_skip_{test_case}.xml " + + test_case ) test_command += skip_options elif exe_list is not None: @@ -1180,11 +1192,13 @@ def launch_test(test_case, skip_list=None, exe_list=None): exe_options += exe_option exe_options += '"' test_command = ( - f"pytest --junit-xml=./op_ut_with_skip_{test_case}.xml " + test_case + f" pytest {test_options} --junit-xml=./ut_op_with_skip_{test_case}.xml " + + test_case ) test_command += exe_options else: test_command = ( - f"pytest --junit-xml=./op_ut_with_skip_{test_case}.xml " + test_case + f" pytest {test_options} --junit-xml=./ut_op_with_skip_{test_case}.xml " + + test_case ) return os.system(test_command)