# Skip to content

# update

# update #2

# Workflow file for this run

# Reusable workflow: it is only triggered through `workflow_call`, and every
# knob below is a string input supplied by the calling workflow.
name: Linux E2E Test

on:
  workflow_call:
    inputs:
      # Label of the runner used by the first job (`runs-on: inputs.runner`).
      # NOTE(review): `default` is not consulted while `required: true` is set;
      # kept unchanged for backward compatibility.
      runner:
        required: true
        type: string
        default: 'pvc_rolling'
        description: Runner label
      # Besides the values named in the description, the job steps also match
      # this input against the substrings `cicd`, `nightly`, `weekly` and
      # `ondemand` to decide which test steps run.
      test_type:
        type: string
        default: 'build-from-source'
        description: Build from source or install nightly wheel
      pytorch:
        type: string
        default: 'main'
        description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch'
      oneapi:
        type: string
        default: 'installed'
        description: Installed oneAPI DLE on host by default, fill offline.sh url if needed
      # Quoted on purpose: an unquoted 3.10 would be read as the float 3.1.
      python:
        type: string
        default: '3.10'
        description: Python version
      # The next four inputs are comma-separated lists and are only forwarded
      # to the test actions by the on-demand steps.
      suite:
        type: string
        default: 'huggingface'
        description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma
      dt:
        type: string
        default: 'float32'
        description: Data precision of the test. `float32,bfloat16,float16,amp_bf16,amp_fp16`. Delimiter is comma
      mode:
        type: string
        default: 'inference'
        description: Test mode. `inference,training`. Delimiter is comma
      scenario:
        type: string
        default: 'accuracy'
        description: Test scenario. `accuracy,performance`. Delimiter is comma
      # Exported to the test job as the MODEL_ONLY_NAME environment variable.
      model:
        required: false
        type: string
        default: ''
        description: Model. Will only run this one mode if set

# Read-only token scopes unless a job declares its own `permissions`.
permissions: read-all
jobs:
  # Runs on the requested runner label and publishes the host, user and group
  # reported by the local `get-runner` action as job outputs.
  get_runner:
    runs-on: ${{ inputs.runner }}
    outputs:
      test_host: ${{ steps.runner-info.outputs.test_host }}
      test_user: ${{ steps.runner-info.outputs.test_user }}
      test_group: ${{ steps.runner-info.outputs.test_group }}
    steps:
      # Local actions are loaded from the workspace, so the repository must be
      # checked out before any `uses: ./...` step.
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
      - name: Get runner
        id: runner-info
        # The leading `./` is required; without it the value is parsed as an
        # `owner/repo@ref` reference and the workflow is rejected.
        uses: ./.github/actions/get-runner

  # Runs the selected benchmark steps inside the PVC driver container on the
  # host chosen by `get_runner`, then uploads the collected logs.
  e2e_test:
    runs-on: ${{ needs.get_runner.outputs.test_host }}
    needs: get_runner
    # 3600 minutes = 60 hours.
    timeout-minutes: 3600
    container:
      image: mengfeili/intel-pvc-driver:1146-1136
      volumes:
        - ${{ github.workspace }}:${{ github.workspace }}
      # Folded into a single line; the container runs as the user and group
      # reported by `get_runner`.
      options: >-
        --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g
        -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }}
    # NOTE(review): the flattened source does not show whether this map was
    # `container.env` or the job-level `env`; job-level is used here because it
    # is passed to every step of the job.
    env:
      AGENT_TOOLSDIRECTORY: /tmp/_tools
      GH_TOKEN: ${{ github.token }}
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      MODEL_ONLY_NAME: ${{ inputs.model }}
    defaults:
      run:
        shell: bash -xe {0}
    steps:
      # Needed so the local actions under ./.github/actions can be resolved.
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
      - name: Setup Test Env
        uses: ./.github/actions/setup-testenv
        with:
          test_type: ${{ inputs.test_type }}
          pytorch: ${{ inputs.pytorch }}
          torch_xpu_ops: skipped
          oneapi: ${{ inputs.oneapi }}
          python: ${{ inputs.python }}

      # CICD launch
      - name: CICD Huggingface BF16 & FP16 Training Test
        if: ${{ contains(inputs.test_type, 'cicd') }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          env_prepare: true
          suite: huggingface
          dt: bfloat16,float16
          mode: training
          scenario: accuracy,performance
      - name: CICD Torchbench BF16 Training Test
        if: ${{ contains(inputs.test_type, 'cicd') }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          env_prepare: true
          suite: torchbench
          dt: bfloat16
          mode: training
          scenario: accuracy,performance
      - name: CICD Timm_models BF16 Training Test
        if: ${{ contains(inputs.test_type, 'cicd') }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          env_prepare: true
          suite: timm_models
          dt: bfloat16
          mode: training
          scenario: accuracy,performance

      # Nightly launch
      - name: Nightly Huggingface Full Test
        if: ${{ contains(inputs.test_type, 'nightly') }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          env_prepare: true
          suite: huggingface
          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
          mode: inference,training
          scenario: accuracy,performance
      - name: Nightly Torchbench BF16 Training Test
        if: ${{ contains(inputs.test_type, 'nightly') }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          env_prepare: true
          suite: torchbench
          dt: bfloat16
          mode: training
          scenario: accuracy,performance
      - name: Nightly Timm_models FP16 Training Test
        if: ${{ contains(inputs.test_type, 'nightly') }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          env_prepare: true
          suite: timm_models
          dt: float16
          mode: training
          scenario: accuracy,performance
      - name: Nightly PT2E Full Test
        if: ${{ contains(inputs.test_type, 'nightly') }}
        uses: ./.github/actions/pt2e
        with:
          env_prepare: true
          dt: float32,int8
          scenario: accuracy,performance

      # Weekly launch: every suite runs with all precisions and both modes.
      - name: Weekly Huggingface Full Test
        if: ${{ contains(inputs.test_type, 'weekly') }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          env_prepare: true
          suite: huggingface
          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
          mode: inference,training
          scenario: accuracy,performance
      - name: Weekly Torchbench Full Test
        if: ${{ contains(inputs.test_type, 'weekly') }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          env_prepare: true
          suite: torchbench
          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
          mode: inference,training
          scenario: accuracy,performance
      - name: Weekly Timm_models Full Test
        if: ${{ contains(inputs.test_type, 'weekly') }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          env_prepare: true
          suite: timm_models
          dt: float32,bfloat16,float16,amp_bf16,amp_fp16
          mode: inference,training
          scenario: accuracy,performance
      - name: Weekly PT2E Full Test
        if: ${{ contains(inputs.test_type, 'weekly') }}
        uses: ./.github/actions/pt2e
        with:
          env_prepare: true
          dt: float32,int8
          scenario: accuracy,performance

      # On-demand launch: suite, precision, mode and scenario come from inputs.
      - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
        if: ${{ contains(inputs.test_type, 'ondemand') && inputs.suite != 'pt2e' }}
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          env_prepare: true
          suite: ${{ inputs.suite }}
          dt: ${{ inputs.dt }}
          mode: ${{ inputs.mode }}
          scenario: ${{ inputs.scenario }}
      - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
        if: ${{ contains(inputs.test_type, 'ondemand') && contains(inputs.suite, 'pt2e') }}
        uses: ./.github/actions/pt2e
        with:
          env_prepare: true
          dt: ${{ inputs.dt }}
          scenario: ${{ inputs.scenario }}

      # Both steps below also run after a failed test step (anything short of
      # cancellation) so that partial logs are still published.
      - name: Get archive files
        if: ${{ ! cancelled() }}
        run: |
          rm -rf ${{ github.workspace }}/upload_files
          cp -r ${{ github.workspace }}/pytorch/inductor_log ${{ github.workspace }}/upload_files
      - name: Upload Inductor XPU E2E Data
        if: ${{ ! cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
          path: ${{ github.workspace }}/upload_files

  # Downloads this run's artifact plus a baseline artifact, writes the
  # comparison to the step summary, and records this run as the new reference.
  summary:
    runs-on: [self-hosted, Linux, X64]
    if: ${{ always() }}
    needs: e2e_test
    # Declaring any scope here sets every unlisted scope to `none`, so the read
    # scopes used by checkout and `gh run download` are listed explicitly.
    permissions:
      actions: read
      contents: read
      issues: write
    container:
      image: ubuntu:latest
    # NOTE(review): the flattened source does not show whether this map was
    # `container.env` or the job-level `env`; job-level is used here because it
    # is passed to every step of the job.
    env:
      AGENT_TOOLSDIRECTORY: /tmp/_tools
      GH_TOKEN: ${{ github.token }}
      # Issue whose body stores `Inductor-<test_type>-LTS2: <run id>` lines.
      REFERENCE_ISSUE_ID: '1645'
    defaults:
      run:
        shell: bash -xe {0}
    steps:
      - name: Install gh
        run: |
          apt-get update
          apt-get install gh rsync ca-certificates -y
      - name: Setup python-${{ inputs.python }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python }}
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
      - name: Download Target Artifact
        run: |
          mkdir -p target/
          cd target/
          target_dir="Inductor-${{ inputs.test_type }}-LTS2-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}"
          gh --repo ${GITHUB_REPOSITORY} run download ${GITHUB_RUN_ID} -n "${target_dir}"
      # `ondemand` and `cicd` runs are compared against the `weekly` reference;
      # the reference run id is looked up in the body of the reference issue.
      - name: Download Baseline Artifact
        run: |
          mkdir -p baseline/
          artifact_type="$(echo ${{ inputs.test_type }} |sed 's/ondemand/weekly/;s/cicd/weekly/')"
          gh --repo intel/torch-xpu-ops issue view ${REFERENCE_ISSUE_ID} --json body -q .body 2>&1 |tee body.txt
          REFERENCE_RUN_ID="$(cat body.txt |grep "Inductor-${artifact_type}-LTS2" |sed 's/.*: *//' || echo '')"
          if [ "${REFERENCE_RUN_ID}" != "" ];then
            gh --repo intel/torch-xpu-ops run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*"
            baseline_dir="$(find . -name 'Inductor-*-XPU-E2E-*' -type d)"
            if [ -d "${baseline_dir}" ];then
              rsync -avzq --delete ${baseline_dir}/ baseline/
              ls -al baseline/
              rm -rf ${baseline_dir}/
            fi
          fi
      # For non-PT2E suites the step fails when /tmp/tmp-result.txt has lines
      # whose second field is greater than zero (presumably written by
      # e2e_summary.sh -- confirm). PT2E failures only print a warning.
      - name: Get summary
        if: ${{ ! cancelled() }}
        run: |
          pip install pandas requests
          if [ "${{ inputs.suite }}" != 'pt2e' ];then
            bash .github/scripts/e2e_summary.sh ./target ./baseline >> ${GITHUB_STEP_SUMMARY}
            exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
            if [ ${exit_label} -ne 0 ];then
              grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
              echo "There are ${exit_label} cases that need look into!!! Please check them"
              exit ${exit_label}
            fi
          fi
          pt2e_summary_csv="$(find ./target/ -name "summary.csv")"
          if [ -f "${pt2e_summary_csv}" ];then
            cat ${pt2e_summary_csv}
            failed_num=$(grep ',failed' ${pt2e_summary_csv} |wc -l)
            if [ ${failed_num} -ne 0 ];then
              echo "[Warning] PT2E has failures!"
            fi
          fi
      # Skipped for `ondemand` and `cicd` runs: the baseline step maps both onto
      # the `weekly` reference, so they must not record a reference of their own.
      # The issue body is captured from stdout only, so a failing `gh` call
      # aborts the step (bash -e) and its error text is never written back.
      - name: Upload Reference Run ID
        if: ${{ ! (contains(inputs.test_type, 'ondemand') || contains(inputs.test_type, 'cicd')) && github.repository_owner == 'intel' }}
        run: |
          gh --repo ${GITHUB_REPOSITORY} issue view ${REFERENCE_ISSUE_ID} --json body -q .body > new_body.txt
          cat new_body.txt
          has_or_not="$(grep 'Inductor-${{ inputs.test_type }}-LTS2' new_body.txt |wc -l)"
          if [ ${has_or_not} -ne 0 ];then
            sed -i "s/Inductor-${{ inputs.test_type }}-LTS2:.*/Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}/" new_body.txt
          else
            echo "Inductor-${{ inputs.test_type }}-LTS2: ${GITHUB_RUN_ID}" |tee -a new_body.txt
          fi
          gh --repo ${GITHUB_REPOSITORY} issue edit ${REFERENCE_ISSUE_ID} --body-file new_body.txt