# Nightly Test
#
# Workflow file for run: Nightly Test #623
# Workflow identity, triggers and concurrency policy for the nightly tests.
name: Nightly Test

on:
  # Scheduled run; GitHub evaluates cron expressions in UTC.
  schedule:
    - cron: "0 0 * * *"

  # Also run when the version file changes on main.
  push:
    branches:
      - main
    paths:
      - 'python/sglang/version.py'

  # Allow manual runs.
  workflow_dispatch:

# One concurrency group per ref: a newly started run cancels the run that is
# still in progress for the same ref.
concurrency:
  group: nightly-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
# Runs nightly/test_text_models_gsm8k_eval.py on a runner labelled
# `2-gpu-runner`. The job is skipped outside sgl-project/sglang.
nightly-test-eval-text-models:
  runs-on: 2-gpu-runner
  if: github.repository == 'sgl-project/sglang'
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh

    # The timeout applies to this step only, not to the whole job.
    - name: Run eval test for text models
      run: |
        cd test/srt
        python3 nightly/test_text_models_gsm8k_eval.py
      timeout-minutes: 120
# Runs nightly/test_text_models_perf.py, then hands the directory
# test/srt/performance_profiles_text_models to scripts/ci/publish_traces.py.
# The job is skipped outside sgl-project/sglang.
nightly-test-perf-text-models:
  runs-on: 2-gpu-runner
  if: github.repository == 'sgl-project/sglang'
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh

    # The profile directory is removed before the test starts; presumably the
    # test recreates it with fresh traces (verify in the test script).
    # TRACE_BASE_URL is keyed by the current run id.
    - name: Run performance test for text models
      env:
        PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
        TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
      run: |
        cd test/srt
        rm -rf performance_profiles_text_models/
        python3 nightly/test_text_models_perf.py
      timeout-minutes: 180

    # The personal access token is exposed to this step only.
    # NOTE(review): there is no `if:` here, so this step is skipped when the
    # perf step above fails - confirm that is intended.
    - name: Publish traces to storage repo
      env:
        GITHUB_RUN_NUMBER: ${{ github.run_number }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
      run: |
        python3 scripts/ci/publish_traces.py --traces-dir test/srt/performance_profiles_text_models
# Runs nightly/test_vlms_mmmu_eval.py on a runner labelled `2-gpu-runner`.
# The job is skipped outside sgl-project/sglang.
nightly-test-eval-vlms:
  runs-on: 2-gpu-runner
  if: github.repository == 'sgl-project/sglang'
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh

    # The timeout applies to this step only, not to the whole job.
    - name: Run eval test for VLM models (fixed MMMU-100)
      run: |
        cd test/srt
        python3 nightly/test_vlms_mmmu_eval.py
      timeout-minutes: 240
# Runs nightly/test_vlms_perf.py, then hands the directory
# test/srt/performance_profiles_vlms to scripts/ci/publish_traces.py.
# The job is skipped outside sgl-project/sglang.
nightly-test-perf-vlms:
  runs-on: 2-gpu-runner
  if: github.repository == 'sgl-project/sglang'
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh

    # The profile directory is removed before the test starts; presumably the
    # test recreates it with fresh traces (verify in the test script).
    # TRACE_BASE_URL is keyed by the current run id.
    - name: Run perf test for VLM models (MMMU)
      env:
        PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
        TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
      run: |
        cd test/srt
        rm -rf performance_profiles_vlms/
        python3 nightly/test_vlms_perf.py
      timeout-minutes: 240

    # The personal access token is exposed to this step only.
    # NOTE(review): there is no `if:` here, so this step is skipped when the
    # perf step above fails - confirm that is intended.
    - name: Publish traces to storage repo
      env:
        GITHUB_RUN_NUMBER: ${{ github.run_number }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
      run: |
        python3 scripts/ci/publish_traces.py --traces-dir test/srt/performance_profiles_vlms
# Runs the `nightly-1-gpu` suite through test/srt/run_suite.py.
# The job is skipped outside sgl-project/sglang.
nightly-test-1-gpu:
  runs-on: 1-gpu-runner
  if: github.repository == 'sgl-project/sglang'
  # Job-level variable, visible to every step below. Its consumer is not
  # shown in this file - presumably the test scripts read it.
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh

    # `--continue-on-error` is an argument to run_suite.py, not the workflow
    # `continue-on-error` key.
    - name: Run test
      run: |
        cd test/srt
        python3 run_suite.py --suite nightly-1-gpu --continue-on-error
      timeout-minutes: 60
# Runs the `nightly-4-gpu` suite through test/srt/run_suite.py on a runner
# labelled `4-gpu-h100`. The job is skipped outside sgl-project/sglang.
nightly-test-4-gpu:
  runs-on: 4-gpu-h100
  if: github.repository == 'sgl-project/sglang'
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh

    # `--continue-on-error` is an argument to run_suite.py, not the workflow
    # `continue-on-error` key.
    - name: Run test
      run: |
        cd test/srt
        python3 run_suite.py --suite nightly-4-gpu --continue-on-error
      timeout-minutes: 30
# Runs the `nightly-8-gpu-h200` suite through test/srt/run_suite.py.
# The job is skipped outside sgl-project/sglang.
nightly-test-8-gpu-h200:
  runs-on: 8-gpu-h200
  if: github.repository == 'sgl-project/sglang'
  # Job-level variable, visible to every step below. Its consumer is not
  # shown in this file - presumably the test scripts read it.
  env:
    RUNNER_LABELS: 8-gpu-h200
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh

    # `--continue-on-error` is an argument to run_suite.py, not the workflow
    # `continue-on-error` key.
    - name: Run test
      run: |
        cd test/srt
        python3 run_suite.py --suite nightly-8-gpu-h200 --continue-on-error
      timeout-minutes: 30
# Runs the `nightly-8-gpu-h20` suite through test/srt/run_suite.py.
# The job is skipped outside sgl-project/sglang.
nightly-test-8-gpu-h20:
  runs-on: 8-gpu-h20
  if: github.repository == 'sgl-project/sglang'
  # Comma-separated device list exported to every step; quoted so it is
  # always read as a single string. Presumably these are the RDMA devices the
  # tests may use - confirm against the test code.
  env:
    SGLANG_CI_RDMA_ALL_DEVICES: 'mlx5_1,mlx5_2,mlx5_3,mlx5_4'
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh

    # `--continue-on-error` is an argument to run_suite.py, not the workflow
    # `continue-on-error` key.
    - name: Run test
      run: |
        cd test/srt
        python3 run_suite.py --suite nightly-8-gpu-h20 --continue-on-error
      timeout-minutes: 30
# Collapses the results of all nightly jobs into a single pass/fail status.
#
# `always()` keeps this job from being skipped when a job listed in `needs`
# fails or is cancelled. The job also runs for any manual dispatch, including
# in repositories where every needed job is skipped by its own `if:`.
check-all-jobs:
  if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'workflow_dispatch')
  needs:
    - nightly-test-eval-text-models
    - nightly-test-perf-text-models
    - nightly-test-eval-vlms
    - nightly-test-perf-vlms
    - nightly-test-1-gpu
    - nightly-test-4-gpu
    - nightly-test-8-gpu-h200
    - nightly-test-8-gpu-h20
  runs-on: ubuntu-latest
  steps:
    - name: Check if any job failed
      # Expressions are evaluated into environment variables instead of being
      # spliced into the script text; each renders as "true" or "false".
      env:
        ANY_FAILED: ${{ contains(needs.*.result, 'failure') }}
        ANY_CANCELLED: ${{ contains(needs.*.result, 'cancelled') }}
        ANY_SKIPPED: ${{ contains(needs.*.result, 'skipped') }}
      run: |
        # None of the needed jobs sets the workflow-level `continue-on-error`
        # key, so a failing job reaches this step with result 'failure'.
        if [[ "${ANY_FAILED}" == "true" ]]; then
          echo "One or more nightly test jobs failed"
          exit 1
        fi
        if [[ "${ANY_CANCELLED}" == "true" ]]; then
          echo "One or more nightly test jobs were cancelled"
          exit 1
        fi
        # Skipped jobs do not fail the check (same exit status as before), but
        # they must not be reported as having passed.
        if [[ "${ANY_SKIPPED}" == "true" ]]; then
          echo "No nightly test job failed, but one or more jobs were skipped"
          exit 0
        fi
        echo "All nightly test jobs passed"