Support JetVLM #46241
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PR Test workflow: runs on pushes to main, on pull requests targeting main,
# and on manual dispatch.
name: PR Test

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
    # Only new commits (synchronize) and label changes (labeled) start a run.
    types:
      - synchronize
      - labeled
  workflow_dispatch:
    inputs:
      version:
        description: "FlashInfer version"
        required: true
        type: choice
        default: "release"
        options:
          - "release"
          - "nightly"

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: pr-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
# =============================================== check changes ====================================================
  # Gatekeeper job: rejects unlabeled or draft PRs, rate-limits actors without
  # write access, and publishes which areas of the repository changed so that
  # downstream jobs can decide whether to run.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      main_package: ${{ steps.filter.outputs.main_package }}
      sgl_kernel: ${{ steps.filter.outputs.sgl_kernel }}
      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Fail if the PR does not have the 'run-ci' label
        if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'run-ci')
        run: |
          echo "This pull request does not have the 'run-ci' label. Failing the workflow."
          exit 1

      - name: Fail if the PR is a draft
        if: github.event_name == 'pull_request' && github.event.pull_request.draft == true
        run: |
          echo "This pull request is a draft. Failing the workflow."
          exit 1

      - name: Enforce rate limit for low-permission actors
        if: github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch'
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const HOURS = 2;
            const owner = context.repo.owner;
            const repo = context.repo.repo;
            const eventName = context.eventName;

            // Prefer the actor that actually triggered this run (e.g. on a re-run)
            // and fall back to the actor recorded in the event context.
            const curRun = await github.rest.actions.getWorkflowRun({
              owner, repo, run_id: context.runId
            });
            const triggeringActor = curRun.data.triggering_actor?.login || context.actor;

            // write / maintain / admin collaborators are exempt from the limit.
            // 404/403 from the permission lookup is treated as "no permission".
            async function hasHighPermission(username) {
              try {
                const { data } = await github.rest.repos.getCollaboratorPermissionLevel({ owner, repo, username });
                const perm = data.permission || 'none';
                return perm === 'write' || perm === 'maintain' || perm === 'admin';
              } catch (e) {
                if (e.status === 404 || e.status === 403) return false;
                throw e;
              }
            }

            if (await hasHighPermission(triggeringActor)) {
              core.info(`Triggering user '${triggeringActor}' has high permission. No rate limit applied.`);
              return;
            }

            const cutoff = new Date(Date.now() - HOURS * 60 * 60 * 1000);
            core.info(`Checking for workflow runs since ${cutoff.toISOString()} (last ${HOURS} hours) for event '${eventName}'.`);

            // Ask the API only for runs created inside the window and page through
            // all of them. A single unfiltered page of 100 runs can be filled by
            // other users' runs, which would let a recent run by this actor go
            // unnoticed. Milliseconds are stripped to keep the timestamp in the
            // plain ISO 8601 form used by the search date syntax; the exact cutoff
            // is still enforced client-side below.
            const createdSince = `>=${cutoff.toISOString().replace(/\.\d{3}Z$/, 'Z')}`;
            const runs = await github.paginate(github.rest.actions.listWorkflowRuns, {
              owner,
              repo,
              workflow_id: 'pr-test.yml',
              event: eventName,
              created: createdSince,
              per_page: 100,
            });

            const recentFound = runs.find((run) => {
              if (String(run.id) === String(context.runId)) return false;
              if (new Date(run.created_at) < cutoff) return false;
              return (run.actor?.login === triggeringActor) || (run.triggering_actor?.login === triggeringActor);
            });

            if (recentFound) {
              core.setFailed(
                `User '${triggeringActor}' already triggered '${context.workflow}' via '${eventName}' at ${recentFound.created_at}. ` +
                `Please wait ${HOURS} hours before triggering again.`
              );
            } else {
              core.info(`No recent runs detected within the last ${HOURS} hours; proceeding.`);
            }

      # Each filter becomes a 'true'/'false' job output consumed by later jobs.
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**"
              - "python/*.toml"
              - "scripts/ci/**"
              - "test/**"
              - ".github/workflows/pr-test.yml"
            sgl_kernel:
              - "sgl-kernel/**"
            multimodal_gen:
              - "python/sglang/multimodal_gen/**"
# =============================================== sgl-kernel ====================================================
  # Builds the sgl-kernel wheel on the x64 build node and uploads it as an
  # artifact named wheel-python<ver>-cuda<ver>. Runs only when check-changes
  # reports sgl_kernel == 'true'.
  sgl-kernel-build-wheels:
    name: Build Wheel
    needs: [check-changes]
    if: needs.check-changes.outputs.sgl_kernel == 'true'
    runs-on: x64-kernel-build-node
    strategy:
      matrix:
        include:
          - python-version: "3.10"
            cuda-version: "12.9"
          # Add back when CUDA 13.0 is supported on CI
          # - python-version: "3.10"
          #   cuda-version: "13.0"
    steps:
      # Guard and quote the workspace path: with an empty or unset
      # GITHUB_WORKSPACE the unquoted form would expand to `sudo rm -rf /*`.
      - name: Cleanup
        run: |
          if [ -d "$GITHUB_WORKSPACE" ]; then
            sudo rm -rf "$GITHUB_WORKSPACE"/* || true
          else
            echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
          fi

      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
        run: |
          cd sgl-kernel
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*
# ARM counterpart of the wheel build; the artifact name carries an -aarch64 suffix.
  sgl-kernel-build-wheels-arm:
    name: Build Wheel Arm
    needs:
      - check-changes
    if: needs.check-changes.outputs.sgl_kernel == 'true'
    runs-on: arm-kernel-build-node
    strategy:
      matrix:
        include:
          - python-version: "3.10"
            cuda-version: "12.9"
    steps:
      # Empty the workspace if it exists; a missing workspace is not an error.
      - name: Cleanup
        run: |
          if [ -d "$GITHUB_WORKSPACE" ]; then
            sudo rm -rf "$GITHUB_WORKSPACE"/* || true
          else
            echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
          fi

      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
        run: |
          cd sgl-kernel
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
          path: sgl-kernel/dist/*
# Runs the sgl-kernel pytest suite against the wheel produced by sgl-kernel-build-wheels.
  sgl-kernel-unit-test:
    needs:
      - check-changes
      - sgl-kernel-build-wheels
    if: needs.check-changes.outputs.sgl_kernel == 'true'
    runs-on: 1-gpu-runner
    env:
      RUNNER_LABELS: 1-gpu-runner
    steps:
      - uses: actions/checkout@v4

      # List, then empty, sgl-kernel/dist before the artifact is downloaded into it.
      - name: Cleanup
        run: |
          ls -alh sgl-kernel/dist || true
          rm -rf sgl-kernel/dist/* || true

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd sgl-kernel
          pytest tests/
# Runs test/srt/test_mla_deepseek_v3.py with the wheel produced by sgl-kernel-build-wheels.
  sgl-kernel-mla-test:
    needs:
      - check-changes
      - sgl-kernel-build-wheels
    if: needs.check-changes.outputs.sgl_kernel == 'true'
    runs-on: 1-gpu-runner
    env:
      RUNNER_LABELS: 1-gpu-runner
    steps:
      - uses: actions/checkout@v4

      # List, then empty, sgl-kernel/dist before the artifact is downloaded into it.
      - name: Cleanup
        run: |
          ls -alh sgl-kernel/dist || true
          rm -rf sgl-kernel/dist/* || true

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test/srt
          python3 test_mla_deepseek_v3.py
# Smoke-runs every sgl-kernel/benchmark/bench_*.py script with the built wheel.
  sgl-kernel-benchmark-test:
    needs:
      - check-changes
      - sgl-kernel-build-wheels
    if: needs.check-changes.outputs.sgl_kernel == 'true'
    runs-on: 1-gpu-runner
    env:
      CI: true
      RUNNER_LABELS: 1-gpu-runner
    steps:
      - uses: actions/checkout@v4

      # List, then empty, sgl-kernel/dist before the artifact is downloaded into it.
      - name: Cleanup
        run: |
          ls -alh sgl-kernel/dist || true
          rm -rf sgl-kernel/dist/* || true

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      # Best effort: each script gets 60 seconds, and a timeout or failure only
      # prints a warning, so this step does not fail on an individual benchmark.
      - name: Run benchmark tests
        timeout-minutes: 45
        run: |
          cd sgl-kernel/benchmark
          echo "Running sgl-kernel benchmark tests in CI mode..."

          echo "CI environment variable: $CI"
          echo "GITHUB_ACTIONS environment variable: $GITHUB_ACTIONS"

          for bench_file in bench_*.py; do
            echo "Testing $bench_file..."
            timeout 60 python3 "$bench_file" || echo "Warning: $bench_file timed out or failed, continuing..."
            echo "Completed $bench_file"
            echo "---"
          done

          echo "All benchmark tests completed!"
# =============================================== multimodal_gen ====================================================
  # Placeholder job: runs when check-changes reports multimodal_gen == 'true'
  # and currently only prints two messages.
  multimodal-gen-test:
    needs:
      - check-changes
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run placeholder test
        run: |
          echo "Running multimodal_gen tests..."
          echo "This is a placeholder for future tests."
| # Adding a single CUDA13 smoke test to verify that the kernel builds and runs | |
| # TODO: Add back this test when it can pass on CI | |
| # cuda13-kernel-smoke-test: | |
| # needs: [check-changes, sgl-kernel-build-wheels] | |
| # if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| # runs-on: x64-cu13-kernel-tests | |
| # steps: | |
| # - uses: actions/checkout@v4 | |
| # - name: Cleanup | |
| # run: | | |
| # ls -alh sgl-kernel/dist || true | |
| # rm -rf sgl-kernel/dist/* || true | |
| # - name: Download CUDA 13.0 artifacts | |
| # uses: actions/download-artifact@v4 | |
| # with: | |
| # path: sgl-kernel/dist/ | |
| # merge-multiple: true | |
| # pattern: wheel-python3.10-cuda13.0 | |
| # - name: Install dependencies | |
| # run: | | |
| # CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh | |
| # - name: Run kernel unit tests | |
| # timeout-minutes: 30 | |
| # run: | | |
| # cd sgl-kernel | |
| # pytest tests/ | |
# =============================================== primary ====================================================
  # Frontend language tests (test/lang, per-commit suite).
  unit-test-frontend:
    needs:
      - check-changes
      - sgl-kernel-build-wheels
    # always() keeps the job eligible when the wheel build was skipped;
    # !failure() and !cancelled() still block it after an upstream failure
    # or a cancelled run.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 1-gpu-runner
    env:
      RUNNER_LABELS: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 10
        run: |
          cd test/lang
          python3 run_suite.py --suite per-commit
# Backend per-commit-1-gpu suite, split into 15 partitions with at most 5 running at once.
  unit-test-backend-1-gpu:
    needs:
      - check-changes
      - unit-test-frontend
      - sgl-kernel-build-wheels
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 1-gpu-runner
    env:
      RUNNER_LABELS: 1-gpu-runner
    strategy:
      fail-fast: false
      max-parallel: 5
      matrix:
        # Partition ids; the count must match --auto-partition-size below.
        part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-1-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 15
# Backend per-commit-2-gpu suite in 2 partitions; starts after the 1-GPU backend tests.
  unit-test-backend-2-gpu:
    needs:
      - check-changes
      - unit-test-backend-1-gpu
      - sgl-kernel-build-wheels
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 2-gpu-runner
    env:
      RUNNER_LABELS: 2-gpu-runner
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-2-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
# Backend per-commit-4-gpu suite in 2 partitions on the 4-gpu-h100 runner.
  unit-test-backend-4-gpu:
    needs:
      - check-changes
      - unit-test-backend-2-gpu
      - sgl-kernel-build-wheels
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 4-gpu-h100
    env:
      RUNNER_LABELS: 4-gpu-h100
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-4-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
# Backend per-commit-8-gpu-h200 suite in 3 partitions on the 8-gpu-h200 runner.
  unit-test-backend-8-gpu-h200:
    needs:
      - check-changes
      - unit-test-backend-2-gpu
      - sgl-kernel-build-wheels
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 8-gpu-h200
    env:
      RUNNER_LABELS: 8-gpu-h200
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1, 2]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-8-gpu-h200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3
# Backend per-commit-8-gpu-h20 suite in 2 partitions on the 8-gpu-h20 runner.
  unit-test-backend-8-gpu-h20:
    needs:
      - check-changes
      - unit-test-backend-2-gpu
      - sgl-kernel-build-wheels
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 8-gpu-h20
    env:
      # NOTE(review): presumably the RDMA devices the tests may use on this
      # runner - confirm against the test code that reads this variable.
      SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
      RUNNER_LABELS: 8-gpu-h20
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
# Single-GPU benchmarks, part 1: one-batch latency, serving latency/throughput, EAGLE and LoRA.
  performance-test-1-gpu-part-1:
    needs:
      - check-changes
      - sgl-kernel-build-wheels
    # Eligible even when the wheel build was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 1-gpu-runner
    env:
      RUNNER_LABELS: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      # Every benchmark step below has its own 10-minute budget.
      - name: Benchmark single latency
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_small
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_default

      - name: Benchmark online latency
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default

      - name: Benchmark offline throughput
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default

      - name: Benchmark offline throughput (Non-streaming, small batch size)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size

      - name: Benchmark online latency (EAGLE)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle

      - name: Benchmark online latency (LoRA)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency
          python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency_with_concurrent_adapter_updates
# Single-GPU benchmarks, part 2: throughput variants (no radix cache, Triton, FP8) and VLM.
  performance-test-1-gpu-part-2:
    needs:
      - check-changes
      - sgl-kernel-build-wheels
    # Eligible even when the wheel build was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 1-gpu-runner
    env:
      RUNNER_LABELS: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      # Every benchmark step below has its own 10-minute budget.
      - name: Benchmark offline throughput (w/o RadixAttention)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache

      - name: Benchmark offline throughput (w/ Triton)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend

      - name: Benchmark offline throughput (w/ FP8)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8

      - name: Benchmark VLM offline throughput
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_offline_throughput

      - name: Benchmark VLM online latency
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_online_latency
# Single-GPU benchmarks, part 3: Score API latency/throughput and batch scaling.
  performance-test-1-gpu-part-3:
    needs:
      - check-changes
      - sgl-kernel-build-wheels
    # Eligible even when the wheel build was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 1-gpu-runner
    env:
      RUNNER_LABELS: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      - name: Benchmark Scores online latency and throughput
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_score_api_latency_throughput

      - name: Benchmark Scores online latency and throughput (batch size scaling)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_score_api_batch_scaling
# Two-GPU benchmarks (TP=2 and PP=2); starts after the 2-GPU backend tests.
  performance-test-2-gpu:
    needs:
      - check-changes
      - unit-test-backend-2-gpu
      - sgl-kernel-build-wheels
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 2-gpu-runner
    env:
      RUNNER_LABELS: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

      # Every benchmark step below has its own 10-minute budget.
      - name: Benchmark single latency (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1

      - name: Benchmark single latency + torch.compile (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1

      - name: Benchmark offline throughput (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default

      - name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache

      - name: Benchmark offline PP decode throughput (PP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_pp_offline_throughput_default_decode

      - name: Benchmark offline PP prefill throughput (PP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_pp_long_context_prefill
# Single-GPU accuracy evaluation (test_eval_accuracy_large.py).
  accuracy-test-1-gpu:
    needs:
      - check-changes
      - sgl-kernel-build-wheels
    # Eligible even when the wheel build was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 1-gpu-runner
    env:
      RUNNER_LABELS: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      # Besides the regular CI dependencies, human-eval is cloned and installed
      # in editable mode.
      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e .

      - name: Evaluate accuracy
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 test_eval_accuracy_large.py
# Two-GPU accuracy evaluation (test_moe_eval_accuracy_large.py); starts after accuracy-test-1-gpu.
  accuracy-test-2-gpu:
    needs:
      - check-changes
      - accuracy-test-1-gpu
      - sgl-kernel-build-wheels
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 2-gpu-runner
    env:
      RUNNER_LABELS: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      # Besides the regular CI dependencies, human-eval is cloned and installed
      # in editable mode.
      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e .

      - name: Evaluate accuracy (TP=2)
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 test_moe_eval_accuracy_large.py
# DeepEP suite (per-commit-4-gpu-deepep) on the 4-gpu-h100 runner.
  unit-test-deepep-4-gpu:
    needs:
      - check-changes
      - unit-test-backend-2-gpu
      - sgl-kernel-build-wheels
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 4-gpu-h100
    env:
      RUNNER_LABELS: 4-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      # This job installs through ci_install_deepep.sh rather than
      # ci_install_dependency.sh.
      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_deepep.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-4-gpu-deepep
# DeepEP suite (per-commit-8-gpu-h200-deepep) on the 8-gpu-h200 runner.
  unit-test-deepep-8-gpu:
    needs:
      - check-changes
      - unit-test-backend-2-gpu
      - sgl-kernel-build-wheels
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 8-gpu-h200
    env:
      RUNNER_LABELS: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The wheel artifact is fetched only when sgl-kernel changed.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      # This job installs through ci_install_deepep.sh rather than
      # ci_install_dependency.sh.
      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_deepep.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-8-gpu-h200-deepep
# Backend per-commit-4-gpu-b200 suite on the 4-gpu-b200 runner (single partition).
  unit-test-backend-4-gpu-b200:
    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 4-gpu-b200
    env:
      RUNNER_LABELS: 4-gpu-b200
    strategy:
      fail-fast: false
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Pinned to v4 like every other download step in this workflow; the
      # artifacts are produced by actions/upload-artifact@v4.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      # IS_BLACKWELL=1 is passed to the install script for this runner.
      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 45
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600
# Backend per-commit-4-gpu-gb200 suite on the 4-gpu-gb200 runner, using the aarch64 wheel.
  unit-test-backend-4-gpu-gb200:
    needs:
      - check-changes
      - unit-test-backend-2-gpu
      - sgl-kernel-build-wheels-arm
    # Eligible even when an upstream job was skipped, but not after a failure
    # or cancellation.
    if: >-
      always() && !failure() && !cancelled() &&
      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    runs-on: 4-gpu-gb200
    env:
      RUNNER_LABELS: 4-gpu-gb200
    strategy:
      fail-fast: false
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Fetched only when sgl-kernel changed; note the -aarch64 artifact name.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9-aarch64

      # Installs through ci_install_deepep.sh with IS_BLACKWELL=1 and
      # GRACE_BLACKWELL=1 set for the script.
      - name: Install dependencies
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 GRACE_BLACKWELL=1 bash scripts/ci/ci_install_deepep.sh

      - name: Run test
        timeout-minutes: 45
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-4-gpu-gb200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600
# Final gate: fails when any job listed in `needs` failed or was cancelled.
  pr-test-finish:
    # Every job of this workflow must be listed here, otherwise its failure
    # never reaches the gate. sgl-kernel-build-wheels-arm and
    # unit-test-backend-8-gpu-h20 were previously missing from the list.
    needs:
      - check-changes
      - sgl-kernel-build-wheels
      - sgl-kernel-build-wheels-arm
      - sgl-kernel-unit-test
      - sgl-kernel-mla-test
      - sgl-kernel-benchmark-test
      - multimodal-gen-test
      - unit-test-frontend
      - unit-test-backend-1-gpu
      - unit-test-backend-2-gpu
      - unit-test-backend-4-gpu
      - unit-test-backend-8-gpu-h200
      - unit-test-backend-8-gpu-h20
      - performance-test-1-gpu-part-1
      - performance-test-1-gpu-part-2
      - performance-test-1-gpu-part-3
      - performance-test-2-gpu
      - accuracy-test-1-gpu
      - accuracy-test-2-gpu
      - unit-test-deepep-4-gpu
      - unit-test-deepep-8-gpu
      - unit-test-backend-4-gpu-b200
      - unit-test-backend-4-gpu-gb200
    # Run even when upstream jobs failed or were skipped, so a result is
    # always reported.
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        env:
          # Pass the JSON through the environment instead of splicing the
          # expression into the script text, so quotes inside it cannot break
          # or alter the shell command.
          NEEDS_JSON: ${{ toJson(needs) }}
        run: |
          # Get a list of all job names from the JSON keys
          job_names=$(echo "$NEEDS_JSON" | jq -r 'keys_unsorted[]')

          for job in $job_names; do
            # For each job, extract its result
            result=$(echo "$NEEDS_JSON" | jq -r --arg j "$job" '.[$j].result')

            # Print the job name and its result
            echo "$job: $result"

            # Check for failure or cancellation and exit if found
            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
              echo "The above jobs failed."
              exit 1
            fi
          done

          # If the loop completes, no job failed or was cancelled
          # (skipped jobs are accepted)
          echo "All jobs completed successfully"
          exit 0