chore: bump sgl-kernel version to 0.3.17.post1 #38252
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: PR Test (AMD) | |
| on: | |
| push: | |
| branches: [ main ] | |
| paths: | |
| - "python/**" | |
| - "!python/sglang/multimodal_gen/**" | |
| - "scripts/ci/**" | |
| - "test/**" | |
| - "sgl-kernel/**" | |
| - ".github/workflows/pr-test-amd.yml" | |
| pull_request: | |
| branches: [ main ] | |
| paths: | |
| - "python/**" | |
| - "!python/sglang/multimodal_gen/**" | |
| - "scripts/ci/**" | |
| - "test/**" | |
| - "sgl-kernel/**" | |
| - ".github/workflows/pr-test-amd.yml" | |
| types: [synchronize, labeled] | |
| workflow_dispatch: | |
| concurrency: | |
| group: pr-test-amd-${{ github.ref }} | |
| cancel-in-progress: true | |
| jobs: | |
| check-changes: | |
| runs-on: ubuntu-latest | |
| outputs: | |
| main_package: ${{ steps.filter.outputs.main_package }} | |
| sgl_kernel: ${{ steps.filter.outputs.sgl_kernel }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Fail if the PR does not have the 'run-ci' label | |
| if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'run-ci') | |
| run: | | |
| echo "This pull request does not have the 'run-ci' label. Failing the workflow." | |
| exit 1 | |
| - name: Fail if the PR is a draft | |
| if: github.event_name == 'pull_request' && github.event.pull_request.draft == true | |
| run: | | |
| echo "This pull request is a draft. Failing the workflow." | |
| exit 1 | |
| - name: Detect file changes | |
| id: filter | |
| uses: dorny/paths-filter@v3 | |
| with: | |
| filters: | | |
| main_package: | |
| - "python/**" | |
| - "scripts/ci/**" | |
| - "test/**" | |
| - ".github/workflows/pr-test-amd.yml" | |
| sgl_kernel: | |
| - "sgl-kernel/**" | |
| # =============================================== sgl-kernel ==================================================== | |
| sgl-kernel-unit-test-amd: | |
| needs: [check-changes] | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| runner: [linux-mi300-gpu-1] | |
| runs-on: ${{matrix.runner}} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ensure_vram_clear.sh rocm | |
| - name: Start CI container | |
| run: bash scripts/ci/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd_ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 14 | |
| run: | | |
| docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py | |
| docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py | |
| docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py | |
| docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py | |
| docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py | |
| docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py | |
| # =============================================== primary ==================================================== | |
| unit-test-frontend-amd: | |
| needs: [check-changes] | |
| if: always() && !failure() && !cancelled() && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| runner: [linux-mi300-gpu-1] | |
| runs-on: ${{matrix.runner}} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ensure_vram_clear.sh rocm | |
| - name: Start CI container | |
| run: bash scripts/ci/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd_ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 10 | |
| run: | | |
| docker exec -w /sglang-checkout/test/lang ci_sglang python3 run_suite.py --suite per-commit | |
| unit-test-backend-1-gpu-amd: | |
| needs: [check-changes, unit-test-frontend-amd] | |
| if: always() && !failure() && !cancelled() && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| runner: [linux-mi300-gpu-1] | |
| part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | |
| runs-on: ${{matrix.runner}} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ensure_vram_clear.sh rocm | |
| - name: Start CI container | |
| run: bash scripts/ci/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd_ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12 | |
| unit-test-backend-2-gpu-amd: | |
| needs: [check-changes] | |
| if: always() && !failure() && !cancelled() && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| runner: [linux-mi300-gpu-2] | |
| part: [0, 1] | |
| runs-on: ${{matrix.runner}} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ensure_vram_clear.sh rocm | |
| - name: Start CI container | |
| run: bash scripts/ci/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd_ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 | |
| unit-test-backend-8-gpu-amd: | |
| needs: [check-changes, unit-test-backend-2-gpu-amd] | |
| if: always() && !failure() && !cancelled() && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| env: | |
| RUNNER_LABELS: linux-mi300-gpu-8 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| runner: [linux-mi300-gpu-8] | |
| part: [0, 1, 2] | |
| runs-on: ${{matrix.runner}} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ensure_vram_clear.sh rocm | |
| - name: Start CI container | |
| run: bash scripts/ci/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd_ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 60 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 3600 | |
| performance-test-1-gpu-part-1-amd: | |
| needs: [check-changes] | |
| if: always() && !failure() && !cancelled() && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| runner: [linux-mi300-gpu-1] | |
| runs-on: ${{matrix.runner}} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ensure_vram_clear.sh rocm | |
| - name: Start CI container | |
| run: bash scripts/ci/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd_ci_install_dependency.sh | |
| - name: Benchmark single latency | |
| timeout-minutes: 20 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_small | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_default | |
| - name: Benchmark online latency | |
| timeout-minutes: 15 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default | |
| - name: Benchmark offline throughput | |
| timeout-minutes: 15 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default | |
| - name: Benchmark offline throughput (Non-streaming, small batch size) | |
| timeout-minutes: 15 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size | |
| performance-test-1-gpu-part-2-amd: | |
| needs: [check-changes] | |
| if: always() && !failure() && !cancelled() && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| runner: [linux-mi300-gpu-1] | |
| runs-on: ${{matrix.runner}} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ensure_vram_clear.sh rocm | |
| - name: Start CI container | |
| run: bash scripts/ci/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd_ci_install_dependency.sh | |
| - name: Benchmark offline throughput (w/o RadixAttention) | |
| timeout-minutes: 15 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache | |
| - name: Benchmark offline throughput (w/ Triton) | |
| timeout-minutes: 15 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend | |
| - name: Benchmark offline throughput (w/ FP8) | |
| timeout-minutes: 15 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8 | |
| performance-test-2-gpu-amd: | |
| needs: [check-changes, unit-test-backend-2-gpu-amd] | |
| if: always() && !failure() && !cancelled() && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| runner: [linux-mi300-gpu-2] | |
| runs-on: ${{matrix.runner}} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ensure_vram_clear.sh rocm | |
| - name: Start CI container | |
| run: bash scripts/ci/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd_ci_install_dependency.sh | |
| - name: Benchmark dummy grok (TP=2) | |
| timeout-minutes: 30 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 models/test_dummy_grok_models.py | |
| - name: Benchmark single latency (TP=2) | |
| timeout-minutes: 25 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1 | |
| - name: Benchmark single latency + torch.compile (TP=2) | |
| timeout-minutes: 25 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1 | |
| - name: Benchmark offline throughput (TP=2) | |
| timeout-minutes: 25 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default | |
| - name: Benchmark offline throughput (w/o RadixAttention) (TP=2) | |
| timeout-minutes: 25 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache | |
| accuracy-test-1-gpu-amd: | |
| needs: [check-changes] | |
| if: always() && !failure() && !cancelled() && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| runner: [linux-mi300-gpu-1] | |
| runs-on: ${{matrix.runner}} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ensure_vram_clear.sh rocm | |
| - name: Start CI container | |
| run: bash scripts/ci/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd_ci_install_dependency.sh | |
| - name: Evaluate Accuracy | |
| timeout-minutes: 30 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py | |
| bash scripts/ci/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py | |
| bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py | |
| accuracy-test-2-gpu-amd: | |
| needs: [check-changes, accuracy-test-1-gpu-amd] | |
| if: always() && !failure() && !cancelled() && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| runner: [linux-mi300-gpu-2] | |
| runs-on: ${{matrix.runner}} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ensure_vram_clear.sh rocm | |
| - name: Start CI container | |
| run: bash scripts/ci/amd_ci_start_container.sh | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd_ci_install_dependency.sh | |
| - name: Evaluate accuracy (TP=2) | |
| timeout-minutes: 30 | |
| run: | | |
| bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py | |
| pr-test-amd-finish: | |
| needs: | |
| [ | |
| check-changes, | |
| sgl-kernel-unit-test-amd, | |
| unit-test-frontend-amd, | |
| unit-test-backend-1-gpu-amd, | |
| unit-test-backend-2-gpu-amd, | |
| unit-test-backend-8-gpu-amd, | |
| performance-test-1-gpu-part-1-amd, | |
| performance-test-1-gpu-part-2-amd, | |
| performance-test-2-gpu-amd, | |
| accuracy-test-1-gpu-amd, | |
| accuracy-test-2-gpu-amd, | |
| ] | |
| if: always() | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Check all dependent job statuses | |
| run: | | |
| # Convert the 'needs' context to a JSON string | |
| json_needs='${{ toJson(needs) }}' | |
| # Get a list of all job names from the JSON keys | |
| job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]') | |
| for job in $job_names; do | |
| # For each job, extract its result | |
| result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result') | |
| # Print the job name and its result | |
| echo "$job: $result" | |
| # Check for failure or cancellation and exit if found | |
| if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then | |
| echo "The above jobs failed." | |
| exit 1 | |
| fi | |
| done | |
| # If the loop completes, all jobs were successful | |
| echo "All jobs completed successfully" | |
| exit 0 |