Implement nightly test workflow naming conventions (#13170)

alisonshao · web-flow · commit dcc47a56c9ec · 2025-11-14T16:14:53.000-08:00
diff --git a/.github/workflows/nightly-test-intel.yml b/.github/workflows/nightly-test-intel.yml
@@ -0,0 +1,26 @@
+name: Nightly Test (Intel)
+
+on:
+  schedule:
+    - cron: '0 0 * * *'
+  push:
+    branches:
+      - main
+    paths:
+      - "python/sglang/version.py"
+  workflow_dispatch:
+
+concurrency:
+  group: nightly-test-intel-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Placeholder for Intel GPU tests
+  # Add Intel-specific nightly test jobs here when available
+
+  placeholder:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Placeholder
+        run: echo "Intel nightly tests will be added here"
diff --git a/.github/workflows/nightly-test-nvidia.yml b/.github/workflows/nightly-test-nvidia.yml
@@ -0,0 +1,292 @@
+name: Nightly Test (Nvidia)
+
+on:
+  schedule:
+    - cron: '0 0 * * *'
+  push:
+    branches:
+      - main
+    paths:
+      - "python/sglang/version.py"
+  workflow_dispatch:
+
+concurrency:
+  group: nightly-test-nvidia-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # General tests - 1 GPU
+  nightly-test-general-1-gpu-runner:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 1-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 60
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite nightly-1-gpu --continue-on-error
+
+  # General tests - 4 GPU H100
+  nightly-test-general-4-gpu-h100:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 4-gpu-h100
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 30
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite nightly-4-gpu --continue-on-error
+
+  # General tests - 8 GPU H200
+  nightly-test-general-8-gpu-h200:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 8-gpu-h200
+    env:
+      RUNNER_LABELS: 8-gpu-h200
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 30
+        env:
+          GPU_CONFIG: "8-gpu-h200"
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite nightly-8-gpu-h200 --continue-on-error
+
+  # General tests - 8 GPU H20
+  nightly-test-general-8-gpu-h20:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 8-gpu-h20
+    env:
+      SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 30
+        env:
+          GPU_CONFIG: "8-gpu-h20"
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite nightly-8-gpu-h20 --continue-on-error
+
+  # Text model accuracy tests
+  nightly-test-text-accuracy-2-gpu-runner:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 2-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run eval test for text models
+        timeout-minutes: 120
+        run: |
+          cd test/srt
+          python3 nightly/test_text_models_gsm8k_eval.py
+
+  # Text model performance tests
+  nightly-test-text-perf-2-gpu-runner:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 2-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run performance test for text models
+        timeout-minutes: 180
+        env:
+          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+          GPU_CONFIG: "2-gpu-runner"
+        run: |
+          cd test/srt
+          rm -rf performance_profiles_text_models/
+          python3 nightly/test_text_models_perf.py
+
+      - name: Publish traces to storage repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        run: |
+          python3 scripts/ci/publish_traces.py --traces-dir test/srt/performance_profiles_text_models
+
+  # VLM accuracy tests
+  nightly-test-vlm-accuracy-2-gpu-runner:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 2-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run eval test for VLM models (fixed MMMU-100)
+        timeout-minutes: 240
+        run: |
+          cd test/srt
+          python3 nightly/test_vlms_mmmu_eval.py
+
+  # VLM performance tests
+  nightly-test-vlm-perf-2-gpu-runner:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 2-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run perf test for VLM models (MMMU)
+        timeout-minutes: 240
+        env:
+          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+          GPU_CONFIG: "2-gpu-runner"
+        run: |
+          cd test/srt
+          rm -rf performance_profiles_vlms/
+          python3 nightly/test_vlms_perf.py
+
+      - name: Publish traces to storage repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        run: |
+          python3 scripts/ci/publish_traces.py --traces-dir test/srt/performance_profiles_vlms
+
+  # B200 Performance tests - 4 GPU
+  nightly-test-perf-4-gpu-b200:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 4-gpu-b200
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 60
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite nightly-4-gpu-b200 --continue-on-error
+
+  # B200 Performance tests - 8 GPU
+  nightly-test-perf-8-gpu-b200:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 8-gpu-b200
+    env:
+      RUNNER_LABELS: 8-gpu-b200
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run DeepSeek v3.1 nightly performance test
+        timeout-minutes: 180
+        env:
+          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+          GPU_CONFIG: "8-gpu-b200"
+        run: |
+          rm -rf test/srt/performance_profiles_deepseek_v31/
+          cd test/srt
+          IS_BLACKWELL=1 python3 nightly/test_deepseek_v31_perf.py
+
+      - name: Publish DeepSeek v3.1 traces to storage repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        run: |
+          python3 scripts/ci/publish_traces.py --traces-dir test/srt/performance_profiles_deepseek_v31
+
+      - name: Run DeepSeek v3.2 nightly performance test
+        timeout-minutes: 180
+        env:
+          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+          GPU_CONFIG: "8-gpu-b200"
+        run: |
+          rm -rf test/srt/performance_profiles_deepseek_v32/
+          cd test/srt
+          IS_BLACKWELL=1 python3 nightly/test_deepseek_v32_perf.py
+
+      - name: Publish DeepSeek v3.2 traces to storage repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        run: |
+          python3 scripts/ci/publish_traces.py --traces-dir test/srt/performance_profiles_deepseek_v32
+
+  # Final check job: fails the run if any job listed in `needs` failed or was cancelled
+  check-all-jobs:
+    if: github.repository == 'sgl-project/sglang' && always()
+    needs:
+      - nightly-test-general-1-gpu-runner
+      - nightly-test-general-4-gpu-h100
+      - nightly-test-general-8-gpu-h200
+      - nightly-test-general-8-gpu-h20
+      - nightly-test-text-accuracy-2-gpu-runner
+      - nightly-test-text-perf-2-gpu-runner
+      - nightly-test-vlm-accuracy-2-gpu-runner
+      - nightly-test-vlm-perf-2-gpu-runner
+      - nightly-test-perf-4-gpu-b200
+      - nightly-test-perf-8-gpu-b200
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check if any job failed
+        run: |
+          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
+            echo "One or more nightly test jobs failed"
+            exit 1
+          fi
+          if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
+            echo "One or more nightly test jobs were cancelled"
+            exit 1
+          fi
+          echo "All nightly test jobs passed"
diff --git a/python/sglang/bench_one_batch_server.py b/python/sglang/bench_one_batch_server.py
@@ -130,7 +130,17 @@ def generate_markdown_report(trace_dir, results: List["BenchmarkResult"]) -> str
     """Generate a markdown report from a list of BenchmarkResult object from a single run."""
     import os
 
-    summary = f"### {results[0].model_path}\n"
+    # Build model header with run_name if it's not "default"
+    model_header = results[0].model_path
+    if results[0].run_name and results[0].run_name != "default":
+        model_header += f" ({results[0].run_name})"
+
+    # Include GPU config in model header if available
+    gpu_config = os.getenv("GPU_CONFIG", "")
+    if gpu_config:
+        model_header += f" [{gpu_config}]"
+
+    summary = f"### {model_header}\n"
 
     # summary += (
     #     f"Input lens: {result.input_len}. Output lens: {result.output_len}.\n"
diff --git a/test/srt/nightly/nightly_utils.py b/test/srt/nightly/nightly_utils.py
@@ -29,18 +29,27 @@ def __init__(
         profile_dir: str,
         test_name: str,
         base_url: str,
+        gpu_config: str = None,
     ):
         """Initialize the benchmark runner.
 
         Args:
             profile_dir: Directory to store performance profiles
             test_name: Name of the test (used for reporting)
             base_url: Base URL for the server
+            gpu_config: Optional GPU configuration string (e.g., "2-gpu-runner", "8-gpu-b200"); falls back to the GPU_CONFIG environment variable when omitted or empty
         """
         self.profile_dir = profile_dir
         self.test_name = test_name
         self.base_url = base_url
-        self.full_report = f"## {test_name}\n" + BenchmarkResult.help_str()
+        self.gpu_config = gpu_config or os.environ.get("GPU_CONFIG", "")
+
+        # Include GPU config in report header if available
+        header = f"## {test_name}"
+        if self.gpu_config:
+            header += f" ({self.gpu_config})"
+        header += "\n"
+        self.full_report = header + BenchmarkResult.help_str()
 
     def setup_profile_directory(self) -> None:
         """Create the profile directory if it doesn't exist."""
@@ -241,14 +250,19 @@ def run_benchmark_for_model(
             )
 
             # Build and run benchmark command
+            # Copy extra args (so the caller's list is not mutated) and append --run-name when a variant is specified
+            bench_args = list(extra_bench_args) if extra_bench_args else []
+            if variant:
+                bench_args.extend(["--run-name", variant])
+
             command = self.build_benchmark_command(
                 model_path,
                 batch_sizes,
                 input_lens,
                 output_lens,
                 profile_path_prefix,
                 json_output_file,
-                extra_args=extra_bench_args,
+                extra_args=bench_args,
             )
 
             result, cmd_success = self.run_benchmark_command(command, model_description)
diff --git a/test/srt/test_deepseek_v32_nsabackend.py b/test/srt/test_deepseek_v32_nsabackend.py