[router][quick fix] Add minimal option for reasoning effort in spec (… #3150
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow for the sgl-router (SMG) test suite.
name: PR Test (SMG)

# Triggers: pushes and pull requests targeting main that touch sgl-router/,
# plus manual dispatch. Pull requests only fire on new commits
# (synchronize) and on label changes (labeled).
on:
  push:
    branches:
      - main
    paths:
      - "sgl-router/**"
  pull_request:
    branches:
      - main
    paths:
      - "sgl-router/**"
    types:
      - synchronize
      - labeled
  workflow_dispatch:

# One active run per git ref; a newer run cancels the one in progress.
concurrency:
  group: router-tests-${{ github.ref }}
  cancel-in-progress: true

# Workflow-level environment shared by every job below.
env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: "true"

jobs:
# Job: build the Python wheel with maturin and smoke-test the installed package.
  maturin-build-test:
    # Always runs for non-PR events; for PRs it requires the 'run-ci' label.
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      # Hoist the router sources to the workspace root so maturin builds
      # from the current directory, then drop the rest of the checkout.
      - name: Move sgl-router folder to root
        run: |
          mv sglang-repo/sgl-router/* .
          rm -rf sglang-repo

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      # protoc is installed from the upstream release archive into /usr/local.
      - name: Install protoc and dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y wget unzip gcc g++ perl make
          cd /tmp
          wget https://github.com/protocolbuffers/protobuf/releases/download/v32.0/protoc-32.0-linux-x86_64.zip
          sudo unzip protoc-32.0-linux-x86_64.zip -d /usr/local
          rm protoc-32.0-linux-x86_64.zip
          protoc --version

      # NOTE(review): action tag reconstructed from a garbled
      # "[email protected]" reference — confirm v0.0.9 is the intended pin.
      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.10.0"

      - name: Test maturin build
        uses: PyO3/maturin-action@v1
        with:
          args: --release --out dist --features vendored-openssl
          rust-toolchain: stable
          sccache: true

      - name: List built wheel
        run: ls -lh dist/

      # Install the freshly built wheel and check the Python package, the
      # Rust extension module and the CLI entry point all load.
      - name: Test wheel install
        run: |
          pip install dist/*.whl
          python -c "import sglang_router; print('Python package: OK')"
          python -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
          python -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"
# Job: Rust lint, format check, unit tests and benchmark sanity checks.
  router-unit-tests:
    # Always runs for non-PR events; for PRs it requires the 'run-ci' label.
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      # NOTE(review): action tag reconstructed from a garbled
      # "[email protected]" reference — confirm v0.0.9 is the intended pin.
      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.10.0"

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router
          cache-all-crates: true
          cache-on-failure: true

      # Clippy runs with warnings promoted to errors.
      - name: Run lint
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          rustup component add clippy
          cargo clippy --all-targets --all-features -- -D warnings

      # Formatting is checked with nightly rustfmt. The nightly toolchain
      # must be installed before a component can be added to it, so the
      # install comes first.
      - name: Run fmt
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          rustup toolchain install nightly --profile minimal
          rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
          cargo +nightly fmt -- --check

      - name: Run Rust tests
        timeout-minutes: 20
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          cargo test

      - name: Check benchmark compilation
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          cargo check --benches

      - name: Quick benchmark sanity check
        timeout-minutes: 15
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Run quick benchmarks to ensure they work using Python script
          python3 scripts/run_benchmarks.py --quick

      # Reported even when an earlier step failed.
      - name: Show sccache stats
        if: always()
        run: sccache --show-stats
# Job: Python unit, mock-integration and HTTP end-to-end tests on a GPU runner.
  router-http-tests:
    # Always runs for non-PR events; for PRs it requires the 'run-ci' label.
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: 4-gpu-a10
    timeout-minutes: 32
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install rust dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      # NOTE(review): action tag reconstructed from a garbled
      # "[email protected]" reference — confirm v0.0.9 is the intended pin.
      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.10.0"

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router
          cache-all-crates: true
          cache-on-failure: true

      # Cache keyed on the lockfile and pyproject; falls back to the most
      # recent cache for this OS when there is no exact match.
      - name: Cache router build
        uses: actions/cache@v4
        with:
          path: |
            sgl-router/target
            sgl-router/.venv
          key: ${{ runner.os }}-router-${{ hashFiles('sgl-router/Cargo.lock', 'sgl-router/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-router-

      - name: Install SGLang dependencies
        run: |
          sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh

      # NOTE(review): .venv is created but never activated in this step, and
      # the later pip/pytest calls use the runner's python3 — confirm this
      # is intended.
      - name: Build python binding
        run: |
          source "$HOME/.cargo/env"
          export RUSTC_WRAPPER=sccache
          cd sgl-router
          python3 -m venv .venv
          python3 -m pip install --upgrade pip maturin
          maturin develop --release --profile ci --features vendored-openssl

      # Unit tests fail the step when coverage drops below 80%.
      - name: Run Python unit tests
        run: |
          cd sgl-router
          source "$HOME/.cargo/env"
          python3 -m pip install pytest pytest-cov pytest-xdist
          pytest -q py_test/unit --cov=sglang_router --cov-report=term-missing --cov-fail-under=80

      - name: Run Python integration tests
        run: |
          cd sgl-router
          source "$HOME/.cargo/env"
          # Integration tests use FastAPI/uvicorn for mock workers
          python3 -m pip install fastapi uvicorn orjson
          pytest -q py_test/integration_mock

      - name: Run Python E2E tests
        run: |
          bash scripts/killall_sglang.sh "nuk_gpus"
          cd sgl-router
          source "$HOME/.cargo/env"
          python3 -m pip --no-cache-dir install --upgrade --ignore-installed blinker
          python3 -m pip --no-cache-dir install --upgrade genai-bench==0.0.2
          pytest py_test/e2e_http -s -vv -o log_cli=true --log-cli-level=INFO

      # The artifact is consumed by the summarize-benchmarks job.
      - name: Upload benchmark results
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: genai-bench-results-all-policies
          path: sgl-router/benchmark_**/
# Job: response-API and gRPC end-to-end tests, backed by a local Oracle XE container.
  router-grpc-response-api-tests:
    # Always runs for non-PR events; for PRs it requires the 'run-ci' label.
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: 4-gpu-a10
    timeout-minutes: 32
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install rust dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      # NOTE(review): action tag reconstructed from a garbled
      # "[email protected]" reference — confirm v0.0.9 is the intended pin.
      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.10.0"

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router
          cache-all-crates: true
          cache-on-failure: true

      # Cache keyed on the lockfile and pyproject; falls back to the most
      # recent cache for this OS when there is no exact match.
      - name: Cache router build
        uses: actions/cache@v4
        with:
          path: |
            sgl-router/target
            sgl-router/.venv
          key: ${{ runner.os }}-router-${{ hashFiles('sgl-router/Cargo.lock', 'sgl-router/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-router-

      - name: Install SGLang dependencies
        run: |
          sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh

      # Downloads the Instant Client once per runner (skipped when the
      # directory already exists) and exposes it via LD_LIBRARY_PATH.
      - name: Setup Oracle Instant Client
        run: |
          sudo apt-get install -y unzip
          INSTANT_CLIENT_DIR="/home/ubuntu/instant-client"
          INSTANT_CLIENT_ZIP="instantclient-basic-linux.x64-23.9.0.25.07.zip"
          INSTANT_CLIENT_LIB_DIR="$INSTANT_CLIENT_DIR/instantclient_23_9"
          if [ ! -d "$INSTANT_CLIENT_LIB_DIR" ]; then
            echo "Downloading Oracle Instant Client..."
            mkdir -p "$INSTANT_CLIENT_DIR"
            cd "$INSTANT_CLIENT_DIR"
            wget "https://download.oracle.com/otn_software/linux/instantclient/2390000/$INSTANT_CLIENT_ZIP"
            unzip "$INSTANT_CLIENT_ZIP"
            rm "$INSTANT_CLIENT_ZIP"
          else
            echo "Oracle Instant Client already exists, skipping download"
          fi
          # GITHUB_ENV values are stored verbatim (no shell expansion), so
          # the current LD_LIBRARY_PATH must be expanded here, at write time.
          # The ${VAR:+...} form avoids a trailing ':' when it is unset.
          echo "LD_LIBRARY_PATH=${INSTANT_CLIENT_LIB_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> "$GITHUB_ENV"

      # NOTE(review): the container is started detached and nothing here
      # waits for the database to accept connections — confirm the tests
      # tolerate that.
      - name: Start Oracle Database
        run: |
          docker run -d -p 1521:1521 -e ORACLE_PASSWORD=oracle --name oracle-db gvenzl/oracle-xe:21-slim
          echo "Starting Oracle DB..."
          # Export Oracle connection environment variables
          echo "ATP_USER=system" >> "$GITHUB_ENV"
          echo "ATP_PASSWORD=oracle" >> "$GITHUB_ENV"
          echo "ATP_DSN=localhost:1521/XEPDB1" >> "$GITHUB_ENV"

      # NOTE(review): .venv is created but never activated in this step, and
      # the later pytest calls use the runner's python3 — confirm this is
      # intended.
      - name: Build python binding
        run: |
          source "$HOME/.cargo/env"
          export RUSTC_WRAPPER=sccache
          cd sgl-router
          python3 -m venv .venv
          python3 -m pip install --upgrade pip maturin
          maturin develop --release --profile ci --features vendored-openssl

      - name: Run Python E2E response API tests
        run: |
          bash scripts/killall_sglang.sh "nuk_gpus"
          cd sgl-router
          source "$HOME/.cargo/env"
          SHOW_ROUTER_LOGS=1 pytest py_test/e2e_response_api -s -vv -o log_cli=true --log-cli-level=INFO

      - name: Run Python E2E gRPC tests
        run: |
          bash scripts/killall_sglang.sh "nuk_gpus"
          cd sgl-router
          source "$HOME/.cargo/env"
          SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest py_test/e2e_grpc -s -vv -o log_cli=true --log-cli-level=INFO

      # Best-effort teardown; runs even when earlier steps failed.
      - name: Cleanup Oracle Database
        if: always()
        run: |
          docker stop oracle-db || true
          docker rm oracle-db || true
# Job: single aggregation point that depends on all four test jobs.
  finish:
    needs:
      - maturin-build-test
      - router-unit-tests
      - router-http-tests
      - router-grpc-response-api-tests
    runs-on: ubuntu-latest
    steps:
      # The step itself only prints a message; the job's value is its
      # dependency list.
      - name: Finish
        run: echo "This is an empty step to ensure that all jobs are completed."
# Job: render the genai-bench results from router-http-tests into the step summary.
  summarize-benchmarks:
    needs: router-http-tests
    runs-on: ubuntu-latest
    if: success()
    steps:
      - name: Install jq
        run: sudo apt-get update && sudo apt-get install -y jq bc

      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: genai-bench-results-all-policies

      # Debug aid: show what the artifact extraction produced.
      - name: List downloaded contents
        run: |
          echo "Contents after download:"
          ls -la
          find . -name "benchmark_*" -type d
          echo "JSON files found:"
          find . -name "*.json" | head -10

      # Writes one Markdown table row per scenario whose result folder and
      # metrics JSON are found, plus an optional per-GPU utilization table.
      # Scenarios with no folder or no JSON file are silently skipped.
      - name: Create benchmark summary
        run: |
          # Format value $2 with printf format $1. When the value is not
          # numeric (e.g. jq printed "null"), bash printf still emits a
          # zero and fails, so the raw value is printed on its own instead
          # of being appended to that zero.
          fmt_num() {
            local formatted
            if formatted=$(printf "$1" "$2" 2>/dev/null); then
              printf '%s\n' "$formatted"
            else
              printf '%s\n' "$2"
            fi
          }

          echo "=== DEBUG: Creating benchmark summary ==="
          echo "Available benchmark directories:"
          find . -name "benchmark_*" -type d || true
          echo "=========================================="

          echo "## Router E2E Genai-Bench Results Summary" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "Results captured from E2E tests for two scenarios: regular router (2 workers, dp=2) and PD router (2 prefill + 2 decode)." >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "| Scenario | Status | TTFT (s) | E2E Latency (s) | Input Throughput (tok/s) | Output Throughput (tok/s) |" >> "$GITHUB_STEP_SUMMARY"
          echo "|----------|--------|----------|-----------------|--------------------------|---------------------------|" >> "$GITHUB_STEP_SUMMARY"

          # One "label|directory-pattern" pair per line.
          scenarios=$'Regular (dp=2, round_robin)|benchmark_round_robin_regular\nPD (2 prefill + 2 decode, round_robin)|benchmark_round_robin_pd'

          echo "$scenarios" | sed 's/^\s*//' | while IFS='|' read -r label pattern; do
            [ -z "$label" ] && continue

            # Find the result folder (handle different extraction layouts)
            result_folder=$(find . -maxdepth 3 \( -name "$pattern" -o -path "*${pattern}*" \) -type d | head -1)

            if [ -n "$result_folder" ] && [ -d "$result_folder" ]; then
              json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1)

              if [ -n "$json_file" ] && [ -f "$json_file" ]; then
                ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean' "$json_file")
                e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean' "$json_file")
                input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean' "$json_file")
                output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean' "$json_file")

                ttft_display=$(fmt_num "%.2f" "$ttft_mean")
                e2e_display=$(fmt_num "%.2f" "$e2e_latency_mean")
                input_display=$(fmt_num "%.0f" "$input_throughput_mean")
                output_display=$(fmt_num "%.0f" "$output_throughput_mean")

                echo "| ${label} | ✅ Success | $ttft_display | $e2e_display | $input_display | $output_display |" >> "$GITHUB_STEP_SUMMARY"

                # Optional GPU utilization table if monitor output exists
                gpu_json="$result_folder/gpu_utilization.json"
                if [ -f "$gpu_json" ]; then
                  overall_mean=$(jq -r '.overall.mean // 0' "$gpu_json")
                  printf "\n#### GPU Utilization — %s\n\n" "$label" >> "$GITHUB_STEP_SUMMARY"
                  printf "Overall mean: %.2f%%\n\n" "$overall_mean" >> "$GITHUB_STEP_SUMMARY"
                  echo "| GPU | Mean (%) | p5 | p10 | p25 | p50 | p75 | p90 | p95 |" >> "$GITHUB_STEP_SUMMARY"
                  echo "|-----|----------|----|-----|-----|-----|-----|-----|-----|" >> "$GITHUB_STEP_SUMMARY"
                  # Missing per-GPU fields default to 0 so printf always
                  # receives a number.
                  jq -r '
                    .per_gpu
                    | to_entries[]
                    | [ .key,
                        (.value.mean // 0),
                        (.value.p5 // 0),
                        (.value.p10 // 0),
                        (.value.p25 // 0),
                        (.value.p50 // 0),
                        (.value.p75 // 0),
                        (.value.p90 // 0),
                        (.value.p95 // 0)
                      ]
                    | @tsv' "$gpu_json" \
                    | while IFS=$'\t' read -r gpu m p5 p10 p25 p50 p75 p90 p95; do
                        printf "| %s | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f |\n" "$gpu" "$m" "$p5" "$p10" "$p25" "$p50" "$p75" "$p90" "$p95" >> "$GITHUB_STEP_SUMMARY"
                      done
                  echo "" >> "$GITHUB_STEP_SUMMARY"
                fi
              fi
            fi
          done