# Workflow file for run of PR #3150:
# "[router][quick fix] Add minimal option for reasoning effort in spec"

name: PR Test (SMG)

# Trigger on pushes/PRs that touch the router crate, plus manual dispatch.
on:
  push:
    branches: [ main ]
    paths:
      - "sgl-router/**"
  pull_request:
    branches: [ main ]
    paths:
      - "sgl-router/**"
    # Only re-run when the PR is synchronized or labeled (label gate below).
    types: [synchronize, labeled]
  workflow_dispatch:

# One active run per ref; newer pushes cancel in-flight runs.
concurrency:
  group: router-tests-${{ github.ref }}
  cancel-in-progress: true

env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: "true"
jobs:
  # Builds the Python wheel via maturin and smoke-tests the installed package.
  maturin-build-test:
    # Label gate: PRs only run when tagged 'run-ci'; other events always run.
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo
      - name: Move sgl-router folder to root
        run: |
          mv sglang-repo/sgl-router/* .
          rm -rf sglang-repo
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"
      - name: Install protoc and dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y wget unzip gcc g++ perl make
          cd /tmp
          wget https://github.com/protocolbuffers/protobuf/releases/download/v32.0/protoc-32.0-linux-x86_64.zip
          sudo unzip protoc-32.0-linux-x86_64.zip -d /usr/local
          rm protoc-32.0-linux-x86_64.zip
          protoc --version
      - name: Configure sccache
        # NOTE(review): action ref looks email-obfuscated by the page scrape
        # (likely mozilla-actions/sccache-action@<version>) — restore the real
        # pinned ref before use.
        uses: mozilla-actions/[email protected]
        with:
          version: "v0.10.0"
      - name: Test maturin build
        uses: PyO3/maturin-action@v1
        with:
          args: --release --out dist --features vendored-openssl
          rust-toolchain: stable
          sccache: true
      - name: List built wheel
        run: ls -lh dist/
      - name: Test wheel install
        run: |
          pip install dist/*.whl
          python -c "import sglang_router; print('Python package: OK')"
          python -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
          python -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"
router-unit-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install dependencies
run: |
bash scripts/ci/ci_install_rust.sh
- name: Configure sccache
uses: mozilla-actions/[email protected]
with:
version: "v0.10.0"
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
cache-all-crates: true
cache-on-failure: true
- name: Run lint
run: |
source "$HOME/.cargo/env"
cd sgl-router/
rustup component add clippy
cargo clippy --all-targets --all-features -- -D warnings
- name: Run fmt
run: |
source "$HOME/.cargo/env"
cd sgl-router/
rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
rustup toolchain install nightly --profile minimal
cargo +nightly fmt -- --check
- name: Run Rust tests
timeout-minutes: 20
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cargo test
- name: Check benchmark compilation
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cargo check --benches
- name: Quick benchmark sanity check
timeout-minutes: 15
run: |
source "$HOME/.cargo/env"
cd sgl-router/
# Run quick benchmarks to ensure they work using Python script
python3 scripts/run_benchmarks.py --quick
- name: Show sccache stats
if: always()
run: sccache --show-stats
router-http-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: 4-gpu-a10
timeout-minutes: 32
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install rust dependencies
run: |
bash scripts/ci/ci_install_rust.sh
- name: Configure sccache
uses: mozilla-actions/[email protected]
with:
version: "v0.10.0"
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
cache-all-crates: true
cache-on-failure: true
- name: Cache router build
uses: actions/cache@v4
with:
path: |
sgl-router/target
sgl-router/.venv
key: ${{ runner.os }}-router-${{ hashFiles('sgl-router/Cargo.lock', 'sgl-router/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-router-
- name: Install SGLang dependencies
run: |
sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh
- name: Build python binding
run: |
source "$HOME/.cargo/env"
export RUSTC_WRAPPER=sccache
cd sgl-router
python3 -m venv .venv
python3 -m pip install --upgrade pip maturin
maturin develop --release --profile ci --features vendored-openssl
- name: Run Python unit tests
run: |
cd sgl-router
source "$HOME/.cargo/env"
python3 -m pip install pytest pytest-cov pytest-xdist
pytest -q py_test/unit --cov=sglang_router --cov-report=term-missing --cov-fail-under=80
- name: Run Python integration tests
run: |
cd sgl-router
source "$HOME/.cargo/env"
# Integration tests use FastAPI/uvicorn for mock workers
python3 -m pip install fastapi uvicorn orjson
pytest -q py_test/integration_mock
- name: Run Python E2E tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
source "$HOME/.cargo/env"
python3 -m pip --no-cache-dir install --upgrade --ignore-installed blinker
python3 -m pip --no-cache-dir install --upgrade genai-bench==0.0.2
pytest py_test/e2e_http -s -vv -o log_cli=true --log-cli-level=INFO
- name: Upload benchmark results
if: success()
uses: actions/upload-artifact@v4
with:
name: genai-bench-results-all-policies
path: sgl-router/benchmark_**/
router-grpc-response-api-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: 4-gpu-a10
timeout-minutes: 32
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install rust dependencies
run: |
bash scripts/ci/ci_install_rust.sh
- name: Configure sccache
uses: mozilla-actions/[email protected]
with:
version: "v0.10.0"
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
cache-all-crates: true
cache-on-failure: true
- name: Cache router build
uses: actions/cache@v4
with:
path: |
sgl-router/target
sgl-router/.venv
key: ${{ runner.os }}-router-${{ hashFiles('sgl-router/Cargo.lock', 'sgl-router/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-router-
- name: Install SGLang dependencies
run: |
sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh
- name: Setup Oracle Instant Client
run: |
sudo apt-get install -y unzip
INSTANT_CLIENT_DIR="/home/ubuntu/instant-client"
INSTANT_CLIENT_ZIP="instantclient-basic-linux.x64-23.9.0.25.07.zip"
if [ ! -d "$INSTANT_CLIENT_DIR/instantclient_23_9" ]; then
echo "Downloading Oracle Instant Client..."
mkdir -p "$INSTANT_CLIENT_DIR"
cd "$INSTANT_CLIENT_DIR"
wget https://download.oracle.com/otn_software/linux/instantclient/2390000/$INSTANT_CLIENT_ZIP
unzip $INSTANT_CLIENT_ZIP
rm $INSTANT_CLIENT_ZIP
else
echo "Oracle Instant Client already exists, skipping download"
fi
echo "LD_LIBRARY_PATH=/home/ubuntu/instant-client/instantclient_23_9:\$LD_LIBRARY_PATH" >> $GITHUB_ENV
- name: Start Oracle Database
run: |
docker run -d -p 1521:1521 -e ORACLE_PASSWORD=oracle --name oracle-db gvenzl/oracle-xe:21-slim
echo "Starting Oracle DB..."
# Export Oracle connection environment variables
echo "ATP_USER=system" >> $GITHUB_ENV
echo "ATP_PASSWORD=oracle" >> $GITHUB_ENV
echo "ATP_DSN=localhost:1521/XEPDB1" >> $GITHUB_ENV
- name: Build python binding
run: |
source "$HOME/.cargo/env"
export RUSTC_WRAPPER=sccache
cd sgl-router
python3 -m venv .venv
python3 -m pip install --upgrade pip maturin
maturin develop --release --profile ci --features vendored-openssl
- name: Run Python E2E response API tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
source "$HOME/.cargo/env"
SHOW_ROUTER_LOGS=1 pytest py_test/e2e_response_api -s -vv -o log_cli=true --log-cli-level=INFO
- name: Run Python E2E gRPC tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
source "$HOME/.cargo/env"
SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest py_test/e2e_grpc -s -vv -o log_cli=true --log-cli-level=INFO
- name: Cleanup Oracle Database
if: always()
run: |
docker stop oracle-db || true
docker rm oracle-db || true
finish:
needs: [maturin-build-test, router-unit-tests, router-http-tests, router-grpc-response-api-tests]
runs-on: ubuntu-latest
steps:
- name: Finish
run: echo "This is an empty step to ensure that all jobs are completed."
summarize-benchmarks:
needs: router-http-tests
runs-on: ubuntu-latest
if: success()
steps:
- name: Install jq
run: sudo apt-get update && sudo apt-get install -y jq bc
- name: Download benchmark results
uses: actions/download-artifact@v4
with:
name: genai-bench-results-all-policies
- name: List downloaded contents
run: |
echo "Contents after download:"
ls -la
find . -name "benchmark_*" -type d
echo "JSON files found:"
find . -name "*.json" | head -10
- name: Create benchmark summary
run: |
echo "=== DEBUG: Creating benchmark summary ==="
echo "Available benchmark directories:"
find . -name "benchmark_*" -type d || true
echo "=========================================="
echo "## Router E2E Genai-Bench Results Summary" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "Results captured from E2E tests for two scenarios: regular router (2 workers, dp=2) and PD router (2 prefill + 2 decode)." >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "| Scenario | Status | TTFT (s) | E2E Latency (s) | Input Throughput (tok/s) | Output Throughput (tok/s) |" >> $GITHUB_STEP_SUMMARY
echo "|----------|--------|----------|-----------------|--------------------------|---------------------------|" >> $GITHUB_STEP_SUMMARY
scenarios=$'Regular (dp=2, round_robin)|benchmark_round_robin_regular\nPD (2 prefill + 2 decode, round_robin)|benchmark_round_robin_pd'
echo "$scenarios" | sed 's/^\s*//' | while IFS='|' read -r label pattern; do
[ -z "$label" ] && continue
# Find the result folder (handle different extraction layouts)
result_folder=$(find . -maxdepth 3 \( -name "$pattern" -o -path "*${pattern}*" \) -type d | head -1)
if [ -n "$result_folder" ] && [ -d "$result_folder" ]; then
json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1)
if [ -n "$json_file" ] && [ -f "$json_file" ]; then
ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean' "$json_file")
e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean' "$json_file")
input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean' "$json_file")
output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean' "$json_file")
ttft_display=$(printf "%.2f" "$ttft_mean" 2>/dev/null || echo "$ttft_mean")
e2e_display=$(printf "%.2f" "$e2e_latency_mean" 2>/dev/null || echo "$e2e_latency_mean")
input_display=$(printf "%.0f" "$input_throughput_mean" 2>/dev/null || echo "$input_throughput_mean")
output_display=$(printf "%.0f" "$output_throughput_mean" 2>/dev/null || echo "$output_throughput_mean")
echo "| ${label} | ✅ Success | $ttft_display | $e2e_display | $input_display | $output_display |" >> $GITHUB_STEP_SUMMARY
# Optional GPU utilization table if monitor output exists
gpu_json="$result_folder/gpu_utilization.json"
if [ -f "$gpu_json" ]; then
overall_mean=$(jq -r '.overall.mean // 0' "$gpu_json")
printf "\n#### GPU Utilization — %s\n\n" "$label" >> $GITHUB_STEP_SUMMARY
printf "Overall mean: %.2f%%\n\n" "$overall_mean" >> $GITHUB_STEP_SUMMARY
echo "| GPU | Mean (%) | p5 | p10 | p25 | p50 | p75 | p90 | p95 |" >> $GITHUB_STEP_SUMMARY
echo "|-----|----------|----|-----|-----|-----|-----|-----|-----|" >> $GITHUB_STEP_SUMMARY
jq -r '
.per_gpu
| to_entries[]
| [ .key,
(.value.mean // 0),
(.value.p5 // 0),
(.value.p10 // 0),
(.value.p25 // 0),
(.value.p50 // 0),
(.value.p75 // 0),
(.value.p90 // 0),
(.value.p95 // 0)
]
| @tsv' "$gpu_json" \
| while IFS=$'\t' read -r gpu m p5 p10 p25 p50 p75 p90 p95; do
printf "| %s | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f |\n" "$gpu" "$m" "$p5" "$p10" "$p25" "$p50" "$p75" "$p90" "$p95" >> $GITHUB_STEP_SUMMARY
done
echo "" >> $GITHUB_STEP_SUMMARY
fi
fi
fi
done