# Workflow file for run of PR #3150:
# "[router][quick fix] Add minimal option for reasoning effort in spec"

name: PR Test (SMG)

# Trigger on pushes/PRs that touch the router crate, plus manual dispatch.
on:
  push:
    branches: [ main ]
    paths:
      - "sgl-router/**"
  pull_request:
    branches: [ main ]
    paths:
      - "sgl-router/**"
    # Only re-run when the PR is synchronized or labeled (label gate below).
    types: [synchronize, labeled]
  workflow_dispatch:

# One active run per ref; newer pushes cancel in-flight runs.
concurrency:
  group: router-tests-${{ github.ref }}
  cancel-in-progress: true

env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: "true"
jobs:
  # Builds the Python wheel via maturin and smoke-tests the installed package.
  maturin-build-test:
    # Label gate: PRs only run when tagged 'run-ci'; other events always run.
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo
      - name: Move sgl-router folder to root
        run: |
          mv sglang-repo/sgl-router/* .
          rm -rf sglang-repo
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"
      - name: Install protoc and dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y wget unzip gcc g++ perl make
          cd /tmp
          wget https://github.com/protocolbuffers/protobuf/releases/download/v32.0/protoc-32.0-linux-x86_64.zip
          sudo unzip protoc-32.0-linux-x86_64.zip -d /usr/local
          rm protoc-32.0-linux-x86_64.zip
          protoc --version
      - name: Configure sccache
        # NOTE(review): action ref looks email-obfuscated by the page scrape
        # (likely mozilla-actions/sccache-action@<version>) — restore the real
        # pinned ref before use.
        uses: mozilla-actions/[email protected]
        with:
          version: "v0.10.0"
      - name: Test maturin build
        uses: PyO3/maturin-action@v1
        with:
          args: --release --out dist --features vendored-openssl
          rust-toolchain: stable
          sccache: true
      - name: List built wheel
        run: ls -lh dist/
      - name: Test wheel install
        run: |
          pip install dist/*.whl
          python -c "import sglang_router; print('Python package: OK')"
          python -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
          python -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"
router-unit-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install dependencies
run: |
bash scripts/ci/ci_install_rust.sh
- name: Configure sccache
uses: mozilla-actions/[email protected]
with:
version: "v0.10.0"
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
cache-all-crates: true
cache-on-failure: true
- name: Run lint
run: |
source "$HOME/.cargo/env"
cd sgl-router/
rustup component add clippy
cargo clippy --all-targets --all-features -- -D warnings
- name: Run fmt
run: |
source "$HOME/.cargo/env"
cd sgl-router/
rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
rustup toolchain install nightly --profile minimal
cargo +nightly fmt -- --check
- name: Run Rust tests
timeout-minutes: 20
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cargo test
- name: Check benchmark compilation
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cargo check --benches
- name: Quick benchmark sanity check
timeout-minutes: 15
run: |
source "$HOME/.cargo/env"
cd sgl-router/
# Run quick benchmarks to ensure they work using Python script
python3 scripts/run_benchmarks.py --quick
- name: Show sccache stats
if: always()
run: sccache --show-stats
router-http-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: 4-gpu-a10
timeout-minutes: 32
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install rust dependencies
run: |
bash scripts/ci/ci_install_rust.sh
- name: Configure sccache
uses: mozilla-actions/[email protected]
with:
version: "v0.10.0"
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
cache-all-crates: true
cache-on-failure: true
- name: Cache router build
uses: actions/cache@v4
with:
path: |
sgl-router/target
sgl-router/.venv
key: ${{ runner.os }}-router-${{ hashFiles('sgl-router/Cargo.lock', 'sgl-router/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-router-
- name: Install SGLang dependencies
run: |
sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh
- name: Build python binding
run: |
source "$HOME/.cargo/env"
export RUSTC_WRAPPER=sccache
cd sgl-router
python3 -m venv .venv
python3 -m pip install --upgrade pip maturin
maturin develop --release --profile ci --features vendored-openssl
- name: Run Python unit tests
run: |
cd sgl-router
source "$HOME/.cargo/env"
python3 -m pip install pytest pytest-cov pytest-xdist
pytest -q py_test/unit --cov=sglang_router --cov-report=term-missing --cov-fail-under=80
- name: Run Python integration tests
run: |
cd sgl-router
source "$HOME/.cargo/env"
# Integration tests use FastAPI/uvicorn for mock workers
python3 -m pip install fastapi uvicorn orjson
pytest -q py_test/integration_mock
- name: Run Python E2E tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
source "$HOME/.cargo/env"
python3 -m pip --no-cache-dir install --upgrade --ignore-installed blinker
python3 -m pip --no-cache-dir install --upgrade genai-bench==0.0.2
pytest py_test/e2e_http -s -vv -o log_cli=true --log-cli-level=INFO
- name: Upload benchmark results
if: success()
uses: actions/upload-artifact@v4
with:
name: genai-bench-results-all-policies
path: sgl-router/benchmark_**/
router-grpc-response-api-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: 4-gpu-a10
timeout-minutes: 32
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install rust dependencies
run: |
bash scripts/ci/ci_install_rust.sh
- name: Configure sccache
uses: mozilla-actions/[email protected]
with:
version: "v0.10.0"
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
cache-all-crates: true
cache-on-failure: true
- name: Cache router build
uses: actions/cache@v4
with:
path: |
sgl-router/target
sgl-router/.venv
key: ${{ runner.os }}-router-${{ hashFiles('sgl-router/Cargo.lock', 'sgl-router/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-router-
- name: Install SGLang dependencies
run: |
sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh
- name: Setup Oracle Instant Client
run: |
sudo apt-get install -y unzip
INSTANT_CLIENT_DIR="/home/ubuntu/instant-client"
INSTANT_CLIENT_ZIP="instantclient-basic-linux.x64-23.9.0.25.07.zip"
if [ ! -d "$INSTANT_CLIENT_DIR/instantclient_23_9" ]; then
echo "Downloading Oracle Instant Client..."
mkdir -p "$INSTANT_CLIENT_DIR"
cd "$INSTANT_CLIENT_DIR"
wget https://download.oracle.com/otn_software/linux/instantclient/2390000/$INSTANT_CLIENT_ZIP
unzip $INSTANT_CLIENT_ZIP
rm $INSTANT_CLIENT_ZIP
else
echo "Oracle Instant Client already exists, skipping download"
fi
echo "LD_LIBRARY_PATH=/home/ubuntu/instant-client/instantclient_23_9:\$LD_LIBRARY_PATH" >> $GITHUB_ENV
- name: Start Oracle Database
run: |
docker run -d -p 1521:1521 -e ORACLE_PASSWORD=oracle --name oracle-db gvenzl/oracle-xe:21-slim
echo "Starting Oracle DB..."
# Export Oracle connection environment variables
echo "ATP_USER=system" >> $GITHUB_ENV
echo "ATP_PASSWORD=oracle" >> $GITHUB_ENV
echo "ATP_DSN=localhost:1521/XEPDB1" >> $GITHUB_ENV
- name: Build python binding
run: |
source "$HOME/.cargo/env"
export RUSTC_WRAPPER=sccache
cd sgl-router
python3 -m venv .venv
python3 -m pip install --upgrade pip maturin
maturin develop --release --profile ci --features vendored-openssl
- name: Run Python E2E response API tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
source "$HOME/.cargo/env"
SHOW_ROUTER_LOGS=1 pytest py_test/e2e_response_api -s -vv -o log_cli=true --log-cli-level=INFO
- name: Run Python E2E gRPC tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
source "$HOME/.cargo/env"
SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest py_test/e2e_grpc -s -vv -o log_cli=true --log-cli-level=INFO
- name: Cleanup Oracle Database
if: always()
run: |
docker stop oracle-db || true
docker rm oracle-db || true
finish:
needs: [maturin-build-test, router-unit-tests, router-http-tests, router-grpc-response-api-tests]
runs-on: ubuntu-latest
steps:
- name: Finish
run: echo "This is an empty step to ensure that all jobs are completed."
summarize-benchmarks:
needs: router-http-tests
runs-on: ubuntu-latest
if: success()
steps:
- name: Install jq
run: sudo apt-get update && sudo apt-get install -y jq bc
- name: Download benchmark results
uses: actions/download-artifact@v4
with:
name: genai-bench-results-all-policies
- name: List downloaded contents
run: |
echo "Contents after download:"
ls -la
find . -name "benchmark_*" -type d
echo "JSON files found:"
find . -name "*.json" | head -10
- name: Create benchmark summary
run: |
echo "=== DEBUG: Creating benchmark summary ==="
echo "Available benchmark directories:"
find . -name "benchmark_*" -type d || true
echo "=========================================="
echo "## Router E2E Genai-Bench Results Summary" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "Results captured from E2E tests for two scenarios: regular router (2 workers, dp=2) and PD router (2 prefill + 2 decode)." >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "| Scenario | Status | TTFT (s) | E2E Latency (s) | Input Throughput (tok/s) | Output Throughput (tok/s) |" >> $GITHUB_STEP_SUMMARY
echo "|----------|--------|----------|-----------------|--------------------------|---------------------------|" >> $GITHUB_STEP_SUMMARY
scenarios=$'Regular (dp=2, round_robin)|benchmark_round_robin_regular\nPD (2 prefill + 2 decode, round_robin)|benchmark_round_robin_pd'
echo "$scenarios" | sed 's/^\s*//' | while IFS='|' read -r label pattern; do
[ -z "$label" ] && continue
# Find the result folder (handle different extraction layouts)
result_folder=$(find . -maxdepth 3 \( -name "$pattern" -o -path "*${pattern}*" \) -type d | head -1)
if [ -n "$result_folder" ] && [ -d "$result_folder" ]; then
json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1)
if [ -n "$json_file" ] && [ -f "$json_file" ]; then
ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean' "$json_file")
e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean' "$json_file")
input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean' "$json_file")
output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean' "$json_file")
ttft_display=$(printf "%.2f" "$ttft_mean" 2>/dev/null || echo "$ttft_mean")
e2e_display=$(printf "%.2f" "$e2e_latency_mean" 2>/dev/null || echo "$e2e_latency_mean")
input_display=$(printf "%.0f" "$input_throughput_mean" 2>/dev/null || echo "$input_throughput_mean")
output_display=$(printf "%.0f" "$output_throughput_mean" 2>/dev/null || echo "$output_throughput_mean")
echo "| ${label} | ✅ Success | $ttft_display | $e2e_display | $input_display | $output_display |" >> $GITHUB_STEP_SUMMARY
# Optional GPU utilization table if monitor output exists
gpu_json="$result_folder/gpu_utilization.json"
if [ -f "$gpu_json" ]; then
overall_mean=$(jq -r '.overall.mean // 0' "$gpu_json")
printf "\n#### GPU Utilization — %s\n\n" "$label" >> $GITHUB_STEP_SUMMARY
printf "Overall mean: %.2f%%\n\n" "$overall_mean" >> $GITHUB_STEP_SUMMARY
echo "| GPU | Mean (%) | p5 | p10 | p25 | p50 | p75 | p90 | p95 |" >> $GITHUB_STEP_SUMMARY
echo "|-----|----------|----|-----|-----|-----|-----|-----|-----|" >> $GITHUB_STEP_SUMMARY
jq -r '
.per_gpu
| to_entries[]
| [ .key,
(.value.mean // 0),
(.value.p5 // 0),
(.value.p10 // 0),
(.value.p25 // 0),
(.value.p50 // 0),
(.value.p75 // 0),
(.value.p90 // 0),
(.value.p95 // 0)
]
| @tsv' "$gpu_json" \
| while IFS=$'\t' read -r gpu m p5 p10 p25 p50 p75 p90 p95; do
printf "| %s | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f |\n" "$gpu" "$m" "$p5" "$p10" "$p25" "$p50" "$p75" "$p90" "$p95" >> $GITHUB_STEP_SUMMARY
done
echo "" >> $GITHUB_STEP_SUMMARY
fi
fi
fi
done