65 changes: 33 additions & 32 deletions benchmark/benchmark_serving.py
@@ -42,8 +42,6 @@ def get_output_file(model_path, backend, server_config):
params = [
('bs', server_config['max_batch_size']),
('tp', server_config.get('tp', 1)),
('dp', server_config.get('dp', '')),
('ep', server_config.get('ep', '')),
('cache', server_config.get('cache_max_entry_count', 0.8)),
('mptk', server_config.get('max_prefill_token_num', '')),
]
@@ -57,15 +55,8 @@ def get_output_file(model_path, backend, server_config):

def get_server_ip_port(backend: str, server_config: Dict) -> Tuple[str, int]:
if backend in ['turbomind', 'pytorch']:
if server_config.get('proxy_url'):
# If proxy_url is set, we use the proxy server's IP and port
parts = server_config['proxy_url'].split(':')
server_ip = parts[1].lstrip('//')
server_port = int(parts[2])
else:
# Default to the server IP and port specified in the config
server_ip = server_config.get('server_ip', '0.0.0.0')
server_port = server_config.get('server_port', 23333)
server_ip = server_config.get('server_ip', '0.0.0.0')
server_port = server_config.get('server_port', 23333)
elif backend == 'sglang':
return (server_config.get('server_ip', '0.0.0.0'), server_config.get('port', 30000))
elif backend == 'vllm':
@@ -75,7 +66,7 @@ def get_server_ip_port(backend: str, server_config: Dict) -> Tuple[str, int]:
return server_ip, server_port


def wait_server_ready(server_ip: str, server_port: int) -> bool:
def get_served_model_name(server_ip: str, server_port: int) -> str:
"""Wait for the API server to become ready and return the served model name."""
from openai import OpenAI
while True:
@@ -84,7 +75,7 @@ def wait_server_ready(server_ip: str, server_port: int) -> bool:
model_name = client.models.list().data[0].id
if model_name:
print('Server is ready.')
return True
return model_name
except Exception as e:
print(f'connect to server http://{server_ip}:{server_port} failed {e}')
time.sleep(5)
@@ -135,7 +126,7 @@ def benchmark(model_path: str, backend: str, server_config: Dict, data_config: D
print(f"Starting api_server: {' '.join(server_cmd)}", flush=True)
proc = subprocess.Popen(server_cmd)
# Wait for the server to be ready
wait_server_ready(server_ip, server_port)
get_served_model_name(server_ip, server_port)
# Run benchmarks
output_file = get_output_file(model_path, backend, server_config)
for data in data_config:
@@ -166,25 +157,30 @@ def benchmark(model_path: str, backend: str, server_config: Dict, data_config: D
proc.kill()


def validate_config(config: Dict) -> None:
"""Validate the configuration structure.
def benchmark_proxy(backend: str, server_config: Dict, data_config: Dict | List[Dict]):
server_ip = server_config.get('server_ip', '0.0.0.0')
server_port = server_config.get('server_port', 8000)

Args:
config: Loaded configuration dictionary

Raises:
BenchmarkConfigError: If configuration is invalid
"""
required_sections = ['api_server', 'engine', 'data']
for section in required_sections:
if section not in config:
raise ValueError(f'Missing required config section: {section}')

if not isinstance(config['engine'], (Dict, List)):
raise ValueError('engine config must be a dict or list of dicts')
if isinstance(data_config, Dict):
data_config = [data_config]
if not (isinstance(data_config, List) and all(isinstance(d, Dict) for d in data_config)):
raise ValueError('data_config must be a dict or list of dicts')

if not isinstance(config['data'], (Dict, List)):
raise ValueError('data config must be a dict or list of dicts')
try:
# Wait for the proxy_server to be ready
model_name = get_served_model_name(server_ip, server_port)
model_name = model_name.replace('/', '_')
# Run benchmarks
output_file = f'benchmark_proxy_{model_name}_{backend}.csv'
for data in data_config:
data = data.copy()
data['output_file'] = output_file
client_cmd = get_client_cmd(backend, server_ip, server_port, data)
print(f"Running benchmark: {' '.join(client_cmd)}")
subprocess.run(client_cmd, check=True)
except Exception as e:
print(f'Unexpected error: {e}')
raise


def main(backend: str, config_path: str, model_path: Optional[str] = None):
@@ -197,11 +193,16 @@ def main(backend: str, config_path: str, model_path: Optional[str] = None):
Raises:
BenchmarkConfigError: If required parameters are missing or config is invalid
"""
with open(config_path, 'r') as f:
with open(config_path, 'r', encoding='utf-8') as f:
config = yaml.safe_load(f)
server_config = config['server']
engine_configs = config['engine']
data_config = config['data']

server_type = server_config.get('type', 'api_server')
if server_type == 'proxy':
benchmark_proxy(backend, server_config, data_config)
return
if isinstance(engine_configs, Dict):
engine_configs = [engine_configs]
assert isinstance(engine_configs, List) and all(isinstance(s, Dict) for s in engine_configs)
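For quick reference, here is a minimal, self-contained sketch of how the renamed readiness helper reads after this change. The polling loop, the `models.list()` call, the log messages, and the 5-second retry are taken from the hunks above; the `OpenAI(...)` construction (base URL path and placeholder API key) is not visible in the diff and is an assumption.

```python
import time

from openai import OpenAI


def get_served_model_name(server_ip: str, server_port: int) -> str:
    """Poll the OpenAI-compatible endpoint until it answers, then return the served model name."""
    while True:
        try:
            # Assumed client setup: the diff does not show how the client is constructed.
            client = OpenAI(api_key='EMPTY', base_url=f'http://{server_ip}:{server_port}/v1')
            model_name = client.models.list().data[0].id
            if model_name:
                print('Server is ready.')
                return model_name
        except Exception as e:
            print(f'connect to server http://{server_ip}:{server_port} failed {e}')
            time.sleep(5)
```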
9 changes: 2 additions & 7 deletions benchmark/lmdeploy.yml
@@ -4,6 +4,8 @@ dataset_name: &dataset_name "sharegpt"
model_path: &model_path "Qwen/Qwen3-30B-A3B-FP8"
server:
server_port: 23333
# The type of the server. It is either "api_server" or "proxy".
type: "api_server"
# Inference engine configuration
engine:
- model_path: *model_path
@@ -15,13 +17,6 @@ engine:
cache_max_entry_count: 0.9
max_prefill_token_num: 4096
tp: 1
- model_path: "Qwen/Qwen3-235B-A22B-FP8"
max_batch_size: 64
cache_max_entry_count: 0.7
max_prefill_token_num: 4096
dp: 8
ep: 8
proxy_url: "http://localhost:8000"
# Benchmark test configuration for profile_restful_api.py
# Defines multiple test cases with different output lengths to evaluate API performance
data:
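The updated `lmdeploy.yml` only demonstrates the `api_server` type. As an illustration, here is a hypothetical sketch of a `proxy`-type config: the `type`, `server_ip`, and `server_port` keys match what `benchmark_proxy` reads (defaults `0.0.0.0:8000`), while the `data` entry fields are placeholders, since the dataset schema is not shown in this diff. Note that `main()` still reads `config['engine']` before dispatching on the server type, so an `engine` section may still need to be present even though proxy mode ignores it.

```yaml
# Hypothetical proxy-mode sketch (not part of this PR's config file)
server:
  # The type of the server. It is either "api_server" or "proxy".
  type: "proxy"
  server_ip: "0.0.0.0"   # default assumed by benchmark_proxy
  server_port: 8000      # default assumed by benchmark_proxy
data:
  - dataset_name: "sharegpt"   # placeholder fields; reuse the schema of the
    output_len: 512            # existing "data" section in lmdeploy.yml
```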
24 changes: 20 additions & 4 deletions docs/en/benchmark/evaluate_with_opencompass.md
@@ -10,33 +10,44 @@ If sufficient computational resources are available, please refer to the [End-to

## Environment Setup

Install LMDeploy and OpenCompass in separate Python virtual environments to avoid potential dependency conflicts.

- install lmdeploy

```shell
conda create -n lmdeploy python=3.10 -y
pip install lmdeploy
pip install "opencompass[full]"

# Download the lmdeploy source code, which will be used in subsequent steps to access the eval scripts and configuration files
git clone --depth=1 https://github.com/InternLM/lmdeploy.git
```

It is recommended to install LMDeploy and OpenCompass in separate Python virtual environments to avoid potential dependency conflicts.
- install opencompass

```shell
conda create -n opencompass python=3.10 -y
pip install "opencompass[full]"
```

## End-to-End Evaluation

1. **Deploy Target Model**

```shell
conda activate lmdeploy
lmdeploy serve api_server <model_path> --server-port 10000 <--other-options>
```

2. **Deploy Evaluation Model (Judger)**

```shell
conda activate lmdeploy
lmdeploy serve api_server opencompass/CompassVerifier-32B --server-port 20000 --tp 2
```

3. **Generate Evaluation Configuration and Execute**

```shell
conda activate opencompass

cd {the/root/path/of/lmdeploy/repo}

@@ -64,12 +75,14 @@ This stage generates model responses for the dataset.
1. **Deploy Target Model**

```shell
conda activate lmdeploy
lmdeploy serve api_server <model_path> --server-port 10000 <--other-options>
```

2. **Generate Inference Configuration and Execute**

```shell
conda activate opencompass
cd {the/root/path/of/lmdeploy/repo}

## Specify the dataset path. OC will download the datasets automatically if they are
@@ -92,20 +105,23 @@ This stage uses the evaluation model (Judger) to assess the quality of inference
1. **Deploy Evaluation Model (Judger)**

```shell
conda activate lmdeploy
lmdeploy serve api_server opencompass/CompassVerifier-32B --server-port 20000 --tp 2 --session-len 65536
```

2. **Generate Evaluation Configuration and Execute**

```shell
conda activate opencompass

cd {the/root/path/of/lmdeploy/repo}

## Specify the dataset path. OC will download the datasets automatically if they are
## not found in the path
export COMPASS_DATA_CACHE=/nvme1/shared/opencompass/.cache
export HF_DATASETS_CACHE=/nvme4/huggingface_hub/datasets
# Run evaluation task
opencompass /path/to/judger_config.py -m eval -w {oc_output_dir} -r {yyyymmdd_hhmmss}
python eval/eval.py {task_name} --mode eval --judger-server http://{judger-server-ip}:20000 -w {oc_output_dir} -r {yyyymmdd_hhmmss}
```

Important Notes:
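As a usage illustration of the new judger-stage command, a hypothetical invocation with made-up placeholder values (task name, cache paths, judger address, work dir, and run timestamp) might look like the following; substitute your own values.

```shell
conda activate opencompass
cd lmdeploy

# Placeholder cache locations for illustration only
export COMPASS_DATA_CACHE=/path/to/opencompass_data_cache
export HF_DATASETS_CACHE=/path/to/hf_datasets_cache

# Hypothetical task name and judger address
python eval/eval.py my_task --mode eval \
    --judger-server http://10.0.0.2:20000 \
    -w outputs/opencompass \
    -r 20250101_120000
```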
23 changes: 20 additions & 3 deletions docs/zh_cn/benchmark/evaluate_with_opencompass.md
@@ -10,33 +10,44 @@

## Environment Setup

Install LMDeploy and OpenCompass in separate Python virtual environments to avoid potential dependency conflicts.

- install lmdeploy

```shell
conda create -n lmdeploy python=3.10 -y
pip install lmdeploy
pip install "opencompass[full]"

# Download the lmdeploy source code; the evaluation scripts and configuration files under eval/* will be used in later steps
git clone --depth=1 https://github.com/InternLM/lmdeploy.git
```

It is recommended to install LMDeploy and OpenCompass in separate Python virtual environments to avoid potential dependency conflicts.
- install opencompass

```shell
pip install "opencompass[full]"
```

## End-to-End Evaluation

1. **Deploy Target Model**

```shell
conda activate lmdeploy
lmdeploy serve api_server <model_path> --server-port 10000 <--other-options>
```

2. **Deploy Evaluation Model (Judger)**

```shell
conda activate lmdeploy
lmdeploy serve api_server opencompass/CompassVerifier-32B --server-port 20000 --tp 2 --session-len 65536
```

3. **Generate Evaluation Configuration and Execute**

```shell
conda activate opencompass

cd {the/root/path/of/lmdeploy/repo}

## Specify the dataset path. OC will download the datasets automatically if they are not found in the path
Expand All @@ -62,12 +73,15 @@ python eval/eval.py {task_name} \
1. **Deploy Target Model**

```shell
conda activate lmdeploy
lmdeploy serve api_server <model_path> --server-port 10000 <--other-options>
```

2. **Generate Inference Configuration and Execute**

```shell
conda activate opencompass

cd {the/root/path/of/lmdeploy/repo}

## Specify the dataset path. OC will download the datasets automatically if they are not found in the path
@@ -91,12 +105,15 @@ python eval/eval.py {task_name} \
1. **Deploy Evaluation Model (Judger)**

```shell
conda activate lmdeploy
lmdeploy serve api_server opencompass/CompassVerifier-32B --server-port 20000 --tp 2
```

2. **Generate Evaluation Configuration and Execute**

```shell
conda activate opencompass

cd {the/root/path/of/lmdeploy/repo}

## Specify the dataset path. OC will download the datasets automatically if they are not found in the path