diff --git a/benchmark/benchmark_serving.py b/benchmark/benchmark_serving.py
index 2527507d33..b66a253ed9 100644
--- a/benchmark/benchmark_serving.py
+++ b/benchmark/benchmark_serving.py
@@ -42,8 +42,6 @@ def get_output_file(model_path, backend, server_config):
     params = [
         ('bs', server_config['max_batch_size']),
         ('tp', server_config.get('tp', 1)),
-        ('dp', server_config.get('dp', '')),
-        ('ep', server_config.get('ep', '')),
         ('cache', server_config.get('cache_max_entry_count', 0.8)),
         ('mptk', server_config.get('max_prefill_token_num', '')),
     ]
@@ -57,15 +55,8 @@ def get_output_file(model_path, backend, server_config):
 
 def get_server_ip_port(backend: str, server_config: Dict) -> Tuple[str, int]:
     if backend in ['turbomind', 'pytorch']:
-        if server_config.get('proxy_url'):
-            # If proxy_url is set, we use the proxy server's IP and port
-            parts = server_config['proxy_url'].split(':')
-            server_ip = parts[1].lstrip('//')
-            server_port = int(parts[2])
-        else:
-            # Default to the server IP and port specified in the config
-            server_ip = server_config.get('server_ip', '0.0.0.0')
-            server_port = server_config.get('server_port', 23333)
+        server_ip = server_config.get('server_ip', '0.0.0.0')
+        server_port = server_config.get('server_port', 23333)
     elif backend == 'sglang':
         return (server_config.get('server_ip', '0.0.0.0'), server_config.get('port', 30000))
     elif backend == 'vllm':
@@ -75,7 +66,7 @@ def get_server_ip_port(backend: str, server_config: Dict) -> Tuple[str, int]:
     return server_ip, server_port
 
 
-def wait_server_ready(server_ip: str, server_port: int) -> bool:
+def get_served_model_name(server_ip: str, server_port: int) -> str:
     """Wait for the API server to become ready."""
     from openai import OpenAI
     while True:
@@ -84,7 +75,7 @@ def wait_server_ready(server_ip: str, server_port: int) -> bool:
             model_name = client.models.list().data[0].id
             if model_name:
                 print('Server is ready.')
-                return True
+                return model_name
         except Exception as e:
             print(f'connect to server http://{server_ip}:{server_port} failed {e}')
             time.sleep(5)
@@ -135,7 +126,7 @@ def benchmark(model_path: str, backend: str, server_config: Dict, data_config: D
     print(f"Starting api_server: {' '.join(server_cmd)}", flush=True)
     proc = subprocess.Popen(server_cmd)
     # Wait for the server to be ready
-    wait_server_ready(server_ip, server_port)
+    get_served_model_name(server_ip, server_port)
     # Run benchmarks
     output_file = get_output_file(model_path, backend, server_config)
     for data in data_config:
@@ -166,25 +157,30 @@ def benchmark(model_path: str, backend: str, server_config: Dict, data_config: D
         proc.kill()
 
 
-def validate_config(config: Dict) -> None:
-    """Validate the configuration structure.
+def benchmark_proxy(backend: str, server_config: Dict, data_config: Dict | List[Dict]):
+    server_ip = server_config.get('server_ip', '0.0.0.0')
+    server_port = server_config.get('server_port', 8000)
 
-    Args:
-        config: Loaded configuration dictionary
-
-    Raises:
-        BenchmarkConfigError: If configuration is invalid
-    """
-    required_sections = ['api_server', 'engine', 'data']
-    for section in required_sections:
-        if section not in config:
-            raise ValueError(f'Missing required config section: {section}')
-
-    if not isinstance(config['engine'], (Dict, List)):
-        raise ValueError('engine config must be a dict or list of dicts')
+    if isinstance(data_config, Dict):
+        data_config = [data_config]
+    if not (isinstance(data_config, List) and all(isinstance(d, Dict) for d in data_config)):
+        raise ValueError('data_config must be a dict or list of dicts')
 
-    if not isinstance(config['data'], (Dict, List)):
-        raise ValueError('data config must be a dict or list of dicts')
+    try:
+        # Wait for the proxy_server to be ready
+        model_name = get_served_model_name(server_ip, server_port)
+        model_name = model_name.replace('/', '_')
+        # Run benchmarks
+        output_file = f'benchmark_proxy_{model_name}_{backend}.csv'
+        for data in data_config:
+            data = data.copy()
+            data['output_file'] = output_file
+            client_cmd = get_client_cmd(backend, server_ip, server_port, data)
+            print(f"Running benchmark: {' '.join(client_cmd)}")
+            subprocess.run(client_cmd, check=True)
+    except Exception as e:
+        print(f'Unexpected error: {e}')
+        raise
 
 
 def main(backend: str, config_path: str, model_path: Optional[str] = None):
@@ -197,11 +193,16 @@ def main(backend: str, config_path: str, model_path: Optional[str] = None):
     Raises:
         BenchmarkConfigError: If required parameters are missing or config is invalid
     """
-    with open(config_path, 'r') as f:
+    with open(config_path, 'r', encoding='utf-8') as f:
         config = yaml.safe_load(f)
     server_config = config['server']
     engine_configs = config['engine']
     data_config = config['data']
+
+    server_type = server_config.get('type', 'api_server')
+    if server_type == 'proxy':
+        benchmark_proxy(backend, server_config, data_config)
+        return
     if isinstance(engine_configs, Dict):
         engine_configs = [engine_configs]
     assert isinstance(engine_configs, List) and all(isinstance(s, Dict) for s in engine_configs)
diff --git a/benchmark/lmdeploy.yml b/benchmark/lmdeploy.yml
index 539da0a68e..1705ae23b7 100644
--- a/benchmark/lmdeploy.yml
+++ b/benchmark/lmdeploy.yml
@@ -4,6 +4,8 @@ dataset_name: &dataset_name "sharegpt"
 model_path: &model_path "Qwen/Qwen3-30B-A3B-FP8"
 server:
   server_port: 23333
+  # The type of the server. It is either "api_server" or "proxy".
+  type: "api_server"
 # Inference engine configuration
 engine:
   - model_path: *model_path
@@ -15,13 +17,6 @@ engine:
     cache_max_entry_count: 0.9
     max_prefill_token_num: 4096
     tp: 1
-  - model_path: "Qwen/Qwen3-235B-A22B-FP8"
-    max_batch_size: 64
-    cache_max_entry_count: 0.7
-    max_prefill_token_num: 4096
-    dp: 8
-    ep: 8
-    proxy_url: "http://localhost:8000"
 # Benchmark test configuration for profile_restful_api.py
 # Defines multiple test cases with different output lengths to evaluate API performance
 data:
diff --git a/docs/en/benchmark/evaluate_with_opencompass.md b/docs/en/benchmark/evaluate_with_opencompass.md
index 0cf0439221..6847d2a9bd 100644
--- a/docs/en/benchmark/evaluate_with_opencompass.md
+++ b/docs/en/benchmark/evaluate_with_opencompass.md
@@ -10,33 +10,44 @@ If sufficient computational resources are available, please refer to the [End-to
 
 ## Environment Setup
 
+Install LMDeploy and OpenCompass in separate Python virtual environments to avoid potential dependency conflicts.
+
+- install lmdeploy
+
 ```shell
+conda create -n lmdeploy python=3.10 -y
 pip install lmdeploy
-pip install "opencompass[full]"
-
 # Download the lmdeploy source code, which will be used in subsequent steps to access eval script and configuration
 git clone --depth=1 https://github.com/InternLM/lmdeploy.git
 ```
 
-It is recommended to install LMDeploy and OpenCompass in separate Python virtual environments to avoid potential dependency conflicts.
+- install opencompass
+
+```shell
+conda create -n opencompass python=3.10 -y
+pip install "opencompass[full]"
+```
 
 ## End-to-End Evaluation
 
 1. **Deploy Target Model**
 
 ```shell
+conda activate lmdeploy
 lmdeploy serve api_server --server-port 10000 <--other-options>
 ```
 
 2. **Deploy Evaluation Model (Judger)**
 
 ```shell
+conda activate lmdeploy
 lmdeploy serve api_server opencompass/CompassVerifier-32B --server-port 20000 --tp 2
 ```
 
 3. **Generate Evaluation Configuration and Execute**
 
 ```shell
+conda activate opencompass
 cd {the/root/path/of/lmdeploy/repo}
 
 ## Specify the dataset path. OC will download the datasets automatically if they are
@@ -64,12 +75,14 @@ This stage generates model responses for the dataset.
 1. **Deploy Target Model**
 
 ```shell
+conda activate lmdeploy
 lmdeploy serve api_server --server-port 10000 <--other-options>
 ```
 
 2. **Generate Inference Configuration and Execute**
 
 ```shell
+conda activate opencompass
 cd {the/root/path/of/lmdeploy/repo}
 
 ## Specify the dataset path. OC will download the datasets automatically if they are
@@ -92,12 +105,15 @@ This stage uses the evaluation model (Judger) to assess the quality of inference
 1. **Deploy Evaluation Model (Judger)**
 
 ```shell
+conda activate lmdeploy
 lmdeploy serve api_server opencompass/CompassVerifier-32B --server-port 20000 --tp 2 --session-len 65536
 ```
 
 2. **Generate Evaluation Configuration and Execute**
 
 ```shell
+conda activate opencompass
+
 cd {the/root/path/of/lmdeploy/repo}
 
 ## Specify the dataset path. OC will download the datasets automatically if they are
@@ -105,7 +121,7 @@ cd {the/root/path/of/lmdeploy/repo}
 export COMPASS_DATA_CACHE=/nvme1/shared/opencompass/.cache
 export HF_DATASETS_CACHE=/nvme4/huggingface_hub/datasets
 # Run evaluation task
-opencompass /path/to/judger_config.py -m eval -w {oc_output_dir} -r {yyyymmdd_hhmmss}
+python eval/eval.py {task_name} --mode eval --judger-server http://{judger-server-ip}:20000 -w {oc_output_dir} -r {yyyymmdd_hhmmss}
 ```
 
 Important Notes:
diff --git a/docs/zh_cn/benchmark/evaluate_with_opencompass.md b/docs/zh_cn/benchmark/evaluate_with_opencompass.md
index 3f46c3f6de..dc96ebb0ee 100644
--- a/docs/zh_cn/benchmark/evaluate_with_opencompass.md
+++ b/docs/zh_cn/benchmark/evaluate_with_opencompass.md
@@ -10,33 +10,44 @@
 
 ## 环境准备
 
+在不同的 Python 虚拟环境中分别安装 LMDeploy 和 OpenCompass,以避免可能的依赖冲突。
+
+- 安装 lmdeploy
+
 ```shell
+conda create -n lmdeploy python=3.10 -y
 pip install lmdeploy
-pip install "opencompass[full]"
-
 # 下载 lmdeploy 源码,在后续步骤中会使用到 eval/* 中的评测脚本和配置文件
 git clone --depth=1 https://github.com/InternLM/lmdeploy.git
 ```
 
-建议将 LMDeploy 和 OpenCompass 安装在不同的 Python 虚拟环境中,以避免可能的依赖冲突。
+- 安装 opencompass
+
+```shell
+pip install "opencompass[full]"
+```
 
 ## 端到端评测
 
 1. **部署待评测模型**
 
 ```shell
+conda activate lmdeploy
 lmdeploy serve api_server --server-port 10000 <--other-options>
 ```
 
 2. **部署评测模型(Judger)**
 
 ```shell
+conda activate lmdeploy
 lmdeploy serve api_server opencompass/CompassVerifier-32B --server-port 20000 --tp 2 --session-len 65536
 ```
 
 3. **生成评测配置并执行评测**
 
 ```shell
+conda activate opencompass
+
 cd {the/root/path/of/lmdeploy/repo}
 
 ## 指定数据集路径。如果在路径下没有找到评测数据集,OC会自动下载
@@ -62,12 +73,15 @@ python eval/eval.py {task_name} \
 
 1. **部署待评测模型**
 
 ```shell
+conda activate lmdeploy
 lmdeploy serve api_server --server-port 10000 <--other-options>
 ```
 
 2. **生成推理配置并执行推理**
 
 ```shell
+conda activate opencompass
+
 cd {the/root/path/of/lmdeploy/repo}
 
 ## 指定数据集路径。如果在路径下没有找到评测数据集,OC会自动下载
@@ -91,12 +105,15 @@ python eval/eval.py {task_name} \
 
 1. **部署评测模型(Judger)**
 
 ```shell
+conda activate lmdeploy
 lmdeploy serve api_server opencompass/CompassVerifier-32B --server-port 20000 --tp 2
 ```
 
 2. **生成评判配置并执行评判**
 
 ```shell
+conda activate opencompass
+
 cd {the/root/path/of/lmdeploy/repo}
 
 ## 指定数据集路径。如果在路径下没有找到评测数据集,OC会自动下载
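For readers reproducing the proxy mode added above, a minimal sketch of what a proxy-type config for `benchmark_serving.py` could look like follows. It uses only the fields this patch actually reads (`server.type`, plus `server_ip`/`server_port`, which `benchmark_proxy` defaults to `0.0.0.0:8000`); the `data` entry and its field name are illustrative assumptions, not part of the change, and should mirror whatever `get_client_cmd`/`profile_restful_api.py` already expect.

```yaml
# Hypothetical proxy-mode config; values in the data section are placeholders.
server:
  # "proxy" makes main() call benchmark_proxy() against an already-running
  # proxy server instead of launching an api_server per engine entry.
  type: "proxy"
  server_ip: "0.0.0.0"   # defaults used by benchmark_proxy
  server_port: 8000

# main() still indexes config['engine'] before the proxy branch, so the key
# must exist even though benchmark_proxy never reads it.
engine: []

# Reuse the same data section as the api_server config; the exact fields are
# whatever profile_restful_api.py expects (output_len here is a placeholder).
data:
  - output_len: 1024
```

In this mode the proxy and the api_server instances registered to it are expected to be started out of band, which is consistent with dropping the per-engine `proxy_url` handling from `get_server_ip_port` in this change.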