Skip to content

Commit 92f0c3a

Browse files
committed
Merge remote-tracking branch 'upstream/main' into rhoai-3.2
2 parents ae8382b + efe44c6 commit 92f0c3a

File tree

1,927 files changed

+87305
-34937
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,927 files changed

+87305
-34937
lines changed

.buildkite/check-wheel-size.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
23

34
import os
45
import sys
@@ -8,12 +9,12 @@
89
# Note that we have a 400 MiB quota; please use it wisely.
910
# See https://github.com/pypi/support/issues/3792 .
1011
# Please also sync the value with the one in Dockerfile.
11-
VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 400))
12+
VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 400))
1213

1314

1415
def print_top_10_largest_files(zip_file):
1516
"""Print the top 10 largest files in the given zip file."""
16-
with zipfile.ZipFile(zip_file, 'r') as z:
17+
with zipfile.ZipFile(zip_file, "r") as z:
1718
file_sizes = [(f, z.getinfo(f).file_size) for f in z.namelist()]
1819
file_sizes.sort(key=lambda x: x[1], reverse=True)
1920
for f, size in file_sizes[:10]:
@@ -28,14 +29,18 @@ def check_wheel_size(directory):
2829
wheel_path = os.path.join(root, file_name)
2930
wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024)
3031
if wheel_size_mb > VLLM_MAX_SIZE_MB:
31-
print(f"Not allowed: Wheel {wheel_path} is larger "
32-
f"({wheel_size_mb:.2f} MB) than the limit "
33-
f"({VLLM_MAX_SIZE_MB} MB).")
32+
print(
33+
f"Not allowed: Wheel {wheel_path} is larger "
34+
f"({wheel_size_mb:.2f} MB) than the limit "
35+
f"({VLLM_MAX_SIZE_MB} MB)."
36+
)
3437
print_top_10_largest_files(wheel_path)
3538
return 1
3639
else:
37-
print(f"Wheel {wheel_path} is within the allowed size "
38-
f"({wheel_size_mb:.2f} MB).")
40+
print(
41+
f"Wheel {wheel_path} is within the allowed size "
42+
f"({wheel_size_mb:.2f} MB)."
43+
)
3944
return 0
4045

4146

@@ -45,4 +50,4 @@ def check_wheel_size(directory):
4550
sys.exit(1)
4651

4752
directory = sys.argv[1]
48-
sys.exit(check_wheel_size(directory))
53+
sys.exit(check_wheel_size(directory))

.buildkite/generate_index.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
23

34
import argparse
45
import os
@@ -22,5 +23,5 @@
2223
print(f"Generated index.html for {args.wheel}")
2324
# cloudfront requires escaping the '+' character
2425
f.write(
25-
template.format(wheel=filename,
26-
wheel_html_escaped=filename.replace("+", "%2B")))
26+
template.format(wheel=filename, wheel_html_escaped=filename.replace("+", "%2B"))
27+
)
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Llama-3.2-1B-Instruct-FP8 -b "auto" -l 1319 -f 5 -t 1
2+
model_name: "RedHatAI/Llama-3.2-1B-Instruct-FP8"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.335
8+
- name: "exact_match,flexible-extract"
9+
value: 0.323
10+
limit: 1319
11+
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m Qwen/Qwen2.5-1.5B-Instruct -b auto -l 1319 -f 5 -t 1
2+
model_name: "Qwen/Qwen2.5-1.5B-Instruct"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.54
8+
- name: "exact_match,flexible-extract"
9+
value: 0.59
10+
limit: 1319
11+
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -b auto -l 1319 -f 5 -t 1
2+
model_name: "RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.47
8+
- name: "exact_match,flexible-extract"
9+
value: 0.64
10+
limit: 1319
11+
num_fewshot: 5

.buildkite/lm-eval-harness/configs/models-large.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ Meta-Llama-3-70B-Instruct.yaml
33
Mixtral-8x7B-Instruct-v0.1.yaml
44
Qwen2-57B-A14-Instruct.yaml
55
DeepSeek-V2-Lite-Chat.yaml
6+
Meta-Llama-3-8B-QQQ.yaml
Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
1-
Meta-Llama-3-8B-Instruct.yaml
2-
Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
1+
Qwen2.5-1.5B-Instruct.yaml
32
Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
43
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
54
Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
6-
Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
5+
Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
76
Qwen1.5-MoE-W4A16-compressed-tensors.yaml
8-
Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml
9-
Qwen2-1.5B-Instruct-FP8W8.yaml
10-
Meta-Llama-3-8B-QQQ.yaml
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
from pathlib import Path
4+
5+
import pytest
6+
7+
8+
def pytest_addoption(parser):
9+
parser.addoption(
10+
"--config-list-file",
11+
action="store",
12+
help="Path to the file listing model config YAMLs (one per line)",
13+
)
14+
parser.addoption(
15+
"--tp-size",
16+
action="store",
17+
default="1",
18+
help="Tensor parallel size to use for evaluation",
19+
)
20+
21+
22+
@pytest.fixture(scope="session")
23+
def config_list_file(pytestconfig, config_dir):
24+
rel_path = pytestconfig.getoption("--config-list-file")
25+
return config_dir / rel_path
26+
27+
28+
@pytest.fixture(scope="session")
29+
def tp_size(pytestconfig):
30+
return pytestconfig.getoption("--tp-size")
31+
32+
33+
def pytest_generate_tests(metafunc):
34+
if "config_filename" in metafunc.fixturenames:
35+
rel_path = metafunc.config.getoption("--config-list-file")
36+
config_list_file = Path(rel_path).resolve()
37+
config_dir = config_list_file.parent
38+
with open(config_list_file, encoding="utf-8") as f:
39+
configs = [
40+
config_dir / line.strip()
41+
for line in f
42+
if line.strip() and not line.startswith("#")
43+
]
44+
metafunc.parametrize("config_filename", configs)
Lines changed: 24 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,55 @@
11
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
23
"""
34
LM eval harness on model to compare vs HF baseline computed offline.
45
Configs are found in configs/$MODEL.yaml
56
6-
* export LM_EVAL_TEST_DATA_FILE=configs/Meta-Llama-3-70B-Instruct.yaml
7-
* export LM_EVAL_TP_SIZE=4
8-
* pytest -s test_lm_eval_correctness.py
7+
pytest -s -v test_lm_eval_correctness.py \
8+
--config-list-file=configs/models-small.txt \
9+
--tp-size=1
910
"""
1011

11-
import os
12-
from pathlib import Path
13-
1412
import lm_eval
15-
import numpy
16-
import pytest
13+
import numpy as np
1714
import yaml
1815

1916
RTOL = 0.08
20-
TEST_DATA_FILE = os.environ.get(
21-
"LM_EVAL_TEST_DATA_FILE",
22-
".buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct.yaml")
23-
24-
TP_SIZE = os.environ.get("LM_EVAL_TP_SIZE", 1)
25-
2617

27-
def launch_lm_eval(eval_config):
28-
trust_remote_code = eval_config.get('trust_remote_code', False)
29-
30-
model_args = f"pretrained={eval_config['model_name']}," \
31-
f"tensor_parallel_size={TP_SIZE}," \
32-
f"add_bos_token=true," \
33-
f"trust_remote_code={trust_remote_code}"
3418

19+
def launch_lm_eval(eval_config, tp_size):
20+
trust_remote_code = eval_config.get("trust_remote_code", False)
21+
model_args = (
22+
f"pretrained={eval_config['model_name']},"
23+
f"tensor_parallel_size={tp_size},"
24+
f"enforce_eager=true,"
25+
f"add_bos_token=true,"
26+
f"trust_remote_code={trust_remote_code}"
27+
)
3528
results = lm_eval.simple_evaluate(
3629
model="vllm",
3730
model_args=model_args,
3831
tasks=[task["name"] for task in eval_config["tasks"]],
3932
num_fewshot=eval_config["num_fewshot"],
4033
limit=eval_config["limit"],
41-
batch_size="auto")
42-
34+
batch_size="auto",
35+
)
4336
return results
4437

4538

46-
def test_lm_eval_correctness():
47-
eval_config = yaml.safe_load(
48-
Path(TEST_DATA_FILE).read_text(encoding="utf-8"))
49-
50-
if eval_config[
51-
"model_name"] == "nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform": #noqa: E501
52-
pytest.skip("FBGEMM is currently failing on main.")
39+
def test_lm_eval_correctness_param(config_filename, tp_size):
40+
eval_config = yaml.safe_load(config_filename.read_text(encoding="utf-8"))
5341

54-
# Launch eval requests.
55-
results = launch_lm_eval(eval_config)
42+
results = launch_lm_eval(eval_config, tp_size)
5643

57-
# Confirm scores match ground truth.
5844
success = True
5945
for task in eval_config["tasks"]:
6046
for metric in task["metrics"]:
6147
ground_truth = metric["value"]
6248
measured_value = results["results"][task["name"]][metric["name"]]
63-
print(f'{task["name"]} | {metric["name"]}: '
64-
f'ground_truth={ground_truth} | measured={measured_value}')
65-
success = success and numpy.isclose(
66-
ground_truth, measured_value, rtol=RTOL)
49+
print(
50+
f"{task['name']} | {metric['name']}: "
51+
f"ground_truth={ground_truth} | measured={measured_value}"
52+
)
53+
success = success and np.isclose(ground_truth, measured_value, rtol=RTOL)
6754

68-
# Assert at the end, print all scores even on failure for debugging.
6955
assert success

.buildkite/nightly-benchmarks/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ WARNING: The benchmarking script will save json results by itself, so please do
113113

114114
### Visualizing the results
115115

116-
The `convert-results-json-to-markdown.py` helps you put the benchmarking results inside a markdown table, by formatting [descriptions.md](tests/descriptions.md) with real benchmarking results.
116+
The `convert-results-json-to-markdown.py` script helps you put the benchmarking results inside a markdown table by formatting [descriptions.md](performance-benchmarks-descriptions.md) with real benchmarking results.
117117
You can find the result presented as a table inside the `buildkite/performance-benchmark` job page.
118118
If you do not see the table, please wait until the benchmark finishes running.
119119
The JSON version of the table (together with the JSON version of the benchmark) will also be attached to the markdown file.

0 commit comments

Comments
 (0)