Skip to content
Open
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
f636e2c
Allow using vllm image
dhuangnm Oct 29, 2025
afbf811
fix a typo
dhuangnm Oct 31, 2025
a55e5c8
fix typo again
dhuangnm Oct 31, 2025
ceee681
fix an issue
dhuangnm Oct 31, 2025
bcc7a50
fix an issue
dhuangnm Oct 31, 2025
665cd1e
fix cmd string
dhuangnm Oct 31, 2025
4bf0dc1
fix an issue
dhuangnm Oct 31, 2025
59cea15
add debugging
dhuangnm Oct 31, 2025
be75c8d
don't delete run folder if using image
dhuangnm Nov 3, 2025
586dcc1
allow using pulled image or deployed runner
dhuangnm Nov 5, 2025
c1dde7f
fix a typo
dhuangnm Nov 5, 2025
ae9e526
remove extra )
dhuangnm Nov 5, 2025
80352db
run vllm with podman
dhuangnm Nov 5, 2025
8461d03
fix error
dhuangnm Nov 5, 2025
5704e62
fix issues
dhuangnm Nov 5, 2025
098f561
fix path
dhuangnm Nov 5, 2025
d564408
improve output
dhuangnm Nov 5, 2025
5da7eee
fix typo
dhuangnm Nov 5, 2025
4cb2251
fix format
dhuangnm Nov 5, 2025
d2cb646
fix command
dhuangnm Nov 5, 2025
5cdb543
allow file to execute
dhuangnm Nov 5, 2025
6dc42c4
minor update
dhuangnm Nov 5, 2025
84634e0
copy file
dhuangnm Nov 5, 2025
57c99ac
fix issue
dhuangnm Nov 5, 2025
7cdedbb
run vllm in deployed pod
dhuangnm Nov 7, 2025
3951475
missed ,
dhuangnm Nov 7, 2025
5c401fc
fix command
dhuangnm Nov 7, 2025
870b6ee
remove VLLM_VOLUME_MOUNT_DIR
dhuangnm Nov 11, 2025
d23bdf4
fix missing path
dhuangnm Nov 11, 2025
625c9db
clean up
dhuangnm Nov 11, 2025
264fdcb
final update
dhuangnm Nov 13, 2025
318bd3d
clean up
dhuangnm Nov 13, 2025
117ec9d
fix quality failures
dhuangnm Nov 14, 2025
8b41d5f
reorg test code and remove env var
dhuangnm Nov 24, 2025
1b2530e
fix error
dhuangnm Nov 25, 2025
3d889c6
fix another error
dhuangnm Nov 25, 2025
7e77202
fix style
dhuangnm Nov 25, 2025
7662699
clean up and fix format
dhuangnm Nov 25, 2025
abb6bab
fix format
dhuangnm Nov 25, 2025
de58b02
rename file to be rhaiis specific
dhuangnm Nov 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions tests/e2e/vLLM/e2e-smoke.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
fp4_nvfp4.yaml
fp8_dynamic_per_token.yaml
kv_cache_gptq_tinyllama.yaml
sparse2of4_fp8_dynamic.yaml
w4a16_grouped_quant_asym_awq.yaml
w4a16_actorder_weight.yaml
int8_channel_weight_static_per_tensor_act.yaml
12 changes: 11 additions & 1 deletion tests/e2e/vLLM/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,18 @@ while getopts "c:t:" OPT; do
esac
done

script_path=$(dirname "${BASH_SOURCE[0]}")
if [ -d "$CONFIG" ]; then
echo "Config is provided as a folder: $CONFIG"
CONFIGS=`ls "$CONFIG"`
elif [ -f "$CONFIG" ]; then
echo "Config is provided as a file: $CONFIG"
CONFIGS=`cat "$CONFIG"`
fi
echo "$CONFIGS"

# Parse list of configs.
for MODEL_CONFIG in "$CONFIG"/*
for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using ls and cat to build a list of files, and then iterating over it with an unquoted for loop is not robust. This will fail if any filenames contain spaces or other special characters due to word splitting. A safer approach would be to use a while read loop to process the configs line by line, which correctly handles such filenames.

do
LOCAL_SUCCESS=0

Expand Down
126 changes: 104 additions & 22 deletions tests/e2e/vLLM/test_vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,15 @@
"TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml"
)
SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "")
# vllm python environment
# vllm environment: "same" (default), the path to a vllm virtualenv, or a deployed runner name
VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same")
IS_VLLM_IMAGE = False
RUN_SAVE_DIR = os.environ.get("RUN_SAVE_DIR", "none")
# when using a vllm image, the generated model must be saved under RUN_SAVE_DIR
if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()):
IS_VLLM_IMAGE = True
assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!"

TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm")
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
EXPECTED_SAVED_FILES = [
Expand Down Expand Up @@ -76,18 +83,32 @@ def set_up(self, test_data_file: str):
self.max_seq_length = eval_config.get("max_seq_length", 2048)
# GPU memory utilization - only set if explicitly provided in config
self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization")
# vllm python env - if same, use the current python env, otherwise use
# the python passed in VLLM_PYTHON_ENV
if VLLM_PYTHON_ENV.lower() != "same":
self.vllm_env = VLLM_PYTHON_ENV
else:
if VLLM_PYTHON_ENV.lower() == "same":
self.vllm_env = sys.executable
else:
self.vllm_env = VLLM_PYTHON_ENV

if RUN_SAVE_DIR != "none":
assert Path(
RUN_SAVE_DIR
).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}"
self.run_save_dir = RUN_SAVE_DIR
# RUN_SAVE_DIR overwrites config save_dir if specified
self.save_dir = os.path.join(
RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}"
)

if not self.save_dir:
self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"

logger.info("========== RUNNING ==============")
logger.info(self.save_dir)
logger.info(f"model save dir: {self.save_dir}")

# script to run vllm if using vllm image
if IS_VLLM_IMAGE:
# script file containing vllm commands to run in the image
self.vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash")
logger.info(f"vllm bash save dir: {self.vllm_bash}")

self.prompts = [
"The capital of France is",
Expand All @@ -100,8 +121,6 @@ def test_vllm(self, test_data_file: str):
# Run vLLM with saved model

self.set_up(test_data_file)
if not self.save_dir:
self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
model=self.model,
model_class=self.model_class,
Expand Down Expand Up @@ -134,7 +153,8 @@ def test_vllm(self, test_data_file: str):
fp.write(recipe_yaml_str)
session.reset()

if SKIP_HF_UPLOAD.lower() != "yes":
# if vllm image is used, don't upload
if SKIP_HF_UPLOAD.lower() != "yes" and not IS_VLLM_IMAGE:
logger.info("================= UPLOADING TO HUB ======================")

stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e"
Expand All @@ -151,17 +171,27 @@ def test_vllm(self, test_data_file: str):
folder_path=self.save_dir,
)

if VLLM_PYTHON_ENV.lower() == "same":
logger.info("========== RUNNING vLLM in the same python env ==========")
if IS_VLLM_IMAGE:
logger.info("========== To run vLLM with vllm image ==========")
else:
logger.info("========== RUNNING vLLM in a separate python env ==========")
if VLLM_PYTHON_ENV.lower() == "same":
logger.info("========== RUNNING vLLM in the same python env ==========")
else:
logger.info(
"========== RUNNING vLLM in a separate python env =========="
)

self._run_vllm(logger)

self.tear_down()

def tear_down(self):
if self.save_dir is not None and os.path.isdir(self.save_dir):
# model save_dir is needed for vllm image testing
if (
not IS_VLLM_IMAGE
and self.save_dir is not None
and os.path.isdir(self.save_dir)
):
shutil.rmtree(self.save_dir)

timer = get_singleton_manager()
Expand Down Expand Up @@ -198,17 +228,69 @@ def _run_vllm(self, logger):
json_prompts = json.dumps(self.prompts)

test_file_dir = os.path.dirname(os.path.abspath(__file__))
run_file_path = os.path.join(test_file_dir, "run_vllm.py")

logger.info("Run vllm in subprocess.Popen() using python env:")
logger.info("Run vllm using env:")
logger.info(self.vllm_env)

result = subprocess.Popen(
[self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, json_prompts],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
if IS_VLLM_IMAGE:
# generate python command to run in the vllm image
run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py")
shutil.copy(
os.path.join(test_file_dir, "run_vllm.py"),
os.path.join(RUN_SAVE_DIR, "run_vllm.py"),
)
cmds = [
"python",
run_file_path,
f"'{json_scheme}'",
f"'{json_llm_kwargs}'",
f"'{json_prompts}'",
]
vllm_cmd = " ".join(cmds)
with open(self.vllm_bash, "w") as cf:
cf.write(
f"""#!/bin/bash
export HF_HUB_OFFLINE=0
export VLLM_NO_USAGE_STATS=1
{vllm_cmd}
"""
)
os.chmod(self.vllm_bash, 0o755)
logger.info(f"Wrote vllm cmd into {self.vllm_bash}:")
logger.info("vllm image. Run vllm cmd with kubectl.")
result = subprocess.Popen(
[
"kubectl",
"exec",
"-it",
VLLM_PYTHON_ENV,
"-n",
"arc-runners",
"--",
"/bin/bash",
self.vllm_bash,
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
else:
run_file_path = os.path.join(test_file_dir, "run_vllm.py")
logger.info("Run vllm in subprocess.Popen using python env:")
logger.info(self.vllm_env)
result = subprocess.Popen(
[
self.vllm_env,
run_file_path,
json_scheme,
json_llm_kwargs,
json_prompts,
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)

stdout, stderr = result.communicate()
logger.info(stdout)

Expand Down