Skip to content

Commit 0a02f5f

Browse files
authored
[None][chore] Use a cached model path for Ray integration test (#8660)
Signed-off-by: Aurelien Chartier <[email protected]>
1 parent 49974ee commit 0a02f5f

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

examples/ray_orchestrator/llm_inference_async_ray.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,30 @@
11
# Generate text asynchronously with Ray orchestrator.
2+
import argparse
23
import asyncio
34

45
from tensorrt_llm import LLM, SamplingParams
56
from tensorrt_llm.llmapi import KvCacheConfig
67

78

89
def main():
10+
parser = argparse.ArgumentParser(
11+
description="Generate text asynchronously with Ray orchestrator.")
12+
parser.add_argument(
13+
"--model",
14+
type=str,
15+
default="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
16+
help=
17+
"HuggingFace model name or path to local HF model (default: TinyLlama/TinyLlama-1.1B-Chat-v1.0)"
18+
)
19+
args = parser.parse_args()
920
# Configure KV cache memory usage fraction.
1021
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5,
1122
max_tokens=4096,
1223
enable_block_reuse=True)
1324

1425
# model could accept HF model name or a path to local HF model.
1526
llm = LLM(
16-
model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
27+
model=args.model,
1728
kv_cache_config=kv_cache_config,
1829
max_seq_len=1024,
1930
max_batch_size=1,

tests/integration/defs/examples/test_ray.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@ def ray_example_root(llm_root):
1414

1515
def test_llm_inference_async_ray(ray_example_root, llm_venv):
1616
script_path = os.path.join(ray_example_root, "llm_inference_async_ray.py")
17-
venv_check_call(llm_venv, [script_path])
17+
model_path = f"{llm_models_root()}/llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
18+
venv_check_call(llm_venv, [script_path, "--model", model_path])
1819

1920

2021
@pytest.mark.skip_less_device(2)

0 commit comments

Comments
 (0)