import os

import pytest
import ray
from ray.util.placement_group import (PlacementGroupSchedulingStrategy,
                                      placement_group, remove_placement_group)
from utils.llm_data import llm_models_root

from tensorrt_llm import LLM
from tensorrt_llm._torch.utils import get_device_uuid
from tensorrt_llm.llmapi import KvCacheConfig


class DummyWorkerExtension:
    ...  # class body elided in this excerpt


def test_worker_extension():
    ...  # setup elided in this excerpt
    assert result[0] == "SUCCESS"


@pytest.mark.gpu4
def test_bundle_indices(monkeypatch):
    """Placement via bundle indices"""

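    # RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES stops Ray from rewriting
    # CUDA_VISIBLE_DEVICES inside each worker, leaving GPU selection to the
    # TRTLLM_RAY_BUNDLE_INDICES mechanism exercised below; TLLM_RAY_USE_RPC
    # selects the RPC-based communication path.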
    monkeypatch.setenv("RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES", "1")
    monkeypatch.setenv("TLLM_RAY_USE_RPC", "1")

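    # Reserve four single-GPU bundles; the test pins the two TP workers to a
    # subset of them below.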
    pg = None
    try:
        ray.init()
        pg = placement_group([{"GPU": 1, "CPU": 1}] * 4)
        ray.get(pg.ready())
        print(f"Placement group ready with bundles {pg.bundle_specs}")

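        # TRTLLM_RAY_BUNDLE_INDICES pins the workers to placement-group
        # bundles 2 and 3; TRTLLM_RAY_PER_WORKER_GPUS requests a fractional
        # GPU share per worker.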
        bundle_indices = [2, 3]
        runtime_env = {
            "env_vars": {
                "TRTLLM_RAY_PER_WORKER_GPUS": "0.8",
                "TRTLLM_RAY_BUNDLE_INDICES": ",".join(map(str, bundle_indices))
            }
        }

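        # Wrap LLM in a Ray actor that declares no CPU/GPU resources of its
        # own: placement is driven entirely by the runtime_env variables
        # above and the placement-group scheduling strategy.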
        llm = ray.remote(
            num_cpus=0,
            num_gpus=0,
            runtime_env=runtime_env,
            scheduling_strategy=PlacementGroupSchedulingStrategy(
                placement_group=pg,
                placement_group_capture_child_tasks=True,
            ),
        )(LLM).remote(
            model=os.path.join(llm_models_root(), "llama-models-v2",
                               "TinyLlama-1.1B-Chat-v1.0"),
            kv_cache_config=KvCacheConfig(free_gpu_memory_fraction=0.1),
            tensor_parallel_size=2,
            orchestrator_type="ray",
        )

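        # Ask every inference worker to report the UUID of the GPU it holds.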
        inference_actor_uuids = ray.get(
            llm._collective_rpc.remote("report_device_id"))

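        # On a single node, bundle index i is expected to map to physical
        # device i, so the workers should land on GPUs 2 and 3.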
        expected_uuids = [get_device_uuid(idx) for idx in bundle_indices]

        assert sorted(inference_actor_uuids) == sorted(expected_uuids), \
            f"Workers not placed on expected GPUs. Expected UUIDs: {expected_uuids}, Got: {inference_actor_uuids}"

    finally:
        if pg is not None:
            remove_placement_group(pg)
        ray.shutdown()


@pytest.mark.gpu2
def test_cuda_visible_device(monkeypatch):
    """Placement via cuda_visible_device"""
    monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "1")

    llm = LLM(model=llm_models_root() /
              "llama-models-v2/TinyLlama-1.1B-Chat-v1.0",
              orchestrator_type="ray")

    infer_actor_uuids = llm._collective_rpc("report_device_id")

    assert infer_actor_uuids[0] == get_device_uuid(1)
    print(f"{infer_actor_uuids=}")