Skip to content

Commit 4392b76

Browse files
committed
waive a ray test
Signed-off-by: Yan Chunwei <[email protected]>
1 parent ab7e56f commit 4392b76

File tree

2 files changed

+2
-9
lines changed

2 files changed

+2
-9
lines changed

tensorrt_llm/executor/rpc/rpc_client.py

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -80,15 +80,6 @@ def remote_streaming(self,
8080
class RPCClient:
8181
"""
8282
An RPC Client that connects to the RPCServer.
83-
84-
Design contract: **all ZeroMQ socket I/O is performed from a single
85-
dedicated asyncio event-loop (self._loop) that lives in its own
86-
background thread**. Synchronous helpers marshal work onto that loop via
87-
run_coroutine_threadsafe(); asynchronous streaming helpers submit the
88-
actual socket operation to the loop and then yield results in the caller’s
89-
loop. Violating this invariant (i.e. touching self._client_socket from
90-
multiple event-loops) leads to rare hangs / message loss because pyzmq
91-
sockets are not thread-safe.
9283
"""
9384

9485
def __init__(self,

tests/unittest/llmapi/test_llm_pytorch.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -360,6 +360,7 @@ def _check_llama_7b_multi_lora_evict_load_new_adapters(
360360

361361

362362
@skip_gpu_memory_less_than_40gb
363+
@skip_ray # https://nvbugs/5682551
363364
def test_llama_7b_multi_lora_evict_and_reload_lora_gpu_cache():
364365
"""Test eviction and re-loading a previously evicted adapter from the LoRA GPU cache, within a single
365366
llm.generate call, that's repeated twice.
@@ -454,6 +455,7 @@ def test_llama_7b_peft_cache_config_affects_peft_cache_size():
454455
cuda_graph_config=None)
455456

456457

458+
@skip_ray # https://nvbugs/5682551
457459
@skip_gpu_memory_less_than_40gb
458460
def test_llama_7b_lora_config_overrides_peft_cache_config():
459461
"""Tests that cache size args in lora_config LLM arg override the cache size

0 commit comments

Comments (0)