Skip to content

Commit 79b8ea4

Browse files
committed
waive a ray test
Signed-off-by: Yan Chunwei <[email protected]>
1 parent 2158eae commit 79b8ea4

File tree

2 files changed

+2
-9
lines changed

2 files changed

+2
-9
lines changed

tensorrt_llm/executor/rpc/rpc_client.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,6 @@ def remote_streaming(self,
 class RPCClient:
     """
     An RPC Client that connects to the RPCServer.
-
-    Design contract: **all ZeroMQ socket I/O is performed from a single
-    dedicated asyncio event-loop (self._loop) that lives in its own
-    background thread**. Synchronous helpers marshal work onto that loop via
-    run_coroutine_threadsafe(); asynchronous streaming helpers submit the
-    actual socket operation to the loop and then yield results in the caller’s
-    loop. Violating this invariant (i.e. touching self._client_socket from
-    multiple event-loops) leads to rare hangs / message loss because pyzmq
-    sockets are not thread-safe.
     """
 
     def __init__(self,

tests/unittest/llmapi/test_llm_pytorch.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,7 @@ def _check_llama_7b_multi_lora_evict_load_new_adapters(
 
 
 @skip_gpu_memory_less_than_40gb
+@skip_ray  # https://nvbugs/5682551
 def test_llama_7b_multi_lora_evict_and_reload_lora_gpu_cache():
     """Test eviction and re-loading a previously evicted adapter from the LoRA GPU cache, within a single
     llm.generate call, that's repeated twice.
@@ -460,6 +461,7 @@ def test_llama_7b_peft_cache_config_affects_peft_cache_size():
         cuda_graph_config=None)
 
 
+@skip_ray  # https://nvbugs/5682551
 @skip_gpu_memory_less_than_40gb
 def test_llama_7b_lora_config_overrides_peft_cache_config():
     """Tests that cache size args in lora_config LLM arg override the cache size

0 commit comments

Comments
 (0)