waive a ray test

Superjomn · Superjomn · commit 4392b76a50a1 · 2025-11-25T13:16:56.000+08:00
Signed-off-by: Yan Chunwei <328693+Superjomn@users.noreply.github.com>
diff --git a/tensorrt_llm/executor/rpc/rpc_client.py b/tensorrt_llm/executor/rpc/rpc_client.py
@@ -80,15 +80,6 @@ def remote_streaming(self,
 class RPCClient:
     """
     An RPC Client that connects to the RPCServer.
-
-    Design contract: **all ZeroMQ socket I/O is performed from a single
-    dedicated asyncio event-loop (self._loop) that lives in its own
-    background thread**.  Synchronous helpers marshal work onto that loop via
-    run_coroutine_threadsafe(); asynchronous streaming helpers submit the
-    actual socket operation to the loop and then yield results in the caller’s
-    loop.  Violating this invariant (i.e. touching self._client_socket from
-    multiple event-loops) leads to rare hangs / message loss because pyzmq
-    sockets are not thread-safe.
     """
 
     def __init__(self,
diff --git a/tests/unittest/llmapi/test_llm_pytorch.py b/tests/unittest/llmapi/test_llm_pytorch.py
@@ -360,6 +360,7 @@ def _check_llama_7b_multi_lora_evict_load_new_adapters(
 
 
 @skip_gpu_memory_less_than_40gb
+@skip_ray  # https://nvbugs/5682551
 def test_llama_7b_multi_lora_evict_and_reload_lora_gpu_cache():
     """Test eviction and re-loading a previously evicted adapter from the LoRA GPU cache, within a single
     llm.generate call, that's repeated twice.
@@ -454,6 +455,7 @@ def test_llama_7b_peft_cache_config_affects_peft_cache_size():
             cuda_graph_config=None)
 
 
+@skip_ray  # https://nvbugs/5682551
 @skip_gpu_memory_less_than_40gb
 def test_llama_7b_lora_config_overrides_peft_cache_config():
     """Tests that cache size args in lora_config LLM arg override the cache size