File tree Expand file tree Collapse file tree 2 files changed +2
-9
lines changed
tensorrt_llm/executor/rpc Expand file tree Collapse file tree 2 files changed +2
-9
lines changed Original file line number Diff line number Diff line change @@ -80,15 +80,6 @@ def remote_streaming(self,
8080class RPCClient :
8181 """
8282 An RPC Client that connects to the RPCServer.
83-
84- Design contract: **all ZeroMQ socket I/O is performed from a single
85- dedicated asyncio event-loop (self._loop) that lives in its own
86- background thread**. Synchronous helpers marshal work onto that loop via
87- run_coroutine_threadsafe(); asynchronous streaming helpers submit the
88- actual socket operation to the loop and then yield results in the caller’s
89- loop. Violating this invariant (i.e. touching self._client_socket from
90- multiple event-loops) leads to rare hangs / message loss because pyzmq
91- sockets are not thread-safe.
9283 """
9384
9485 def __init__ (self ,
Original file line number Diff line number Diff line change @@ -366,6 +366,7 @@ def _check_llama_7b_multi_lora_evict_load_new_adapters(
366366
367367
368368@skip_gpu_memory_less_than_40gb
369+ @skip_ray # https://nvbugs/5682551
369370def test_llama_7b_multi_lora_evict_and_reload_lora_gpu_cache ():
370371 """Test eviction and re-loading a previously evicted adapter from the LoRA GPU cache, within a single
371372 llm.generate call, that's repeated twice.
@@ -460,6 +461,7 @@ def test_llama_7b_peft_cache_config_affects_peft_cache_size():
460461 cuda_graph_config = None )
461462
462463
464+ @skip_ray # https://nvbugs/5682551
463465@skip_gpu_memory_less_than_40gb
464466def test_llama_7b_lora_config_overrides_peft_cache_config ():
465467 """Tests that cache size args in lora_config LLM arg override the cache size
You can’t perform that action at this time.
0 commit comments