File tree Expand file tree Collapse file tree 2 files changed +2
-9
lines changed
tensorrt_llm/executor/rpc Expand file tree Collapse file tree 2 files changed +2
-9
lines changed Original file line number Diff line number Diff line change @@ -80,15 +80,6 @@ def remote_streaming(self,
8080class RPCClient :
8181 """
8282 An RPC Client that connects to the RPCServer.
83-
84- Design contract: **all ZeroMQ socket I/O is performed from a single
85- dedicated asyncio event-loop (self._loop) that lives in its own
86- background thread**. Synchronous helpers marshal work onto that loop via
87- run_coroutine_threadsafe(); asynchronous streaming helpers submit the
88- actual socket operation to the loop and then yield results in the caller’s
89- loop. Violating this invariant (i.e. touching self._client_socket from
90- multiple event-loops) leads to rare hangs / message loss because pyzmq
91- sockets are not thread-safe.
9283 """
9384
9485 def __init__ (self ,
Original file line number Diff line number Diff line change @@ -360,6 +360,7 @@ def _check_llama_7b_multi_lora_evict_load_new_adapters(
360360
361361
362362@skip_gpu_memory_less_than_40gb
363+ @skip_ray # https://nvbugs/5682551
363364def test_llama_7b_multi_lora_evict_and_reload_lora_gpu_cache ():
364365 """Test eviction and re-loading a previously evicted adapter from the LoRA GPU cache, within a single
365366 llm.generate call, that's repeated twice.
@@ -454,6 +455,7 @@ def test_llama_7b_peft_cache_config_affects_peft_cache_size():
454455 cuda_graph_config = None )
455456
456457
458+ @skip_ray # https://nvbugs/5682551
457459@skip_gpu_memory_less_than_40gb
458460def test_llama_7b_lora_config_overrides_peft_cache_config ():
459461 """Tests that cache size args in lora_config LLM arg override the cache size
You can’t perform that action at this time.
0 commit comments