We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4fb2876 · commit a0d4d52 — Copy full SHA for a0d4d52
tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -439,9 +439,7 @@ def __init__(
439
self.kv_cache_manager_key = ResourceManagerType.KV_CACHE_MANAGER
440
self.lora_model_config: Optional[LoraModelConfig] = None
441
self.cuda_graph_dummy_request = None
442
- self.cuda_graph_model_engine = CUDAGraphModelEngine(
443
- self
444
- ) if self._run_cuda_graphs or self._torch_compile_piecewise_cuda_graph else None
+ self.cuda_graph_model_engine = CUDAGraphModelEngine(self)
445
446
# Setup the local cache indirection buffer only once and reuse it.
447
# This way it can also be used for CUDA graphs.
0 commit comments