6 files changed: +1 −12 lines changed

@@ -245,9 +245,7 @@ def forward(
         return {"logits": logits_flat}


-def create_autodeploy_executor(
-    executor_config: ExecutorConfig, checkpoint_dir: str = None, engine_dir: str = None
-):
+def create_autodeploy_executor(executor_config: ExecutorConfig, checkpoint_dir: str = None):
     """Create an AutoDeploy executor from the given configuration and checkpoint directory.

     This is the entrypoint API to the _autodeploy backend.
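
With engine_dir gone from the signature, the AutoDeploy entrypoint is driven by the executor config plus an HF checkpoint directory. A minimal sketch of a call site after this change; the import paths and the checkpoint path are assumptions for illustration, not taken from this diff:

# Assumed import paths; verify against the repository layout.
from tensorrt_llm.bindings.executor import ExecutorConfig
from tensorrt_llm._torch.auto_deploy.shim.ad_executor import create_autodeploy_executor

executor_config = ExecutorConfig(max_beam_width=1)
executor_config.hf_model_dir = "/models/llama-3-8b"  # hypothetical checkpoint path

# After this change there is no engine_dir argument to pass:
executor = create_autodeploy_executor(executor_config, checkpoint_dir=executor_config.hf_model_dir)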

@@ -99,7 +99,6 @@ class PyTorchConfig:
     'tokens_per_block',
     'mapping',
     'hf_model_dir',
-    'trt_engine_dir',
 ]


@@ -111,7 +110,6 @@ def update_executor_config(
         build_config: Optional[BuildConfig] = None,
         speculative_config: Optional[SpecConfig] = None,
         hf_model_dir: Optional[str] = None,
-        trt_engine_dir: Optional[str] = None,
         max_input_len: Optional[int] = None,
         max_seq_len: Optional[int] = None):
     if backend is None:
@@ -135,7 +133,6 @@ def update_executor_config(
     executor_config.tokens_per_block = executor_config.tokens_per_block or build_config.plugin_config.tokens_per_block

     executor_config.hf_model_dir = hf_model_dir
-    executor_config.trt_engine_dir = trt_engine_dir

     if max_input_len is not None:
         executor_config.max_input_len = max_input_len
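
Since trt_engine_dir is no longer accepted, the helper is configured from the HF model directory alone. A short sketch of a call after this change, assuming the helper lives alongside PyTorchConfig in the pyexecutor config module; paths and values are illustrative:

# Assumed import paths; verify against the repository layout.
from tensorrt_llm.bindings.executor import ExecutorConfig
from tensorrt_llm._torch.pyexecutor.config import update_executor_config

executor_config = ExecutorConfig(max_beam_width=1)
update_executor_config(
    executor_config,
    backend="pytorch",
    hf_model_dir="/models/llama-3-8b",  # hypothetical checkpoint path
    max_input_len=2048,
    max_seq_len=4096,
)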

@@ -283,8 +283,6 @@ def __init__(
         self.is_cuda_graph_dummy = False
         self.py_lora_task_layer_module_configs = None

-        self.py_tokens = super().get_tokens()
-
         self.py_return_log_probs = return_log_probs
         self.py_return_context_logits = return_context_logits
         self.py_return_generation_logits = return_generation_logits
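
With the cached py_tokens attribute removed from __init__, the Python side no longer keeps its own snapshot of the token ids. A minimal sketch of the implied access pattern, assuming callers read tokens through the inherited get_tokens() accessor whenever they need them; the helper name below is hypothetical:

# Before this change a snapshot was taken once at construction time:
#     self.py_tokens = super().get_tokens()
# Without the cache, read the tokens from the underlying request on demand,
# so the Python-side view cannot drift from the C++ request state.
def current_tokens(request):
    """Hypothetical helper: return the request's token ids via the binding accessor."""
    return request.get_tokens()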

@@ -180,7 +180,6 @@ def _get_mapping(executor_config: ExecutorConfig) -> Mapping:
 def create_py_executor(
         executor_config: ExecutorConfig,
         checkpoint_dir: str = None,
-        engine_dir: str = None,
         lora_config: Optional[LoraConfig] = None,
         garbage_collection_gen0_threshold: Optional[int] = None) -> PyExecutor:
     _mangle_executor_config(executor_config)
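
Correspondingly, the PyTorch-backend creator now takes only the config and the checkpoint directory. A sketch of a call after this change; the module path matches the import used in the worker hunk below, while the config object and checkpoint path are assumed to have been prepared as in the earlier sketch:

from tensorrt_llm._torch.pyexecutor.py_executor_creator import create_py_executor

# executor_config prepared by update_executor_config as above; engine_dir is no longer passed.
py_executor = create_py_executor(
    executor_config,
    checkpoint_dir=executor_config.hf_model_dir,
)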

@@ -119,7 +119,6 @@ def _create_engine():
             args = {
                 "executor_config": executor_config,
                 "checkpoint_dir": executor_config.hf_model_dir,
-                "engine_dir": executor_config.trt_engine_dir,
             }
             if executor_config.backend == "pytorch":
                 from tensorrt_llm._torch.pyexecutor.py_executor_creator import \
@@ -135,7 +134,6 @@ def _create_engine():
             else:
                 raise ValueError(
                     f"Unsupported backend config: {executor_config.backend}")
-
             return create_executor(**args)

         self.engine = _create_engine()

@@ -695,7 +695,6 @@ def _build_model(self):
             if self._on_trt_backend else None,
             speculative_config=self.args.speculative_config,
             hf_model_dir=self._hf_model_dir,
-            trt_engine_dir=self._engine_dir,
             max_input_len=self.args.max_input_len,
             max_seq_len=max_seq_len)
         self._executor_config.llm_parallel_config = self.args.parallel_config