File tree Expand file tree Collapse file tree 1 file changed +3
-8
lines changed
tensorrt_llm/_torch/pyexecutor Expand file tree Collapse file tree 1 file changed +3
-8
lines changed Original file line number Diff line number Diff line change @@ -966,19 +966,14 @@ def _executor_loop(self):
966966 self ._prepare_disagg_gen_transmission_complete (
967967 scheduled_batch )
968968
969+ # Return the first token to the client
970+ self ._handle_first_token_response (scheduled_batch )
971+
969972 self .resource_manager .prepare_resources (scheduled_batch )
970973 if self .drafter is not None :
971974 self .drafter .prepare_draft_tokens (
972975 scheduled_batch , self .resource_manager )
973976
974- if self .kv_cache_transceiver :
975- # For generation requests which have completed KV cache transfer
976- self ._prepare_disagg_gen_transmission_complete (
977- scheduled_batch )
978-
979- # Return the first token to the client
980- self ._handle_first_token_response (scheduled_batch )
981-
982977 batch_outputs = self ._forward_step (scheduled_batch )
983978
984979 if self .guided_decoder is not None :
You can’t perform that action at this time.
0 commit comments