Skip to content

Commit 10dbf4f

Browse files
authored
[fix] Remove duplicated KVCache transmission check (#6022)
Signed-off-by: Iman Tabrizian <[email protected]>
1 parent d71c6fe commit 10dbf4f

File tree

1 file changed

+3
-8
lines changed

1 file changed

+3
-8
lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -966,19 +966,14 @@ def _executor_loop(self):
966 966
self._prepare_disagg_gen_transmission_complete(
967 967
scheduled_batch)
968 968

969+
# Return the first token to the client
970+
self._handle_first_token_response(scheduled_batch)
971+
969 972
self.resource_manager.prepare_resources(scheduled_batch)
970 973
if self.drafter is not None:
971 974
self.drafter.prepare_draft_tokens(
972 975
scheduled_batch, self.resource_manager)
973 976

974-
if self.kv_cache_transceiver:
975-
# For generation requests which have completed KV cache transfer
976-
self._prepare_disagg_gen_transmission_complete(
977-
scheduled_batch)
978-
979-
# Return the first token to the client
980-
self._handle_first_token_response(scheduled_batch)
981-
982 977
batch_outputs = self._forward_step(scheduled_batch)
983 978

984 979
if self.guided_decoder is not None:

0 commit comments

Comments
 (0)