
Commit 2b9acff: "minor changes"

1 parent: 666df4f

File tree: 3 files changed, +3 −2 lines


tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 1 addition & 0 deletions
@@ -693,6 +693,7 @@ def _merge_helix_requests(self, new_requests: list[RequestQueueItem],
                 input_token_ids=input_ids_this_rank,
                 position_ids=position_ids_this_rank,
             )
+            req.total_input_len_cp = input_len
             req_with_children.append(req)
             if req.child_requests:
                 req_with_children.extend(req.child_requests)
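
The single added line records the total pre-split input length on the merged request. A minimal sketch of why that matters, assuming (hypothetically) that `input_len` holds the full prompt length before helix/context-parallel sharding and that each rank's request keeps only its own token slice; the `split_for_cp` helper and `Request` class below are illustrative stand-ins, not the real TensorRT-LLM types:

```python
# Hedged sketch: a request that only holds its rank's slice of the prompt
# still needs the total length recorded somewhere.

def split_for_cp(input_token_ids: list[int], cp_rank: int, cp_size: int) -> list[int]:
    """Give each context-parallel rank a contiguous slice of the prompt."""
    chunk = (len(input_token_ids) + cp_size - 1) // cp_size
    return input_token_ids[cp_rank * chunk:(cp_rank + 1) * chunk]

class Request:
    def __init__(self, input_token_ids: list[int]):
        self.input_token_ids = input_token_ids
        self.total_input_len_cp = 0  # filled in by the merge step

tokens = list(range(10))           # a 10-token prompt
input_len = len(tokens)            # total length before sharding
rank_tokens = split_for_cp(tokens, cp_rank=1, cp_size=2)

req = Request(rank_tokens)
req.total_input_len_cp = input_len  # mirrors the committed line

# len(req.input_token_ids) == 5, but req.total_input_len_cp == 10,
# so downstream code can still reason about the full prompt.
assert len(req.input_token_ids) == 5 and req.total_input_len_cp == 10
```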

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 1 addition & 1 deletion
@@ -1148,6 +1148,7 @@ def _executor_loop(self):
                     break

                 self._pause_requests(scheduled_batch.paused_requests)
+
                 finished_requests = []

                 can_queue = self._can_queue(scheduled_batch)
@@ -1898,7 +1899,6 @@ def _prepare_disagg_gen_transmission_complete(self, scheduled_batch):
             ctx_draft_tokens = req.context_phase_params.draft_tokens
             req.py_draft_tokens = [] if ctx_draft_tokens is None else ctx_draft_tokens
             beam_width = req.sampling_config.beam_width
-
             for beam in range(0, beam_width):
                 req.add_new_token(first_gen_tokens[beam], beam)
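
Both hunks above are whitespace-only, but the second sits in what appears to be the disaggregated-serving handoff, where the generation worker seeds each beam with the first token already produced during the context phase. A hedged sketch of that seeding loop, with an assumed `Req` class and made-up token values standing in for the real request API:

```python
# Hedged sketch (assumed names, not the real TensorRT-LLM classes): each
# beam of the request is seeded with the first token received from the
# context worker, one append per beam.

first_gen_tokens = [101, 202, 303]   # one "first token" per beam (assumed values)
beam_width = len(first_gen_tokens)

class Req:
    def __init__(self, beam_width: int):
        self.tokens = [[] for _ in range(beam_width)]  # per-beam token lists

    def add_new_token(self, token: int, beam: int) -> None:
        self.tokens[beam].append(token)

req = Req(beam_width)
for beam in range(0, beam_width):
    req.add_new_token(first_gen_tokens[beam], beam)

assert req.tokens == [[101], [202], [303]]
```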

tests/unittest/_torch/modules/test_mla_helix.py

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ class Scenario:
     rope_original_max_position_embeddings: int = 4096
     rope_type: str = "yarn"
     model_type: str = "deepseek_v3"
-    kv_cache_tokens_per_block: int = 64
+    kv_cache_tokens_per_block: int = 32
     # TODO only 1 is supported for now here
     predicted_tokens_per_seq: int = 1
     bias: bool = False
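
The test scenario now uses 32-token KV cache blocks instead of 64. Block size sets the paging granularity of the KV cache, so the number of blocks a sequence occupies follows the usual ceil-division; a small sketch of that arithmetic (standard paged-KV math, not necessarily TensorRT-LLM's exact accounting):

```python
# Hedged sketch: how kv_cache_tokens_per_block translates into block counts.
import math

def blocks_needed(seq_len: int, tokens_per_block: int) -> int:
    return math.ceil(seq_len / tokens_per_block)

print(blocks_needed(100, 64))  # 2 blocks -> 128 token slots, 28 unused
print(blocks_needed(100, 32))  # 4 blocks -> 128 token slots, 28 unused

# Halving the block size does not change capacity in the case above, but it
# halves the granularity at which blocks are allocated and shared across
# requests, which matters for a test exercising block-level behavior:
print(blocks_needed(65, 64))   # 2 blocks, 63 slots unused
print(blocks_needed(65, 32))   # 3 blocks, 31 slots unused
```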
