
Commit 2b9acff: "minor changes"

1 parent: 666df4f

File tree: 3 files changed, +3 −2 lines


tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 1 addition & 0 deletions
@@ -693,6 +693,7 @@ def _merge_helix_requests(self, new_requests: list[RequestQueueItem],
                 input_token_ids=input_ids_this_rank,
                 position_ids=position_ids_this_rank,
             )
+            req.total_input_len_cp = input_len
             req_with_children.append(req)
             if req.child_requests:
                 req_with_children.extend(req.child_requests)
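
The single added line records the total pre-split input length on the merged request. A minimal sketch of why that matters, assuming (hypothetically) that `input_len` holds the full prompt length before helix/context-parallel sharding and that each rank's request keeps only its own token slice; the `split_for_cp` helper and `Request` class below are illustrative stand-ins, not the real TensorRT-LLM types:

```python
# Hedged sketch: a request that only holds its rank's slice of the prompt
# still needs the total length recorded somewhere.

def split_for_cp(input_token_ids: list[int], cp_rank: int, cp_size: int) -> list[int]:
    """Give each context-parallel rank a contiguous slice of the prompt."""
    chunk = (len(input_token_ids) + cp_size - 1) // cp_size
    return input_token_ids[cp_rank * chunk:(cp_rank + 1) * chunk]

class Request:
    def __init__(self, input_token_ids: list[int]):
        self.input_token_ids = input_token_ids
        self.total_input_len_cp = 0  # filled in by the merge step

tokens = list(range(10))           # a 10-token prompt
input_len = len(tokens)            # total length before sharding
rank_tokens = split_for_cp(tokens, cp_rank=1, cp_size=2)

req = Request(rank_tokens)
req.total_input_len_cp = input_len  # mirrors the committed line

# len(req.input_token_ids) == 5, but req.total_input_len_cp == 10,
# so downstream code can still reason about the full prompt.
assert len(req.input_token_ids) == 5 and req.total_input_len_cp == 10
```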

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 1 addition & 1 deletion
@@ -1148,6 +1148,7 @@ def _executor_loop(self):
                     break

                 self._pause_requests(scheduled_batch.paused_requests)
+
                 finished_requests = []

                 can_queue = self._can_queue(scheduled_batch)
@@ -1898,7 +1899,6 @@ def _prepare_disagg_gen_transmission_complete(self, scheduled_batch):
             ctx_draft_tokens = req.context_phase_params.draft_tokens
             req.py_draft_tokens = [] if ctx_draft_tokens is None else ctx_draft_tokens
             beam_width = req.sampling_config.beam_width
-
             for beam in range(0, beam_width):
                 req.add_new_token(first_gen_tokens[beam], beam)
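
Both hunks above are whitespace-only, but the second sits in what appears to be the disaggregated-serving handoff, where the generation worker seeds each beam with the first token already produced during the context phase. A hedged sketch of that seeding loop, with an assumed `Req` class and made-up token values standing in for the real request API:

```python
# Hedged sketch (assumed names, not the real TensorRT-LLM classes): each
# beam of the request is seeded with the first token received from the
# context worker, one append per beam.

first_gen_tokens = [101, 202, 303]   # one "first token" per beam (assumed values)
beam_width = len(first_gen_tokens)

class Req:
    def __init__(self, beam_width: int):
        self.tokens = [[] for _ in range(beam_width)]  # per-beam token lists

    def add_new_token(self, token: int, beam: int) -> None:
        self.tokens[beam].append(token)

req = Req(beam_width)
for beam in range(0, beam_width):
    req.add_new_token(first_gen_tokens[beam], beam)

assert req.tokens == [[101], [202], [303]]
```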

tests/unittest/_torch/modules/test_mla_helix.py

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ class Scenario:
     rope_original_max_position_embeddings: int = 4096
     rope_type: str = "yarn"
     model_type: str = "deepseek_v3"
-    kv_cache_tokens_per_block: int = 64
+    kv_cache_tokens_per_block: int = 32
     # TODO only 1 is supported for now here
     predicted_tokens_per_seq: int = 1
     bias: bool = False
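
The test scenario now uses 32-token KV cache blocks instead of 64. Block size sets the paging granularity of the KV cache, so the number of blocks a sequence occupies follows the usual ceil-division; a small sketch of that arithmetic (standard paged-KV math, not necessarily TensorRT-LLM's exact accounting):

```python
# Hedged sketch: how kv_cache_tokens_per_block translates into block counts.
import math

def blocks_needed(seq_len: int, tokens_per_block: int) -> int:
    return math.ceil(seq_len / tokens_per_block)

print(blocks_needed(100, 64))  # 2 blocks -> 128 token slots, 28 unused
print(blocks_needed(100, 32))  # 4 blocks -> 128 token slots, 28 unused

# Halving the block size does not change capacity in the case above, but it
# halves the granularity at which blocks are allocated and shared across
# requests, which matters for a test exercising block-level behavior:
print(blocks_needed(65, 64))   # 2 blocks, 63 slots unused
print(blocks_needed(65, 32))   # 3 blocks, 31 slots unused
```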
