Skip to content

Commit 77871c0

Browse files
committed
Fix out-of-range batch size in the warmup phase.
Signed-off-by: Yukun He <[email protected]>
1 parent ec0d984 commit 77871c0

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

3rdparty/cutlass

Submodule cutlass updated 671 files

tensorrt_llm/_torch/pyexecutor/model_engine.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -584,16 +584,17 @@ def get_autotune_warmup_request():
584584

585585
available_blocks = kv_cache_manager.get_num_free_blocks()
586586

587+
maximum_tunable_num_tokens = min(
588+
self.batch_size * num_tokens_per_request, self.max_num_tokens,
589+
available_blocks * kv_cache_manager.tokens_per_block)
590+
587591
# Calculate number of full-length requests and remaining tokens
588592
# Each request has num_tokens_per_request tokens, except possibly the last one
589-
full_len_request_num = self.max_num_tokens // num_tokens_per_request
590-
remaining_tokens = self.max_num_tokens % num_tokens_per_request
593+
full_len_request_num = maximum_tunable_num_tokens // num_tokens_per_request
594+
remaining_tokens = maximum_tunable_num_tokens % num_tokens_per_request
591595

592596
request_num = full_len_request_num if remaining_tokens == 0 else full_len_request_num + 1
593597

594-
if self.max_num_tokens > available_blocks * kv_cache_manager.tokens_per_block:
595-
return None, None
596-
597598
requests = kv_cache_manager.add_dummy_requests(
598599
request_ids=list(range(full_len_request_num)),
599600
token_nums=[num_tokens_per_request] * full_len_request_num,
@@ -617,7 +618,7 @@ def get_autotune_warmup_request():
617618
result.context_requests = requests
618619
result.generation_requests = []
619620

620-
return result, _create_extra_inputs(1, self.max_num_tokens)
621+
return result, _create_extra_inputs(1, maximum_tunable_num_tokens)
621622

622623
@contextlib.contextmanager
623624
def release_batch(result):

0 commit comments

Comments
 (0)