
Commit 326305d
[TRTLLM-6756][chore] Update test cases for beam search sampling after merge
- Modified `model_kwargs` to include `sampler_type` for improved test configuration.
- Adjusted the `llm_cuda_graph` fixture to remove the now-unnecessary `sampler_type` parameter.
- Enhanced clarity in `test_torch_sampler.py` by adding comments regarding the `is_context_init_state` attribute.

Signed-off-by: Stefan Niebler <[email protected]>

Parent: 67593dd

File tree: 3 files changed (+20, −13 lines)

tensorrt_llm/_torch/pyexecutor/sampler.py

Lines changed: 9 additions & 6 deletions

```diff
@@ -1093,12 +1093,16 @@ def _process_draft_tokens_tree(
         # Take the longest accepted path as the next new token.
         num_accepted_draft_tokens = 0
         for idx in eagle_paths[longest_match_path_idx][:longest_accepted_len]:
-            add_token(request, new_tokens_list, beam_idx=self.DEFAULT_BEAM_IDX, step=cast(int, idx.item()))
+            add_token(
+                request, new_tokens_list, beam_idx=DEFAULT_BEAM_IDX, step=cast(int, idx.item())
+            )
             num_accepted_draft_tokens += 1
-            if self.finish_if_reason(request,
-                                     finish_reasons,
-                                     step=num_accepted_draft_tokens,
-                                     beam_idx=DEFAULT_BEAM_IDX,):
+            if self.finish_if_reason(
+                request,
+                finish_reasons,
+                step=num_accepted_draft_tokens,
+                beam_idx=DEFAULT_BEAM_IDX,
+            ):
                 break
 
         assert num_accepted_draft_tokens <= longest_accepted_len
@@ -1108,7 +1112,6 @@ def _process_draft_tokens_tree(
 
         return num_accepted_draft_tokens - 1
 
-
     def setup_sampler_step(self, requests: ScheduledRequests):
         """Setup the sampler step for the requests
```
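For context, the reformatted block is an early-exit acceptance loop over the longest matched draft path; the call sites now reference `DEFAULT_BEAM_IDX` without the `self.` prefix, which suggests it is a module-level constant. Below is a minimal, self-contained sketch of that loop pattern; `accept_longest_path`, its parameters, and the value of `DEFAULT_BEAM_IDX` are illustrative stand-ins, not the real TensorRT-LLM helpers.

```python
DEFAULT_BEAM_IDX = 0  # assumed value; the diff only shows the name


def accept_longest_path(path: list[int], max_len: int, is_finished) -> int:
    """Accept draft tokens along `path`, stopping early on a finish reason."""
    num_accepted = 0
    for idx in path[:max_len]:
        # add_token(...) would record token `idx` for DEFAULT_BEAM_IDX here.
        num_accepted += 1
        if is_finished(step=num_accepted, beam_idx=DEFAULT_BEAM_IDX):
            break
    assert num_accepted <= max_len
    # Mirrors `return num_accepted_draft_tokens - 1` in the diff: the last
    # accepted token is the next new token, not a draft token.
    return num_accepted - 1


# Example: a finish reason at step 2 stops acceptance after two tokens.
accepted = accept_longest_path([5, 9, 2], 3, lambda step, beam_idx: step == 2)
assert accepted == 1
```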
tests/unittest/_torch/sampler/test_beam_search.py

Lines changed: 7 additions & 5 deletions

```diff
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import pathlib as _pl
+from typing import Any
 
 import pytest
 import torch
@@ -49,7 +50,7 @@ def sampler_type(request):
 
 
 @pytest.fixture(scope="module")
-def model_kwargs(fixed_params) -> dict[str, Any]:
+def model_kwargs(fixed_params, sampler_type) -> dict[str, Any]:
     assert fixed_params[
         "max_beam_width"] == 2, "This test only works for a beam width of 2"
     return dict(
@@ -58,6 +59,7 @@ def model_kwargs(fixed_params) -> dict[str, Any]:
             weight_loader=DummyWeightLoader(),
             config_loader=DummyConfigLoader(),
         ),
+        sampler_type=sampler_type,
     )
 
 
@@ -72,19 +74,18 @@ def _build_llm(fixed_params, input_prompts, model_kwargs):
         max_beam_width=fixed_params["max_beam_width"],
         disable_overlap_scheduler=True,
         cuda_graph_config=None,
-        sampler_type=sampler_type,
     )
 
 
 @pytest.fixture(scope="module")
 def llm(fixed_params, input_prompts, model_kwargs):
-    return _build_llm(fixed_params, input_prompts, model_kwargs)
+    llm = _build_llm(fixed_params, input_prompts, model_kwargs)
     yield llm
     llm.shutdown()
 
 
 @pytest.fixture(scope="module")
-def llm_cuda_graph(fixed_params, input_prompts, sampler_type, model_kwargs):
+def llm_cuda_graph(fixed_params, input_prompts, model_kwargs):
     llm = LLM(
         **model_kwargs,
         kv_cache_config=KvCacheConfig(max_tokens=10000),
@@ -96,7 +97,6 @@ def llm_cuda_graph(fixed_params, input_prompts, sampler_type, model_kwargs):
         disable_overlap_scheduler=False,
         cuda_graph_config=CudaGraphConfig(batch_sizes=[1, 2, 4, 8],
                                           enable_padding=True),
-        sampler_type=sampler_type,
     )
     yield llm
     llm.shutdown()
@@ -327,7 +327,9 @@ def test_beam_search_e2e_cuda_graph_and_overlap(
                              sampling_params)
 
 
+###########################################################################
 # Unit tests
+###########################################################################
 class GeneralTestParams:
     # Test Parameters for the update_beam_history and finish_beams tests
     beam_width = 3
```
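The net effect of these test changes: `sampler_type` is injected once through the `model_kwargs` fixture instead of being passed to each LLM constructor, and the `llm` fixture yields (rather than returns) so `shutdown()` runs at module teardown. A minimal, self-contained sketch of that fixture wiring follows; `FakeLLM` and the parametrization values are assumptions for illustration, not the real `LLM` class or the real `sampler_type` fixture.

```python
import pytest


class FakeLLM:
    """Stand-in for the real LLM; records kwargs and supports shutdown."""

    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def shutdown(self):
        pass


@pytest.fixture(scope="module", params=["TorchSampler", "TRTLLMSampler"])
def sampler_type(request):
    # Assumed parametrization; the real fixture lives in test_beam_search.py.
    return request.param


@pytest.fixture(scope="module")
def model_kwargs(sampler_type):
    # sampler_type flows in here once, so every LLM built from these kwargs
    # inherits it without each downstream fixture passing it again.
    return dict(model="dummy", sampler_type=sampler_type)


@pytest.fixture(scope="module")
def llm(model_kwargs):
    llm = FakeLLM(**model_kwargs)
    yield llm  # yield instead of return so the teardown below actually runs
    llm.shutdown()


def test_llm_has_sampler_type(llm, sampler_type):
    assert llm.kwargs["sampler_type"] == sampler_type
```

This also explains why the old `return _build_llm(...)` was a bug: any code after a `return` in a fixture never executes, so `llm.shutdown()` was unreachable until the commit switched to assign-then-yield.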

tests/unittest/_torch/sampler/test_torch_sampler.py

Lines changed: 4 additions & 2 deletions

```diff
@@ -83,9 +83,11 @@ class TestStrategySelection:
 
     class MockLlmRequest:
         sampling_config: SamplingConfig
-        is_context_init_state: bool  # Not used in this test
+        is_context_init_state: bool  # Torch sampler accesses this, but it does not affect this test
 
-        def get_beam_width_by_iter(self, for_next_iteration: bool) -> int:
+        def get_beam_width_by_iter(
+            self, for_next_iteration: bool
+        ) -> int:  # Torch sampler accesses this, but it does not affect this test
             return self.sampling_config.beam_width
 
     def _check_params(self, params: SamplingParams):
```
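A short sketch of why the mock declares members the test never asserts on: the torch sampler reads `is_context_init_state` and calls `get_beam_width_by_iter`, so the mock must provide both or the sampler would raise `AttributeError`. The types below are simplified stand-ins, not the real `SamplingConfig` or request classes.

```python
from dataclasses import dataclass


@dataclass
class SamplingConfig:  # simplified stand-in for the real config type
    beam_width: int = 1


class MockLlmRequest:
    sampling_config = SamplingConfig(beam_width=2)
    # Read by the sampler during strategy selection; its value is irrelevant
    # to what the test checks, but the attribute must exist.
    is_context_init_state = False

    def get_beam_width_by_iter(self, for_next_iteration: bool) -> int:
        # Called by the sampler; the test only needs it to return the
        # configured beam width.
        return self.sampling_config.beam_width


assert MockLlmRequest().get_beam_width_by_iter(for_next_iteration=True) == 2
```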
