Skip to content

Commit a84e873

Browse files
Fix greedy search sampling batch return value mismatch in flashinfer sampling.
Signed-off-by: Wangshanshan <[email protected]>
1 parent d299b7a commit a84e873

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

tensorrt_llm/_torch/pyexecutor/sampling_utils_flashinfer.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ def _sample_greedy_with_probs(
137137
group_logit_indices: Optional[torch.Tensor],
138138
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
139139
probs = self._prepare_probs_with_temperature(logits, group_logit_indices, None)
140-
new_tokens, _ = greedy_search_sampling_batch(probs, return_probs=False)
140+
new_tokens, _, _ = greedy_search_sampling_batch(probs, return_probs=False)
141141
return new_tokens, probs
142142

143143
@classmethod
@@ -370,7 +370,8 @@ def sample(
370370
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
371371
if group_logit_indices is not None:
372372
logits = logits[group_logit_indices]
373-
return greedy_search_sampling_batch(logits, return_probs=False)
373+
tokens, probs, _ = greedy_search_sampling_batch(logits, return_probs=False)
374+
return tokens, probs
374375

375376
class TopKTopPSampleOnly(StrategyImplSampleOnly):
376377
def __init__(self, top_k: torch.Tensor, top_p: torch.Tensor, temperature: torch.Tensor):

0 commit comments

Comments (0)