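Fix CI: key CUDA graph draft length on spec_config, default spec-decode args, and sync test helper with CUDAGraphRunnerConfig fields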

QiJune · QiJune · commit a396d0695c73 · 2025-10-10T14:51:45.000+08:00
Signed-off-by: junq <22017000+QiJune@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py b/tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py
@@ -80,7 +80,7 @@ def __init__(self, config: CUDAGraphRunnerConfig):
 
     def _create_shared_static_tensors(self):
         """Allocates static tensors sized for the largest possible batch."""
-        max_draft_len = self.config.original_max_draft_len if self.config.is_spec_decode else 0
+        max_draft_len = self.config.original_max_draft_len if self.config.spec_config is not None else 0
         token_per_request = max_draft_len + 1
         max_total_tokens = (self.max_supported_batch_size *
                             self.max_beam_width * token_per_request)
@@ -192,7 +192,7 @@ def capture(self,
                 key: Tuple[int, int, int],
                 forward_fn: Callable,
                 initial_inputs: Dict[str, Any],
-                enable_spec_decode: bool,
+                enable_spec_decode: bool = False,
                 postprocess_fn: Optional[Callable] = None):
         """Captures the forward pass for a given batch size."""
         batch_size = key[0]
@@ -358,8 +358,10 @@ def _round_up_batch_size(self, batch_size: int) -> int:
         return self.supported_batch_sizes[idx]
 
     @contextlib.contextmanager
-    def pad_batch(self, scheduled_requests: ScheduledRequests,
-                  resource_manager: ResourceManager, runtime_draft_len: int):
+    def pad_batch(self,
+                  scheduled_requests: ScheduledRequests,
+                  resource_manager: ResourceManager,
+                  runtime_draft_len: int = 0):
         """Context manager to pad a batch to a graph-compatible size."""
         padding_size = self._get_padded_batch(scheduled_requests,
                                               resource_manager,
diff --git a/tests/unittest/_torch/helpers.py b/tests/unittest/_torch/helpers.py
@@ -173,16 +173,17 @@ def create_mock_cuda_graph_runner(batch_size: int, use_mrope: bool = False):
     config = CUDAGraphRunnerConfig(
         use_cuda_graph=True,
         cuda_graph_padding_enabled=False,
-        supported_batch_sizes=[batch_size],
-        max_supported_batch_size=batch_size,
-        max_batch_size=batch_size,
+        cuda_graph_batch_sizes=[batch_size],
+        max_cuda_graph_batch_size=batch_size,
+        batch_size=batch_size,
         max_beam_width=1,
-        max_draft_len=0,
         max_num_tokens=1,
         use_mrope=use_mrope,
         spec_config=None,
         cuda_graph_mem_pool=None,
         enable_attention_dp=False,
+        original_max_draft_len=0,
+        is_draft_model=False,
         mapping=Mapping(),
         dist=None,
         kv_cache_manager_key=ResourceManagerType.KV_CACHE_MANAGER)