Short-circuit autotune logic when not in tuning mode; address review feedback

rosenrodt · rosenrodt · commit 89e52f6c5939 · 2025-12-04T00:14:15.000+08:00
Signed-off-by: Anthony Chang <27950904+rosenrodt@users.noreply.github.com>
diff --git a/.clangd b/.clangd
@@ -23,6 +23,7 @@ CompileFlags:
     - cuda
     # Allow variadic CUDA functions
     - "-Xclang=-fcuda-allow-variadic-functions"
+    - "-I/mnt/trtllm-gen/amodel/cuda/gpgpu_internal/include"
 
 ---
 
diff --git a/tensorrt_llm/_torch/custom_ops/trtllm_gen_custom_ops.py b/tensorrt_llm/_torch/custom_ops/trtllm_gen_custom_ops.py
@@ -56,6 +56,11 @@ def prepare_dummy_topk_and_hook(
         Tuple of (routing_logits_for_tuner, topk_weights_for_tuner, topk_ids_for_tuner, tuning_config_with_hook)
     """
 
+    # NOTE: This prevents auto-tuning related code from being executed in actual runs
+    tuner = AutoTuner.get()
+    if not tuner.is_tuning_mode:
+        return routing_logits, topk_weights, topk_ids, base_tuning_config
+
     if routing_logits is None:
         routing_logits_for_tuner = torch.randn(hidden_states.shape[0],
                                                num_experts,
diff --git a/tensorrt_llm/_torch/modules/fused_moe/routing.py b/tensorrt_llm/_torch/modules/fused_moe/routing.py
@@ -219,7 +219,7 @@ def routing_method_type(self):
         return RoutingMethodType.Default
 
 
-class Deepseekv3RoutingImpl():
+class Deepseekv3RoutingImpl:
 
     def __init__(
             self,
@@ -556,7 +556,6 @@ def routing_method_type(self) -> RoutingMethodType:
         return RoutingMethodType.RenormalizeNaive
 
 
-# Mapping from RoutingMethodType to the corresponding class
 ROUTING_METHOD_TYPE_TO_CLASS: Dict[RoutingMethodType,
                                    Type[BaseMoeRoutingMethod]] = {
                                        RoutingMethodType.Default: