Skip to content

Commit 89e52f6

Browse files
committed
Short-circuit autotune logic when not in autotune mode; address feedback
Signed-off-by: Anthony Chang <[email protected]>
1 parent ae7eace commit 89e52f6

File tree

3 files changed

+7
-2
lines changed

3 files changed

+7
-2
lines changed

.clangd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ CompileFlags:
2323
- cuda
2424
# Allow variadic CUDA functions
2525
- "-Xclang=-fcuda-allow-variadic-functions"
26+
- "-I/mnt/trtllm-gen/amodel/cuda/gpgpu_internal/include"
2627

2728
---
2829

tensorrt_llm/_torch/custom_ops/trtllm_gen_custom_ops.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ def prepare_dummy_topk_and_hook(
5656
Tuple of (routing_logits_for_tuner, topk_weights_for_tuner, topk_ids_for_tuner, tuning_config_with_hook)
5757
"""
5858

59+
# NOTE: Return early when not in tuning mode so that auto-tuning-related code is never executed in actual runs
60+
tuner = AutoTuner.get()
61+
if not tuner.is_tuning_mode:
62+
return routing_logits, topk_weights, topk_ids, base_tuning_config
63+
5964
if routing_logits is None:
6065
routing_logits_for_tuner = torch.randn(hidden_states.shape[0],
6166
num_experts,

tensorrt_llm/_torch/modules/fused_moe/routing.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def routing_method_type(self):
219219
return RoutingMethodType.Default
220220

221221

222-
class Deepseekv3RoutingImpl():
222+
class Deepseekv3RoutingImpl:
223223

224224
def __init__(
225225
self,
@@ -556,7 +556,6 @@ def routing_method_type(self) -> RoutingMethodType:
556556
return RoutingMethodType.RenormalizeNaive
557557

558558

559-
# Mapping from RoutingMethodType to the corresponding class
560559
ROUTING_METHOD_TYPE_TO_CLASS: Dict[RoutingMethodType,
561560
Type[BaseMoeRoutingMethod]] = {
562561
RoutingMethodType.Default:

0 commit comments

Comments
 (0)