Skip to content

Commit ad94c87

Browse files
committed
Re-enable the activation fusion
Signed-off-by: Chenghao Zhang <[email protected]>
1 parent 06e2fb3 commit ad94c87

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

tensorrt_llm/_torch/auto_deploy/transform/library/fuse_causal_conv.py

Lines changed: 9 additions and 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,17 @@ def _apply(
8585
) -> Tuple[GraphModule, TransformInfo]:
8686
graph = gm.graph
8787

88+
# Import wrapper to match against
89+
# We use the wrapper because the underlying op returns None (void) to avoid aliasing,
90+
# but the wrapper returns the tensor to maintain graph data flow.
91+
from ...custom_ops.mamba.cuda_backend_causal_conv import cuda_cached_causal_conv1d_wrapper
92+
93+
target_op = cuda_cached_causal_conv1d_wrapper
94+
8895
# Step 1: Identify causal_conv + activation pattern
8996
matches = _match_causal_conv_activation_pattern(
9097
graph,
91-
target_op=torch.ops.auto_deploy.cuda_cached_causal_conv1d,
98+
target_op=target_op,
9299
)
93100

94101
# Step 2: Replace matched patterns with fused version
@@ -98,7 +105,7 @@ def _apply(
98105
# Replace the last arg (activation=None) with activation_name
99106
new_args = list(conv_node.args[:-1]) + [activation_name]
100107
fused_node = graph.call_function(
101-
torch.ops.auto_deploy.cuda_cached_causal_conv1d,
108+
target_op,
102109
args=tuple(new_args),
103110
)
104111

0 commit comments

Comments (0)