
Commit 48e7178

fixed nemotron sharding
Signed-off-by: greg-kwasniewski1 <[email protected]>
1 parent cac7fe4

2 files changed: +3 -2 lines changed


tensorrt_llm/_torch/auto_deploy/transform/library/sharding.py
Lines changed: 1 addition & 1 deletion

@@ -641,7 +641,7 @@ def detect_sharding_from_factory_config(
                         world_size=world_size,
                         dist_op=None,
                         min_local_shape=min_local_shape,
-                        layer_type=LayerType.MAMBA,
+                        layer_type=LayerType.MAMBA_FULL,
                     )
                 )
                 num_row_col_shards += 1
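Together with the change below, this keeps the layer type emitted by detect_sharding_from_factory_config in sync with the branch in sharding_utils.apply() that selects the full-Mamba sharding path.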

tensorrt_llm/_torch/auto_deploy/utils/sharding_utils.py
Lines changed: 2 additions & 1 deletion

@@ -557,6 +557,7 @@ def check_and_apply(self, gm: GraphModule, node: Node) -> bool:
 class LayerType(Enum):
     ATTENTION = "attention"
     MAMBA = "mamba"
+    MAMBA_FULL = "mamba_full"
     MLP = "mlp"
     MOE = "moe"
 
@@ -612,7 +613,7 @@ def validate(self, gm: GraphModule = None, node: Node = None) -> bool:
 
     def apply(self, gm: GraphModule, node: Node) -> None:
         """Apply TP sharding transformation to the graph module."""
-        if self.layer_type == LayerType.MAMBA:
+        if self.layer_type == LayerType.MAMBA_FULL:
             _insert_sharded_mamba(
                 gm=gm,
                 entry_node=node,
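For context, here is a minimal runnable sketch of the enum-gated dispatch this commit adjusts. Only the LayerType members come from the diff; ShardingTransform, the stub helper, and the usage lines are hypothetical stand-ins, not the TensorRT-LLM source.

# Minimal sketch, assuming a simplified ShardingTransform; only the LayerType
# members below come from the diff, everything else is a hypothetical stand-in.
from enum import Enum


class LayerType(Enum):
    ATTENTION = "attention"
    MAMBA = "mamba"
    MAMBA_FULL = "mamba_full"  # new member added by this commit
    MLP = "mlp"
    MOE = "moe"


def _insert_sharded_mamba(gm, entry_node):
    """Hypothetical stub for the full-Mamba sharding helper named in the diff."""
    print(f"full-Mamba sharding applied at {entry_node}")


class ShardingTransform:
    """Hypothetical stand-in for the transform class in sharding_utils.py."""

    def __init__(self, layer_type: LayerType):
        self.layer_type = layer_type

    def apply(self, gm, node) -> None:
        # After the fix, only MAMBA_FULL takes the full-Mamba path; a layer
        # tagged with plain MAMBA no longer triggers _insert_sharded_mamba.
        if self.layer_type == LayerType.MAMBA_FULL:
            _insert_sharded_mamba(gm=gm, entry_node=node)


ShardingTransform(LayerType.MAMBA_FULL).apply(gm=None, node="mamba_layer_0")
ShardingTransform(LayerType.MAMBA).apply(gm=None, node="mamba_layer_1")  # no-op now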
