Update layernorm to directly use TRT API (#624)

akhilg-nv · web-flow · commit 236905b5deab · 2025-05-09T14:13:59.000-07:00
diff --git a/tripy/nvtripy/frontend/module/layernorm.py b/tripy/nvtripy/frontend/module/layernorm.py
@@ -24,6 +24,41 @@
 from nvtripy.frontend.module.parameter import DefaultParameter
 from nvtripy.frontend.tensor import Tensor
 
+from nvtripy.frontend.ops import utils as op_utils
+from nvtripy.utils import wrappers
+from nvtripy.trace.ops.layernorm import LayerNorm as LayerNormOp
+
+
+@wrappers.interface(
+    dtype_constraints={"input": "T1", "weight": "T1", "bias": "T1", wrappers.RETURN_VALUE: "T1"},
+    dtype_variables={"T1": ["float32", "float16", "bfloat16"]},  # T1 is shared by all three inputs and the return
+)
+def layernorm(
+    input: "nvtripy.Tensor",
+    weight: "nvtripy.Tensor",
+    bias: "nvtripy.Tensor",
+    eps: float,
+) -> "nvtripy.Tensor":
+    """Create a `LayerNormOp` from input, weight and bias, reshaping weight/bias up to the input's rank if needed."""
+    normalized_shape = weight.shape  # the normalized dimensions are read off the weight
+    D = len(normalized_shape)  # number of normalized dimensions
+    input_rank = input.rank
+
+    # Left-pad weight and bias with 1s up to the input's rank; TensorRT normalization expects [1, ...] + normalized_shape
+    if input_rank > D:
+        from nvtripy.frontend.ops.reshape import reshape
+        # NOTE(review): the tuple concatenation below assumes `weight.shape` is a tuple - confirm
+        broadcast_shape = (1,) * (input_rank - D) + normalized_shape
+        weight = reshape(weight, broadcast_shape)
+        bias = reshape(bias, broadcast_shape)  # bias gets the weight-derived shape; a mismatch is not checked here
+
+    return op_utils.create_op(
+        LayerNormOp,
+        [input, weight, bias],  # the trace op reads these positionally as inputs[0], inputs[1], inputs[2]
+        normalized_shape=normalized_shape,  # the un-padded shape, not broadcast_shape
+        eps=eps,
+    )
+
 
 @export.public_api(document_under="operations/modules")
 @dataclass
@@ -109,14 +144,4 @@ def forward(self, x: "nvtripy.Tensor") -> "nvtripy.Tensor":
         Returns:
             A tensor of the same shape as the input.
         """
-        from nvtripy.frontend.ops.reduce.mean import mean
-        from nvtripy.frontend.ops.reduce.var import var
-        from nvtripy.frontend.ops.unary.rsqrt import rsqrt
-
-        # The mean and the variance are computed over the last D dimensions
-        D = len(self.normalized_shape)
-        reduce_dims = tuple(-i for i in range(D, 0, -1))
-        mean_val = mean(x, dim=reduce_dims, keepdim=True)
-        var_val = var(x, dim=reduce_dims, keepdim=True, correction=0) + self.eps
-        x = (x - mean_val) * rsqrt(var_val)
-        return self.weight * x + self.bias
+        return layernorm(x, self.weight, self.bias, self.eps)
diff --git a/tripy/nvtripy/trace/ops/layernorm.py b/tripy/nvtripy/trace/ops/layernorm.py
@@ -0,0 +1,42 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from dataclasses import dataclass
+from typing import Sequence, Tuple
+import nvtripy.trace.ops.utils as op_utils
+from nvtripy.trace.ops.base import TraceOp
+from mlir_tensorrt.compiler.dialects import tensorrt
+
+from mlir_tensorrt.compiler import ir
+
+
+@dataclass(repr=False)
+class LayerNorm(TraceOp):  # trace op that lowers to a single `tensorrt.normalization`
+    normalized_shape: Sequence[int]  # only its length is used in this class, to pick the normalized axes
+    eps: float = 1e-5  # forwarded unchanged as `eps` of `tensorrt.normalization`
+
+    infer_rank = op_utils.InferRankPolicies.same_as_input()  # presumably output rank == input rank - confirm
+
+    def infer_dtypes(self):  # the output dtype is copied from the first input
+        self.outputs[0].dtype = self.inputs[0].dtype
+
+    def to_mlir(self, inputs, outputs):  # returns a one-element list holding the normalization result
+        rank = outputs[0].rank
+        D = len(self.normalized_shape)
+        axis = ir.DenseI64ArrayAttr.get(list(range(rank - D, rank)))  # the trailing D axes: rank - D .. rank - 1
+        # inputs[0..2] are presumably (input, scale, bias), matching the frontend's [input, weight, bias] - confirm
+        return [tensorrt.normalization(inputs[0], inputs[1], inputs[2], axis=axis, eps=self.eps, num_groups=1)]
diff --git a/tripy/tests/frontend/module/test_layernorm.py b/tripy/tests/frontend/module/test_layernorm.py
@@ -27,5 +27,7 @@ def test_layernorm_improper_dimensions(self):
         tp_layernorm.bias = tp.ones((2, 2))
 
         x = tp.ones((5, 5, 5))
-        with helper.raises(tp.TripyException, match="broadcast dimensions must be conformable"):
+        with helper.raises(
+            tp.TripyException, match="The normalization scale is not broadcast-compatible with the input at dimension 1"
+        ):
             tp_layernorm(x).eval()