Fix infinite loop in the transpose elimination pass, which the transpose-reshape elimination pass builds on

Matthew Francis-Landau · Matthew Francis-Landau · commit c3b4d85fb644 · 2025-09-25T12:14:20.000-07:00
diff --git a/mlir-tensorrt/tensorrt/lib/TensorRT/Transforms/TransposeReshapeElimination.cpp b/mlir-tensorrt/tensorrt/lib/TensorRT/Transforms/TransposeReshapeElimination.cpp
@@ -64,13 +64,19 @@ static TransposeOp getLowestTransposeCost(ElementWiseOp consumer,
   int64_t cost1 = memoryCost(consumer.getType()) + memoryCost(op2.getType());
   int64_t cost2 = memoryCost(consumer.getType()) + memoryCost(op1.getType());
   LLVM_DEBUG(DBGS() << "cost1=" << cost1 << ", cost2=" << cost2 << "\n");
+  if (cost1 == 0 && cost2 == 0)
+    return {};
   return cost1 <= cost2 ? op1 : op2;
 }
 
 static std::pair<TransposeOp, TransposeOp>
 getTransposeProducers(ElementWiseOp op) {
   auto producer1 = op.getInput1().getDefiningOp<TransposeOp>();
   auto producer2 = op.getInput2().getDefiningOp<TransposeOp>();
+  if (producer1 && producer1.getInput().getDefiningOp<ConstantOp>())
+    producer1 = {};
+  if (producer2 && producer2.getInput().getDefiningOp<ConstantOp>())
+    producer2 = {};
   return std::make_pair(producer1, producer2);
 }
 
@@ -760,6 +766,7 @@ class EinsumPushDownTranspose : public OpRewritePattern<tensorrt::EinsumOp> {
     if (newEinsumRhs == equation.rhs)
       return failure(); // no change
 
+    equation.rhs = newEinsumRhs;
     std::string newEinsumEquation = equation.generateEquation();
 
     auto newEinsum = rewriter.create<tensorrt::EinsumOp>(
@@ -771,6 +778,7 @@ class EinsumPushDownTranspose : public OpRewritePattern<tensorrt::EinsumOp> {
         AffineMap::getPermutationMap(outputPerm, op.getLoc().getContext()));
 
     rewriter.replaceOp(op, newTranspose.getResult());
+
     return success();
   }
 };
diff --git a/mlir-tensorrt/tensorrt/test/Dialect/TensorRT/transpose-elimination.mlir b/mlir-tensorrt/tensorrt/test/Dialect/TensorRT/transpose-elimination.mlir
@@ -453,4 +453,35 @@ func.func @push_up_transpose_elementwise_reshape_transpose_neg(%arg0: tensor<10x
 //  CHECK-NEXT: %[[v1:.+]] = tensorrt.transpose {permutation = #[[$map]]} %[[arg1]]
 //  CHECK-NEXT: %[[v2:.+]] = tensorrt.element_wise <kDIV>(%[[v1]], %[[v0]] : {{.*}})
 //  CHECK-NEXT: %[[v3:.+]] = tensorrt.transpose {permutation = #[[$map1]]} %[[v2]]
-//  CHECK-NEXT: return %[[v3]]
+//  CHECK-NEXT: return %[[v3]]
+
+// -----
+
+#map = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
+func.func @transpose_rearrange_loop(%arg0: tensor<512x7x24xf32>, %arg1: tensor<512x7x7xf32>) -> tensor<7x512x24xf32> {
+  %0 = tensorrt.matrix_multiply {op0 = #tensorrt.matrix_operation<kNONE>, op1 = #tensorrt.matrix_operation<kNONE>} ins(%arg1, %arg0 : tensor<512x7x7xf32>, tensor<512x7x24xf32>) -> tensor<512x7x24xf32>
+  %1 = tensorrt.transpose {permutation = #map} %0 : tensor<512x7x24xf32> to tensor<7x512x24xf32>
+  return %1 : tensor<7x512x24xf32>
+}
+
+// CHECK: @transpose_rearrange_loop(%[[arg0:.+]]: tensor<512x7x24xf32>, %[[arg1:.+]]: tensor<512x7x7xf32>)
+// CHECK: %[[v0:.+]] =  tensorrt.einsum {equation = [[equation:.+]]} ins(%[[arg1]], %[[arg0]] : tensor<512x7x7xf32>, tensor<512x7x24xf32>)
+// CHECK: return %[[v0]]
+
+// -----
+
+#map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3, d4, d2)>
+func.func @element_wise_with_two_constants() -> tensor<1x8x20x35x192xf32> {
+  %cst_f32 = tensorrt.constant dense_resource<__elided__> : tensor<1x8x192x20x35xf32>
+  %cst_f32_0 = tensorrt.constant dense_resource<__elided__> : tensor<1x8x20x35x192xf32>
+  %1 = tensorrt.transpose {permutation = #map} %cst_f32 : tensor<1x8x192x20x35xf32> to tensor<1x8x20x35x192xf32>
+  %2 = tensorrt.element_wise <kSUM>(%1, %cst_f32_0 : tensor<1x8x20x35x192xf32>, tensor<1x8x20x35x192xf32>) -> tensor<1x8x20x35x192xf32>
+  return %2 : tensor<1x8x20x35x192xf32>
+}
+
+// CHECK: @element_wise_with_two_constants()
+// CHECK: %[[const0:.+]] = tensorrt.constant dense_resource<__elided__> : tensor<1x8x192x20x35xf32>
+// CHECK: %[[const1:.+]] = tensorrt.constant dense_resource<__elided__> : tensor<1x8x20x35x192xf32>
+// CHECK: %[[v0:.+]] = tensorrt.transpose {permutation = #map} %[[const0]]
+// CHECK: %[[v1:.+]] = tensorrt.element_wise <kSUM>(%[[v0]], %[[const1]]
+// CHECK: return %[[v1]]
diff --git a/mlir-tensorrt/tensorrt/test/Dialect/TensorRT/transpose-reshape-elimination.mlir b/mlir-tensorrt/tensorrt/test/Dialect/TensorRT/transpose-reshape-elimination.mlir
@@ -177,6 +177,9 @@ func.func @reshape_with_one(%arg0: tensor<2x3x4x5xf32>) -> tensor<2x3x4x6xf32> {
 
 // -----
 
+// CHECK: matmul_eliminate_reshape_lhs_2(%[[arg0:.+]]: tensor<1x2x3x4x5x6xf16>, %[[arg1:.+]]: tensor<1x2x6x8xf16>)
+// CHECK: %[[v0:.+]] = tensorrt.einsum {equation = [[equation:.+]]} ins(%[[arg0]], %[[arg1]] : tensor<1x2x3x4x5x6xf16>, tensor<1x2x6x8xf16>) -> tensor<1x2x3x4x5x8xf16>
+// CHECK: return %[[v0]]
 func.func @matmul_eliminate_reshape_lhs_2(%arg0: tensor<1x2x3x4x5x6xf16>, %arg1: tensor<1x2x6x8xf16>) -> tensor<1x2x3x4x5x8xf16>{
     %0 = tensorrt.reshape %arg0 : tensor<1x2x3x4x5x6xf16> to tensor<1x2x60x6xf16>
     %1 = tensorrt.matrix_multiply {op0 = #tensorrt.matrix_operation<kNONE>, op1 = #tensorrt.matrix_operation<kNONE>}