
Commit 3353e18

shelkesagar29 and Copybara Bot authored
[compiler] Fix stablehlo-ext-constant-folding bug in "absorb tensor.cast" pattern
This PR moves the following internal commit to OSS:

[compiler] Fix `stablehlo-ext-constant-folding` bug in "absorb tensor.cast" pattern

Fixes an issue where we incorrectly assumed that all StableHLO operations have tensor operands. Other types can be used by various ops, at least `stablehlo.token` and `tuple` (see https://openxla.org/stablehlo/spec#types).

GitOrigin-RevId: 5415c7a0db725232fa30086c27ca38e70d28d0eb
Co-authored-by: Copybara Bot <[email protected]>
1 parent: ff1b5e3

3 files changed (+50, -3 lines)

mlir-tensorrt/compiler/lib/Dialect/StableHloExt/Transforms/ConstantFolding.cpp

Lines changed: 27 additions & 3 deletions
@@ -1038,7 +1038,11 @@ struct AbsorbTensorCastProducer : public RewritePattern {
     if (!canUpdateTypeWithoutCast(operand))
       return nullptr;
     Value value = operand.get();
-    auto rtt = cast<RankedTensorType>(value.getType());
+    // Not all stablehlo operands are tensors -- some can have types like
+    // 'tuple' or special quantized types.
+    auto rtt = dyn_cast<RankedTensorType>(value.getType());
+    if (!rtt)
+      return nullptr;
     auto castOp = value.getDefiningOp<tensor::CastOp>();
     if (!castOp)
       return nullptr;
@@ -1273,7 +1277,27 @@ class SimplifyConcatOfConcatPattern
   }
 };
 
+// Pattern: broadcast_in_dim(splat, _) -> constant(splat)
+struct FoldBroadcastInDimSplatPattern final
+    : OpRewritePattern<mlir::stablehlo::BroadcastInDimOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(mlir::stablehlo::BroadcastInDimOp op,
+                                PatternRewriter &rewriter) const override {
+    TypedValue<RankedTensorType> operand = op.getOperand();
+
+    if (SplatElementsAttr cstAttr;
+        matchPattern(operand, m_Constant(&cstAttr))) {
+      rewriter.replaceOpWithNewOp<mlir::stablehlo::ConstantOp>(
+          op, SplatElementsAttr::get(op.getType(),
+                                     cstAttr.getSplatValue<Attribute>()));
+      return success();
+    }
+    return failure();
+  }
+};
+
 void populateFutureUpstreamPatterns(RewritePatternSet &patterns) {
-  patterns.add<SimplifySliceOfConcat, SimplifyConcatOfConcatPattern>(
-      patterns.getContext());
+  patterns.add<SimplifySliceOfConcat, SimplifyConcatOfConcatPattern,
+               FoldBroadcastInDimSplatPattern>(patterns.getContext());
 }
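
The new FoldBroadcastInDimSplatPattern is sound because a splat constant is just one element value plus a type, so it can be rebuilt directly at the broadcast result type and the broadcast_in_dim dropped. Note the patterns above are only populated into a set, not applied; a hedged sketch of typical driver-side usage with MLIR's greedy rewriter follows (the actual pass wiring in this repo may differ):

    #include "mlir/Dialect/Func/IR/FuncOps.h"
    #include "mlir/IR/PatternMatch.h"
    #include "mlir/Transforms/GreedyPatternRewriteDriver.h"

    using namespace mlir;

    // Declared in the project's StableHloExt headers; forward-declared here
    // to keep the sketch self-contained (enclosing namespace elided).
    void populateFutureUpstreamPatterns(RewritePatternSet &patterns);

    // Sketch: apply the pattern set, now including
    // FoldBroadcastInDimSplatPattern, to a fixed point over one function.
    static LogicalResult runFolding(func::FuncOp func) {
      RewritePatternSet patterns(func.getContext());
      populateFutureUpstreamPatterns(patterns);
      return applyPatternsAndFoldGreedily(func, std::move(patterns));
    }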

mlir-tensorrt/tensorrt/lib/Target/TranslateToTensorRT.cpp

Lines changed: 7 additions & 0 deletions
@@ -803,6 +803,9 @@ class TranslateToTensorRTEnginePass
       continue;
     }
 
+    LLVM_DEBUG(DBGS() << "starting to build TensorRT engine for function "
+                      << func.getName() << "\n");
+
     FailureOr<TensorRTEngineResult> engineResult =
         buildFunction(func, *builderContext, *timingCache, translationOptions,
                       layerMetadataCallback);
@@ -811,6 +814,10 @@ class TranslateToTensorRTEnginePass
                  << "' to a TensorRT engine";
       return signalPassFailure();
     }
+
+    LLVM_DEBUG(DBGS() << "done building TensorRT engine for function "
+                      << func.getName() << "\n");
+
     const std::unique_ptr<nvinfer1::IHostMemory> &serializedEngine =
         engineResult->serializedEngine;
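
The added log lines use LLVM's debug facility: LLVM_DEBUG(...) compiles away unless the build enables assertions, and at runtime the output is gated by the -debug or -debug-only=<tag> flags, where the tag is the file's DEBUG_TYPE (DBGS() is a project-local wrapper around llvm::dbgs(); neither definition appears in this diff). A generic sketch with a hypothetical tag:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    // Hypothetical tag for illustration; the real DEBUG_TYPE is defined in
    // TranslateToTensorRT.cpp outside this diff.
    #define DEBUG_TYPE "translate-to-tensorrt"

    void logEngineBuild(llvm::StringRef funcName) {
      // Printed only with -debug or -debug-only=translate-to-tensorrt in an
      // assertions-enabled build.
      LLVM_DEBUG(llvm::dbgs() << "starting to build TensorRT engine for function "
                              << funcName << "\n");
    }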

mlir-tensorrt/test/Dialect/StableHloExt/constant-folding.mlir

Lines changed: 16 additions & 0 deletions
@@ -1141,3 +1141,19 @@ func.func private @add(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf
 // CHECK-DAG: %[[cast_0:.+]] = tensor.cast %[[arg1]] : tensor<4xf32> to tensor<?xf32>
 // CHECK-DAG: %[[v0:.+]] = stablehlo.composite "foo.bar" %[[cast]], %[[cast_0]] {decomposition = @add} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
 // CHECK-DAG: return %[[v0]] : tensor<?xf32>
+
+// -----
+
+// This is a regression check for a case where we previously had a crash/failure.
+// No change should be made.
+
+func.func @tuple_regression_check(%arg0: tuple<tensor<1xf32>, tensor<1xf32>>) -> tensor<1xf32> {
+  %0 = stablehlo.get_tuple_element %arg0[0] : (tuple<tensor<1xf32>, tensor<1xf32>>) -> tensor<1xf32>
+  return %0 : tensor<1xf32>
+}
+
+// CHECK-LABEL: func.func @tuple_regression_check
+//  CHECK-SAME: (%[[arg0:.+]]: tuple<tensor<1xf32>, tensor<1xf32>>)
+//       CHECK: %[[v0:.+]] = stablehlo.get_tuple_element %[[arg0]][0] : (tuple<tensor<1xf32>, tensor<1xf32>>) -> tensor<1xf32>
+//       CHECK: return %[[v0]] : tensor<1xf32>
