NVIDIA · christopherbate · Jun 11, 2025 · Jun 11, 2025
@@ -100,7 +100,7 @@ jobs:
             #!/bin/bash
             set -e
             python3 -m black --check --extend-exclude='.*\.pyi' mlir-tensorrt/compiler/
-            python3 -m black --check --extend-exclude='.*\.pyi' mlir-tensorrt/python/
+            python3 -m black --check --extend-exclude='.*\.pyi' mlir-tensorrt/integrations/python/
             git clang-format HEAD~1 --diff
             EOF
 

@@ -249,4 +249,4 @@ include_directories(${CMAKE_CURRENT_LIST_DIR}/tensorrt/include)
 include_directories(${CMAKE_CURRENT_BINARY_DIR}/tensorrt/include)
 
 add_subdirectory(compiler)
-add_subdirectory(python)
+add_subdirectory(integrations)
@@ -74,8 +74,8 @@ EOF
 ARG PYTHON_VERSION=3.10
 ENV PYENV_ROOT="/pyenv"
 ENV PATH="/pyenv/bin:/pyenv/shims:$PATH"
-COPY python/requirements-dev.txt /tmp/requirements-dev.txt
-COPY python/requirements.txt /tmp/requirements.txt
+COPY integrations/python/requirements-dev.txt /tmp/requirements-dev.txt
+COPY integrations/python/requirements.txt /tmp/requirements.txt
 RUN <<EOF
 set -e
 case "${LINUX_DISTRO}" in

@@ -9,7 +9,7 @@ py_version=${PY_VERSION:-3.10}
 PATH=$PATH:/pyenv/bin
 mkdir -p .private.wheels || true
 pyenv local ${py_version}
-python${py_version} -m pip install -r python/requirements-dev.txt
+python${py_version} -m pip install -r integrations/python/requirements-dev.txt
 
 export DOWNLOAD_TENSORRT_VERSION=${DOWNLOAD_TENSORRT_VERSION:-10.9}
 

@@ -10,7 +10,7 @@ export ENABLE_ASAN=${ENABLE_ASAN:-OFF}
 export CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE:-/.cache.cpm}
 export CCACHE_DIR=${CCACHE_DIR:-/ccache}
 
-python3 -m pip install -r python/requirements-dev.txt
+python3 -m pip install -r integrations/python/requirements-dev.txt
 
 ccache --zero-stats || true
 rm -rf ${BUILD_DIR}  || true

@@ -25,96 +25,13 @@
 #ifndef MLIR_TENSORRT_DIALECT_PLAN_ANALYSIS_BOUNDSANALYSIS
 #define MLIR_TENSORRT_DIALECT_PLAN_ANALYSIS_BOUNDSANALYSIS
 
+#include "mlir-tensorrt/Interfaces/InferTensorValueRangeInterface.h"
 #include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
 #include "mlir/Analysis/DataFlow/SparseAnalysis.h"
-#include "mlir/Interfaces/InferIntRangeInterface.h"
-#include "llvm/Support/raw_ostream.h"
 
 namespace mlir::plan {
 
-//===----------------------------------------------------------------------===//
-// BoundsArray
-//===----------------------------------------------------------------------===//
-
-/// A BoundsArray is simply an array of ConstantIntRanges used to represent
-/// either the bounds on a shape of a tensor-typed SSA value or the bounds
-/// of the element values of a statically shaped integer tensor-typed SSA value.
-/// When it is used to represent the bounds for the value of a tensor, we use
-/// a canonical packed generalized row-major layout mapping from tensor
-/// coordinates to storage index.
-class BoundsArray {
-public:
-  BoundsArray(
-      std::optional<SmallVector<ConstantIntRanges>> value = std::nullopt)
-      : value(std::move(value)) {}
-
-  bool isUninitialized() const { return !value.has_value(); }
-
-  bool operator==(const BoundsArray &rhs) const { return value == rhs.value; }
-
-  ArrayRef<ConstantIntRanges> getValue() const {
-    assert(!isUninitialized());
-    return *value;
-  }
-
-  /// Return the most conservative integer scalar bounds for an dynamic/unknown
-  /// dimension extent.
-  static ConstantIntRanges getMaxDimRange();
-
-  /// Create a BoundsValue from the min/max bounds of shape. Using this method
-  /// ensures that the `value` are created with the correct storage bitwidth
-  /// (an implementation detail of the analysis).
-  static BoundsArray fromShapeBounds(ArrayRef<int64_t> min,
-                                     ArrayRef<int64_t> max);
-
-  /// Create a `BoundsValue` using the given scalar values encoded as int64_t
-  /// values. However, when storing the bounds, use the given bitwidth.
-  /// TODO: remove this when we migrate away from using
-  /// `#tensorrt.shape_profile` for value bounds.
-  static BoundsArray fromIntegerValueBounds(unsigned bitwidth,
-                                            ArrayRef<int64_t> min,
-                                            ArrayRef<int64_t> max);
-  static BoundsArray fromIntegerValueBounds(ArrayRef<llvm::APInt> min,
-                                            ArrayRef<llvm::APInt> max);
-
-  /// For the given tensor-typed value, return the most conservative bounds for
-  /// the shape of `v`. For each unknown dimension of the shape of `v` the
-  /// `getMaxDimRange()` bound is used.
-  static BoundsArray getMaxRangeForShapeBounds(Value v);
-
-  /// For the given statically shaped integer tensor-typed value, return the
-  /// most conservative bounds for the value of `v`.
-  static BoundsArray getMaxRangeForValueBounds(Value v);
-
-  /// For the given DenseIntElementsAttr, return a corresponding BoudnsValue
-  /// representing constant bounds as indicated by the attribute.
-  static BoundsArray getFromConstantValue(DenseIntElementsAttr attr);
-
-  /// Join two BoundsValues by performing a pointwise union of the integer
-  /// scalar a ranges.
-  static BoundsArray join(const BoundsArray &lhs, const BoundsArray &rhs);
-
-  /// Meet two BoundsValues by performing a pointwise intersection of the
-  /// integer scalar a ranges.
-  static BoundsArray meet(const BoundsArray &lhs, const BoundsArray &rhs);
-
-  /// Print a human-readable representation of the bounds.
-  void print(raw_ostream &os) const;
-
-  /// Return the min/max bounds representation as two DenseElementsAttrs.
-  std::pair<DenseElementsAttr, DenseElementsAttr>
-  getAsElementsAttr(RankedTensorType type) const;
-
-  /// Returns DenseElementsAttr representation if the element ranges are all
-  /// constant (single-value) ranges, otherwise nullopt.
-  std::optional<DenseElementsAttr>
-  getConstantValues(RankedTensorType type) const;
-
-private:
-  std::optional<SmallVector<ConstantIntRanges>> value;
-};
-
-llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const BoundsArray &v);
+using BoundsArray = mlirtrt::compiler::BoundsArray;
 
 //===----------------------------------------------------------------------===//
 // Shape Bounds Analyses

@@ -17,4 +17,6 @@ add_public_tablegen_target(MLIRTensorRTPlanDialectAttributesIncGen)
 set(LLVM_TARGET_DEFINITIONS PlanInterfaces.td)
 mlir_tablegen(PlanAttrInterfaces.h.inc -gen-attr-interface-decls)
 mlir_tablegen(PlanAttrInterfaces.cpp.inc -gen-attr-interface-defs)
+mlir_tablegen(PlanOpInterfaces.h.inc -gen-op-interface-decls)
+mlir_tablegen(PlanOpInterfaces.cpp.inc -gen-op-interface-defs)
 add_public_tablegen_target(MLIRTensorRTPlanDialectAttrInterfacesIncGen)
@@ -27,6 +27,7 @@
 #include "mlir-tensorrt-dialect/Interface/TensorKindOpInterface.h"
 #include "mlir-tensorrt/Compiler/Extension.h"
 #include "mlir-tensorrt/Dialect/Plan/IR/PlanInterfaces.h"
+#include "mlir-tensorrt/Interfaces/InferTensorValueRangeInterface.h"
 #include "mlir/Bytecode/BytecodeOpInterface.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/BuiltinOps.h"

@@ -8,6 +8,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/DestinationStyleOpInterface.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
+include "mlir-tensorrt/Interfaces/InferTensorValueRangeInterface.td"
 include "mlir/IR/OpAsmInterface.td"
 
 class Plan_NativeOpTrait<string name,
@@ -433,6 +434,8 @@ def Plan_WithShapeOp : Plan_Op<"with_shape",
 def Plan_WithValuesOp : Plan_Op<"with_values",
     [Pure,
      DeclareOpInterfaceMethods<TensorKindOpInterface>,
+     DeclareOpInterfaceMethods<InferTensorValueRangeInterface,
+      ["inferResultRangesFromOptional"]>,
      AllTypesMatch<["operand", "result"]>]> {
   let summary =
     "Ties a tensor value with index SSA values representing its element values";

@@ -258,9 +258,9 @@ def ClusteringPass : Pass<"plan-clustering", "::mlir::ModuleOp"> {
     operations will be compiled.
 
     The kinds of clusters that can be formed and the specific rules for
-    clustering are defined by the clustering configuration specified 
+    clustering are defined by the clustering configuration specified
     by the module's `plan.cluster_kinds` attribute. This is an array of
-    attributes which all implement the 
+    attributes which all implement the
     [ClusterKindAttrInterface](../IR/PlanInterfaces.td).
   }];
 
@@ -585,5 +585,35 @@ def PlanOwnershipBasedBufferDeallocationPass : Pass<
   ];
 }
 
+//===----------------------------------------------------------------------===//
+// PlanOutlineConstantFoldableSubgraphs
+//===----------------------------------------------------------------------===//
+
+def PlanOutlineConstantFoldableSubgraphsPass : Pass<
+      "plan-outline-constant-foldable-subgraphs",
+      "::mlir::ModuleOp"> {
+  let summary = "Analyze and outline constant foldable subgraphs";
+
+  let description = [{
+    This pass implements forward dataflow analysis (named `SparseConstantFoldabilityAnalysis`)
+    to find out constant foldable ops. This analysis, unlike upstream
+    `ConstantPropagationAnalysis` is very simple and works only for pure ops.
+    If all operands of an operation are constant foldable, all results are marked
+    as constant foldable.
+    Constant foldability analysis is then used along with clustering to
+    find constant foldable subgraphs. These constant foldable subgraphs are
+    finally outlined to a private function with `plan.constant_foldable` attribute.
+  }];
+
+  let options = [
+    Option<"skipClustering", "skip-clustering",
+           "std::function<bool(Operation*)>", /*default=*/"nullptr",
+           "This option enables user to extend default pass behavior and skip "
+           "more ops from clustering. If this method returns true, `op` is not "
+           "clustered. When op is not clustered, it is not outlined for constant "
+           "folding. This is helpful in avoiding clustering of ops that can't be "
+           "run e2e at compile time, in the workflow of user's choice.">,
+  ];
+}
 
 #endif // MLIR_TENSORRT_DIALECT_PLAN_TRANSFORMS_PASSES_TD
@@ -34,6 +34,10 @@ void registerTensorKindOpInterfaceExternalModels(DialectRegistry &registry);
 /// Register StableHlo op implementations for ReifyRankedShapedTypeOpInterface.
 void registerTypeInferenceExternalModels(DialectRegistry &registry);
 
+/// Register StableHlo op implementations for InferTensorValueRangeInterface.
+void registerInferTensorValueRangeInterfaceExternalModels(
+    DialectRegistry &registry);
+
 } // namespace mlir::stablehlo
 
 #endif // MLIR_TENSORRT_DIALECT_STABLEHLOEXT_IR_STABLEHLOEXT_H
@@ -189,6 +189,8 @@ inline void registerAllDialects(mlir::DialectRegistry &registry) {
   mlir::vector::registerValueBoundsOpInterfaceExternalModels(registry);
 
   IF_MLIR_TRT_ENABLE_HLO({
+    mlir::stablehlo::registerInferTensorValueRangeInterfaceExternalModels(
+        registry);
     mlir::stablehlo::registerTensorKindOpInterfaceExternalModels(registry);
     mlir::stablehlo::registerTypeInferenceExternalModels(registry);
   });

@@ -0,0 +1,153 @@
+//===- InferTensorValueRangeInterface.h -------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Declarations for InferTensorValueRangeInterface.
+///
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_TENSORRT_INTERFACES_INFERTENSORVALUERANGEINTERFACE
+#define MLIR_TENSORRT_INTERFACES_INFERTENSORVALUERANGEINTERFACE
+
+#include "mlir/Interfaces/InferIntRangeInterface.h"
+#include <optional>
+
+namespace mlirtrt::compiler {
+
+//===----------------------------------------------------------------------===//
+// BoundsArray
+//===----------------------------------------------------------------------===//
+
+/// A BoundsArray is simply an array of mlir::ConstantIntRanges used to
+/// represent either the bounds on a shape of a tensor-typed SSA value or the
+/// bounds of the element values of a statically shaped integer tensor-typed SSA
+/// value. When it is used to represent the bounds for the value of a tensor, we
+/// use a canonical packed generalized row-major layout mapping from tensor
+/// coordinates to storage index.
+class BoundsArray {
+public:
+  BoundsArray() : value(std::nullopt) {} // Starts uninitialized (no ranges).
+
+  BoundsArray(llvm::ArrayRef<mlir::ConstantIntRanges> value)
+      : value(std::make_optional(llvm::to_vector(value))) {}
+
+  bool isUninitialized() const { return !value.has_value(); }
+
+  bool operator==(const BoundsArray &rhs) const { return value == rhs.value; }
+
+  llvm::ArrayRef<mlir::ConstantIntRanges> getValue() const {
+    assert(!isUninitialized()); // Only valid once ranges have been set.
+    return *value;
+  }
+
+  /// Return the most conservative integer scalar bounds for a dynamic/unknown
+  /// dimension extent.
+  static mlir::ConstantIntRanges getMaxDimRange();
+
+  /// Create a BoundsArray from the min/max bounds of a shape. Using this
+  /// method ensures that the ranges in `value` are created with the correct
+  /// storage bitwidth (an implementation detail of the analysis).
+  static BoundsArray fromShapeBounds(llvm::ArrayRef<int64_t> min,
+                                     llvm::ArrayRef<int64_t> max);
+
+  /// Create a `BoundsArray` using the given scalar values encoded as int64_t
+  /// values. However, when storing the bounds, use the given bitwidth.
+  /// TODO: remove this when we migrate away from using
+  /// `#tensorrt.shape_profile` for value bounds.
+  static BoundsArray fromIntegerValueBounds(unsigned bitwidth,
+                                            llvm::ArrayRef<int64_t> min,
+                                            llvm::ArrayRef<int64_t> max);
+  static BoundsArray fromIntegerValueBounds(llvm::ArrayRef<llvm::APInt> min,
+                                            llvm::ArrayRef<llvm::APInt> max);
+
+  /// For the given tensor-typed value, return the most conservative bounds for
+  /// the shape of `v`. For each unknown dimension of the shape of `v` the
+  /// `getMaxDimRange()` bound is used.
+  static BoundsArray getMaxRangeForShapeBounds(mlir::Value v);
+
+  /// For the given statically shaped integer tensor-typed value, return the
+  /// most conservative bounds for the value of `v`.
+  static BoundsArray getMaxRangeForValueBounds(mlir::Value v);
+
+  /// For the given DenseIntElementsAttr, return a corresponding BoundsArray
+  /// representing constant bounds as indicated by the attribute.
+  static BoundsArray getFromConstantValue(mlir::DenseIntElementsAttr attr);
+
+  /// Join two BoundsArrays by performing a pointwise union of the integer
+  /// scalar ranges.
+  static BoundsArray join(const BoundsArray &lhs, const BoundsArray &rhs);
+
+  /// Meet two BoundsArrays by performing a pointwise intersection of the
+  /// integer scalar ranges.
+  static BoundsArray meet(const BoundsArray &lhs, const BoundsArray &rhs);
+
+  /// Print a human-readable representation of the bounds.
+  void print(llvm::raw_ostream &os) const;
+
+  /// Return the min/max bounds representation as two DenseElementsAttrs.
+  std::pair<mlir::DenseElementsAttr, mlir::DenseElementsAttr>
+  getAsElementsAttr(mlir::RankedTensorType type) const;
+
+  /// Returns DenseElementsAttr representation if the element ranges are all
+  /// constant (single-value) ranges, otherwise nullopt.
+  std::optional<mlir::DenseElementsAttr>
+  getConstantValues(mlir::RankedTensorType type) const;
+
+  /// The maximum volume of a tensor whose element value bounds we will track.
+  /// This is used to avoid edge cases where tracking the bounds would require
+  /// an excessive amount of memory.
+  static constexpr int64_t kMaxVolumeThreshold = 32;
+
+  /// Whether the analysis should consider values of type `type`. The type must
+  /// be a ranked tensor of static shape with signless-or-index integer element
+  /// type and a total volume <= kMaxVolumeThreshold.
+  static bool shouldAnalyzeValueBounds(mlir::Type type);
+
+  /// Whether the analysis should consider `value`. The value must be a ranked
+  /// tensor of static shape with signless-or-index integer element type and a
+  /// total volume <= kMaxVolumeThreshold.
+  static bool shouldAnalyzeValueBounds(mlir::Value value);
+
+private:
+  std::optional<llvm::SmallVector<mlir::ConstantIntRanges>> value;
+};
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const BoundsArray &v);
+
+/// Represents either a BoundsArray lattice or an IntegerValueRange lattice.
+struct IntOrTensorValueRange
+    : public llvm::PointerUnion<const BoundsArray *,
+                                const mlir::IntegerValueRange *> {
+  using PointerUnion::PointerUnion;
+};
+
+/// Similar to SetIntRangeFn, but operating on BoundsArray lattice values.
+/// This is the `setResultRanges` callback for the BoundsArray based
+/// interface method.
+using SetTensorValueLatticeFn =
+    llvm::function_ref<void(mlir::Value, BoundsArray)>;
+
+class InferTensorValueRangeInterface;
+
+namespace detail {} // namespace detail
+
+} // namespace mlirtrt::compiler
+
+#include "mlir-tensorrt/Interfaces/InferTensorValueRangeInterface.h.inc"
+
+#endif // MLIR_TENSORRT_INTERFACES_INFERTENSORVALUERANGEINTERFACE