Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/mlir-tensorrt-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:
#!/bin/bash
set -e
python3 -m black --check --extend-exclude='.*\.pyi' mlir-tensorrt/compiler/
python3 -m black --check --extend-exclude='.*\.pyi' mlir-tensorrt/python/
python3 -m black --check --extend-exclude='.*\.pyi' mlir-tensorrt/integrations/python/
git clang-format HEAD~1 --diff
EOF

Expand Down
2 changes: 1 addition & 1 deletion mlir-tensorrt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -249,4 +249,4 @@ include_directories(${CMAKE_CURRENT_LIST_DIR}/tensorrt/include)
include_directories(${CMAKE_CURRENT_BINARY_DIR}/tensorrt/include)

add_subdirectory(compiler)
add_subdirectory(python)
add_subdirectory(integrations)
4 changes: 2 additions & 2 deletions mlir-tensorrt/build_tools/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ EOF
ARG PYTHON_VERSION=3.10
ENV PYENV_ROOT="/pyenv"
ENV PATH="/pyenv/bin:/pyenv/shims:$PATH"
COPY python/requirements-dev.txt /tmp/requirements-dev.txt
COPY python/requirements.txt /tmp/requirements.txt
COPY integrations/python/requirements-dev.txt /tmp/requirements-dev.txt
COPY integrations/python/requirements.txt /tmp/requirements.txt
RUN <<EOF
set -e
case "${LINUX_DISTRO}" in
Expand Down
2 changes: 1 addition & 1 deletion mlir-tensorrt/build_tools/scripts/build_wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ py_version=${PY_VERSION:-3.10}
PATH=$PATH:/pyenv/bin
mkdir -p .private.wheels || true
pyenv local ${py_version}
python${py_version} -m pip install -r python/requirements-dev.txt
python${py_version} -m pip install -r integrations/python/requirements-dev.txt

export DOWNLOAD_TENSORRT_VERSION=${DOWNLOAD_TENSORRT_VERSION:-10.9}

Expand Down
2 changes: 1 addition & 1 deletion mlir-tensorrt/build_tools/scripts/cicd_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export ENABLE_ASAN=${ENABLE_ASAN:-OFF}
export CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE:-/.cache.cpm}
export CCACHE_DIR=${CCACHE_DIR:-/ccache}

python3 -m pip install -r python/requirements-dev.txt
python3 -m pip install -r integrations/python/requirements-dev.txt

ccache --zero-stats || true
rm -rf ${BUILD_DIR} || true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,96 +25,13 @@
#ifndef MLIR_TENSORRT_DIALECT_PLAN_ANALYSIS_BOUNDSANALYSIS
#define MLIR_TENSORRT_DIALECT_PLAN_ANALYSIS_BOUNDSANALYSIS

#include "mlir-tensorrt/Interfaces/InferTensorValueRangeInterface.h"
#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
#include "mlir/Analysis/DataFlow/SparseAnalysis.h"
#include "mlir/Interfaces/InferIntRangeInterface.h"
#include "llvm/Support/raw_ostream.h"

namespace mlir::plan {

//===----------------------------------------------------------------------===//
// BoundsArray
//===----------------------------------------------------------------------===//

/// A BoundsArray is simply an array of ConstantIntRanges used to represent
/// either the bounds on a shape of a tensor-typed SSA value or the bounds
/// of the element values of a statically shaped integer tensor-typed SSA value.
/// When it is used to represent the bounds for the value of a tensor, we use
/// a canonical packed generalized row-major layout mapping from tensor
/// coordinates to storage index.
class BoundsArray {
public:
/// Construct from an optional array of ranges. The default argument
/// (`std::nullopt`) yields an uninitialized BoundsArray.
BoundsArray(
std::optional<SmallVector<ConstantIntRanges>> value = std::nullopt)
: value(std::move(value)) {}

/// Returns true if no ranges are stored.
bool isUninitialized() const { return !value.has_value(); }

/// Two BoundsArrays compare equal when their optional range arrays compare
/// equal.
bool operator==(const BoundsArray &rhs) const { return value == rhs.value; }

/// Return a view of the stored ranges. Must only be called when the
/// BoundsArray is initialized.
ArrayRef<ConstantIntRanges> getValue() const {
assert(!isUninitialized());
return *value;
}

/// Return the most conservative integer scalar bounds for a dynamic/unknown
/// dimension extent.
static ConstantIntRanges getMaxDimRange();

/// Create a BoundsArray from the min/max bounds of a shape. Using this method
/// ensures that the ranges in `value` are created with the correct storage
/// bitwidth (an implementation detail of the analysis).
static BoundsArray fromShapeBounds(ArrayRef<int64_t> min,
ArrayRef<int64_t> max);

/// Create a `BoundsArray` using the given scalar values encoded as int64_t
/// values. However, when storing the bounds, use the given bitwidth.
/// TODO: remove this when we migrate away from using
/// `#tensorrt.shape_profile` for value bounds.
static BoundsArray fromIntegerValueBounds(unsigned bitwidth,
ArrayRef<int64_t> min,
ArrayRef<int64_t> max);
/// Overload taking the min/max scalar bounds as `llvm::APInt` values.
static BoundsArray fromIntegerValueBounds(ArrayRef<llvm::APInt> min,
ArrayRef<llvm::APInt> max);

/// For the given tensor-typed value, return the most conservative bounds for
/// the shape of `v`. For each unknown dimension of the shape of `v` the
/// `getMaxDimRange()` bound is used.
static BoundsArray getMaxRangeForShapeBounds(Value v);

/// For the given statically shaped integer tensor-typed value, return the
/// most conservative bounds for the value of `v`.
static BoundsArray getMaxRangeForValueBounds(Value v);

/// For the given DenseIntElementsAttr, return a corresponding BoundsArray
/// representing constant bounds as indicated by the attribute.
static BoundsArray getFromConstantValue(DenseIntElementsAttr attr);

/// Join two BoundsArrays by performing a pointwise union of the integer
/// scalar ranges.
static BoundsArray join(const BoundsArray &lhs, const BoundsArray &rhs);

/// Meet two BoundsArrays by performing a pointwise intersection of the
/// integer scalar ranges.
static BoundsArray meet(const BoundsArray &lhs, const BoundsArray &rhs);

/// Print a human-readable representation of the bounds.
void print(raw_ostream &os) const;

/// Return the min/max bounds representation as two DenseElementsAttrs.
std::pair<DenseElementsAttr, DenseElementsAttr>
getAsElementsAttr(RankedTensorType type) const;

/// Returns DenseElementsAttr representation if the element ranges are all
/// constant (single-value) ranges, otherwise nullopt.
std::optional<DenseElementsAttr>
getConstantValues(RankedTensorType type) const;

private:
/// The stored ranges; `std::nullopt` denotes the uninitialized state.
std::optional<SmallVector<ConstantIntRanges>> value;
};

llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const BoundsArray &v);
using BoundsArray = mlirtrt::compiler::BoundsArray;

//===----------------------------------------------------------------------===//
// Shape Bounds Analyses
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ add_public_tablegen_target(MLIRTensorRTPlanDialectAttributesIncGen)
set(LLVM_TARGET_DEFINITIONS PlanInterfaces.td)
mlir_tablegen(PlanAttrInterfaces.h.inc -gen-attr-interface-decls)
mlir_tablegen(PlanAttrInterfaces.cpp.inc -gen-attr-interface-defs)
mlir_tablegen(PlanOpInterfaces.h.inc -gen-op-interface-decls)
mlir_tablegen(PlanOpInterfaces.cpp.inc -gen-op-interface-defs)
add_public_tablegen_target(MLIRTensorRTPlanDialectAttrInterfacesIncGen)
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "mlir-tensorrt-dialect/Interface/TensorKindOpInterface.h"
#include "mlir-tensorrt/Compiler/Extension.h"
#include "mlir-tensorrt/Dialect/Plan/IR/PlanInterfaces.h"
#include "mlir-tensorrt/Interfaces/InferTensorValueRangeInterface.h"
#include "mlir/Bytecode/BytecodeOpInterface.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/DestinationStyleOpInterface.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir-tensorrt/Interfaces/InferTensorValueRangeInterface.td"
include "mlir/IR/OpAsmInterface.td"

class Plan_NativeOpTrait<string name,
Expand Down Expand Up @@ -433,6 +434,8 @@ def Plan_WithShapeOp : Plan_Op<"with_shape",
def Plan_WithValuesOp : Plan_Op<"with_values",
[Pure,
DeclareOpInterfaceMethods<TensorKindOpInterface>,
DeclareOpInterfaceMethods<InferTensorValueRangeInterface,
["inferResultRangesFromOptional"]>,
AllTypesMatch<["operand", "result"]>]> {
let summary =
"Ties a tensor value with index SSA values representing its element values";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,9 +258,9 @@ def ClusteringPass : Pass<"plan-clustering", "::mlir::ModuleOp"> {
operations will be compiled.

The kinds of clusters that can be formed and the specific rules for
clustering are defined by the clustering configuration specified
clustering are defined by the clustering configuration specified
by the module's `plan.cluster_kinds` attribute. This is an array of
attributes which all implement the
attributes which all implement the
[ClusterKindAttrInterface](../IR/PlanInterfaces.td).
}];

Expand Down Expand Up @@ -585,5 +585,35 @@ def PlanOwnershipBasedBufferDeallocationPass : Pass<
];
}

//===----------------------------------------------------------------------===//
// PlanOutlineConstantFoldableSubgraphs
//===----------------------------------------------------------------------===//

// Declares the `plan-outline-constant-foldable-subgraphs` pass, anchored on
// `::mlir::ModuleOp`.
def PlanOutlineConstantFoldableSubgraphsPass : Pass<
"plan-outline-constant-foldable-subgraphs",
"::mlir::ModuleOp"> {
let summary = "Analyze and outline constant foldable subgraphs";

let description = [{
This pass implements forward dataflow analysis (named `SparseConstantFoldabilityAnalysis`)
to find out constant foldable ops. This analysis, unlike upstream
`ConstantPropagationAnalysis` is very simple and works only for pure ops.
If all operands of an operation are constant foldable, all results are marked
as constant foldable.
Constant foldability analysis is then used along with clustering to
find constant foldable subgraphs. These constant foldable subgraphs are
finally outlined to a private function with `plan.constant_foldable` attribute.
}];

let options = [
// Callback-valued option: a predicate over `Operation *` that defaults to
// `nullptr` (no extra ops are skipped beyond the pass defaults).
Option<"skipClustering", "skip-clustering",
"std::function<bool(Operation*)>", /*default=*/"nullptr",
"This option enables user to extend default pass behavior and skip "
"more ops from clustering. If this method returns true, `op` is not "
"clustered. When op is not clustered, it is not outlined for constant "
"folding. This is helpful in avoiding clustering of ops that can't be "
"run e2e at compile time, in the workflow of user's choice.">,
];
}

#endif // MLIR_TENSORRT_DIALECT_PLAN_TRANSFORMS_PASSES_TD
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ void registerTensorKindOpInterfaceExternalModels(DialectRegistry &registry);
/// Register StableHlo op implementations for ReifyRankedShapedTypeOpInterface.
void registerTypeInferenceExternalModels(DialectRegistry &registry);

/// Register StableHlo op implementations for InferTensorValueRangeInterface.
void registerInferTensorValueRangeInterfaceExternalModels(
DialectRegistry &registry);

} // namespace mlir::stablehlo

#endif // MLIR_TENSORRT_DIALECT_STABLEHLOEXT_IR_STABLEHLOEXT_H
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ inline void registerAllDialects(mlir::DialectRegistry &registry) {
mlir::vector::registerValueBoundsOpInterfaceExternalModels(registry);

IF_MLIR_TRT_ENABLE_HLO({
mlir::stablehlo::registerInferTensorValueRangeInterfaceExternalModels(
registry);
mlir::stablehlo::registerTensorKindOpInterfaceExternalModels(registry);
mlir::stablehlo::registerTypeInferenceExternalModels(registry);
});
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
//===- InferTensorValueRangeInterface.h -------------------------*- C++ -*-===//
//
// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
// All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
///
/// Declarations for InferTensorValueRangeInterface.
///
//===----------------------------------------------------------------------===//
#ifndef MLIR_TENSORRT_INTERFACES_INFERTENSORVALUERANGEINTERFACE
#define MLIR_TENSORRT_INTERFACES_INFERTENSORVALUERANGEINTERFACE

#include "mlir/Interfaces/InferIntRangeInterface.h"
#include <optional>

namespace mlirtrt::compiler {

//===----------------------------------------------------------------------===//
// BoundsArray
//===----------------------------------------------------------------------===//

/// A BoundsArray is simply an array of mlir::ConstantIntRanges used to
/// represent either the bounds on a shape of a tensor-typed SSA value or the
/// bounds of the element values of a statically shaped integer tensor-typed SSA
/// value. When it is used to represent the bounds for the value of a tensor, we
/// use a canonical packed generalized row-major layout mapping from tensor
/// coordinates to storage index.
class BoundsArray {
public:
  /// Construct an uninitialized BoundsArray (no ranges stored).
  BoundsArray() : value(std::nullopt) {}

  /// Construct an initialized BoundsArray holding a copy of the given ranges.
  BoundsArray(llvm::ArrayRef<mlir::ConstantIntRanges> value)
      : value(std::make_optional(llvm::to_vector(value))) {}

  /// Returns true if no ranges are stored.
  bool isUninitialized() const { return !value.has_value(); }

  /// Two BoundsArrays compare equal when their optional range arrays compare
  /// equal.
  bool operator==(const BoundsArray &rhs) const { return value == rhs.value; }

  /// Return a view of the stored ranges. Must only be called when the
  /// BoundsArray is initialized.
  llvm::ArrayRef<mlir::ConstantIntRanges> getValue() const {
    assert(!isUninitialized() &&
           "cannot get the value of an uninitialized BoundsArray");
    return *value;
  }

  /// Return the most conservative integer scalar bounds for a dynamic/unknown
  /// dimension extent.
  static mlir::ConstantIntRanges getMaxDimRange();

  /// Create a BoundsArray from the min/max bounds of a shape. Using this
  /// method ensures that the ranges in `value` are created with the correct
  /// storage bitwidth (an implementation detail of the analysis).
  static BoundsArray fromShapeBounds(llvm::ArrayRef<int64_t> min,
                                     llvm::ArrayRef<int64_t> max);

  /// Create a `BoundsArray` using the given scalar values encoded as int64_t
  /// values. However, when storing the bounds, use the given bitwidth.
  /// TODO: remove this when we migrate away from using
  /// `#tensorrt.shape_profile` for value bounds.
  static BoundsArray fromIntegerValueBounds(unsigned bitwidth,
                                            llvm::ArrayRef<int64_t> min,
                                            llvm::ArrayRef<int64_t> max);

  /// Overload taking the min/max scalar bounds as `llvm::APInt` values.
  static BoundsArray fromIntegerValueBounds(llvm::ArrayRef<llvm::APInt> min,
                                            llvm::ArrayRef<llvm::APInt> max);

  /// For the given tensor-typed value, return the most conservative bounds for
  /// the shape of `v`. For each unknown dimension of the shape of `v` the
  /// `getMaxDimRange()` bound is used.
  static BoundsArray getMaxRangeForShapeBounds(mlir::Value v);

  /// For the given statically shaped integer tensor-typed value, return the
  /// most conservative bounds for the value of `v`.
  static BoundsArray getMaxRangeForValueBounds(mlir::Value v);

  /// For the given DenseIntElementsAttr, return a corresponding BoundsArray
  /// representing constant bounds as indicated by the attribute.
  static BoundsArray getFromConstantValue(mlir::DenseIntElementsAttr attr);

  /// Join two BoundsArrays by performing a pointwise union of the integer
  /// scalar ranges.
  static BoundsArray join(const BoundsArray &lhs, const BoundsArray &rhs);

  /// Meet two BoundsArrays by performing a pointwise intersection of the
  /// integer scalar ranges.
  static BoundsArray meet(const BoundsArray &lhs, const BoundsArray &rhs);

  /// Print a human-readable representation of the bounds.
  void print(llvm::raw_ostream &os) const;

  /// Return the min/max bounds representation as two DenseElementsAttrs.
  std::pair<mlir::DenseElementsAttr, mlir::DenseElementsAttr>
  getAsElementsAttr(mlir::RankedTensorType type) const;

  /// Returns DenseElementsAttr representation if the element ranges are all
  /// constant (single-value) ranges, otherwise nullopt.
  std::optional<mlir::DenseElementsAttr>
  getConstantValues(mlir::RankedTensorType type) const;

  /// The maximum volume of a tensor whose element values the analysis will
  /// track. This is used to avoid edge cases where tracking the bounds would
  /// require an excessive amount of memory.
  static constexpr int64_t kMaxVolumeThreshold = 32;

  /// Whether the analysis should consider values of the given type. To be
  /// considered, the type must be a ranked tensor of static shape and
  /// signless-or-index integer element type and have a total volume <=
  /// kMaxVolumeThreshold.
  static bool shouldAnalyzeValueBounds(mlir::Type type);

  /// Whether the analysis should consider the given value. To be considered,
  /// it must be a ranked tensor of static shape and signless-or-index integer
  /// element type and have a total volume <= kMaxVolumeThreshold.
  static bool shouldAnalyzeValueBounds(mlir::Value value);

private:
  /// The stored ranges; `std::nullopt` denotes the uninitialized state.
  std::optional<llvm::SmallVector<mlir::ConstantIntRanges>> value;
};

llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const BoundsArray &v);

/// Represents either a BoundsArray lattice value or an IntegerValueRange
/// lattice value, stored as a pointer union over const pointers to the two
/// types.
struct IntOrTensorValueRange
: public llvm::PointerUnion<const BoundsArray *,
const mlir::IntegerValueRange *> {
// Inherit the PointerUnion constructors.
using PointerUnion::PointerUnion;
};

/// Similar to SetIntRangeFn, but operating on BoundsArray lattice values.
/// This is the `setResultRanges` callback for the BoundsArray based
/// interface method: it receives a value and the BoundsArray to record for it.
using SetTensorValueLatticeFn =
llvm::function_ref<void(mlir::Value, BoundsArray)>;

class InferTensorValueRangeInterface;

namespace detail {} // namespace detail

} // namespace mlirtrt::compiler

#include "mlir-tensorrt/Interfaces/InferTensorValueRangeInterface.h.inc"

#endif // MLIR_TENSORRT_INTERFACES_INFERTENSORVALUERANGEINTERFACE
Loading