Adds support for a dimension names attribute for block arguments

pranavm-nvidia · pranavm-nvidia · commit 34df3e223695 · 2025-06-11T09:27:21.000-07:00
Adds support for a `tensorrt.dimension_names` argument attribute, which is used to set dimension
names on TensorRT network inputs. Dimensions that share a name are treated as equal at runtime.
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
@@ -240,13 +240,17 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
 
   StringRef tensorrtShapeBoundsAttrName =
       mlir::tensorrt::TensorRTDialect::getShapeProfileArgAttrName();
+  StringRef tensorrtDimensionNamesAttrName =
+      mlir::tensorrt::TensorRTDialect::getDimensionNamesArgAttrName();
   func::FuncOp funcContainingCluster =
       cluster.back()->getParentOfType<func::FuncOp>();
   SmallVector<Attribute> profileAttrsPerInput;
+  SmallVector<Attribute> dimensionNamesAttrsPerInput;
   for (Value v : inputs) {
     auto rtt = dyn_cast<RankedTensorType>(v.getType());
     if (!rtt || rtt.hasStaticShape()) {
       profileAttrsPerInput.push_back(Attribute{});
+      dimensionNamesAttrsPerInput.push_back(Attribute{});
       continue;
     }
 
@@ -263,6 +267,10 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
         funcContainingCluster.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
             argIndex, tensorrtShapeBoundsAttrName));
 
+    dimensionNamesAttrsPerInput.push_back(
+        funcContainingCluster.getArgAttrOfType<DictionaryAttr>(
+            argIndex, tensorrtDimensionNamesAttrName));
+
     if (!profileAttrsPerInput.back()) {
       return emitError(blockArg.getLoc())
              << "Profile attribute (" << tensorrtShapeBoundsAttrName
@@ -271,10 +279,12 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
   }
 
   for (unsigned idx = 0; idx < func->getNumArguments(); idx++) {
-    if (!profileAttrsPerInput[idx])
-      continue;
-    func->setArgAttr(idx, tensorrtShapeBoundsAttrName,
-                     profileAttrsPerInput[idx]);
+    if (profileAttrsPerInput[idx])
+      func->setArgAttr(idx, tensorrtShapeBoundsAttrName,
+                       profileAttrsPerInput[idx]);
+    if (dimensionNamesAttrsPerInput[idx])
+      func->setArgAttr(idx, tensorrtDimensionNamesAttrName,
+                       dimensionNamesAttrsPerInput[idx]);
   }
 
   rewriter.setInsertionPoint(inlineGroupOp);
diff --git a/mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTDialect.td b/mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTDialect.td
@@ -38,6 +38,12 @@ def TensorRT_Dialect : Dialect {
       return "tensorrt.shape_profile";
     }
 
+    /// Return the name of the function arg attr that encodes the dimension
+    /// names: a `DictionaryAttr` mapping dimension index to a name string.
+    static StringRef getDimensionNamesArgAttrName() {
+      return "tensorrt.dimension_names";
+    }
+
     /// TensorRT quantization and dequantization mode markers.
     static constexpr StringRef kTensorRTPerTensorQuantizationMarker = "tensorrt.pt_q";
     static constexpr StringRef kTensorRTPerChannelQuantizationMarker = "tensorrt.pc_q";
diff --git a/mlir-tensorrt/tensorrt/lib/Target/TensorRTEncodingOpInterface/NetworkEncoder.cpp b/mlir-tensorrt/tensorrt/lib/Target/TensorRTEncodingOpInterface/NetworkEncoder.cpp
@@ -27,6 +27,7 @@
 #include "mlir-tensorrt-dialect/TensorRT/Utils/Utils.h"
 #include "mlir-tensorrt-dialect/Utils/NvInferAdaptor.h"
 #include "mlir-tensorrt-dialect/Utils/StaticValueUtils.h"
+#include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
 #include "llvm/ADT/STLExtras.h"
@@ -922,6 +923,34 @@ LogicalResult NvInferNetworkEncoder::encodeFunc(FunctionOpInterface func) {
       return failure();
     nvinfer1::ITensor *inputTensor =
         getNetworkDefinition()->addInput(name.c_str(), *dtype, trtShape);
+
+    // setDimensionName must be called immediately after addInput, or TensorRT
+    // will not deduplicate equal dimensions, which leads to perf gaps.
+    auto dimNamesAttr = func.getArgAttrOfType<DictionaryAttr>(
+        arg.getArgNumber(), TensorRTDialect::getDimensionNamesArgAttrName());
+    if (dimNamesAttr) {
+      for (NamedAttribute namedAttr : dimNamesAttr) {
+        int32_t key;
+        if (namedAttr.getName().getValue().getAsInteger(10, key))
+          return func->emitOpError()
+                 << "dimension name key '" << namedAttr.getName()
+                 << "' is not an integer";
+
+        if (key < 0 || key >= argType.getRank())
+          return func->emitOpError()
+                 << "dimension name key '" << key
+                 << "' is out of bounds for rank " << argType.getRank();
+
+        StringAttr strAttr = dyn_cast<StringAttr>(namedAttr.getValue());
+        if (!strAttr)
+          return func->emitOpError()
+                 << "dimension name value '" << namedAttr.getValue()
+                 << "' is not a string";
+
+        inputTensor->setDimensionName(key, strAttr.getValue().str().c_str());
+      }
+    }
+
     if (!usesStronglyTyped && dtype == nvinfer1::DataType::kINT8)
       setIdentityInt8DynamicRange(inputTensor);
     this->map(arg, inputTensor);
diff --git a/mlir-tensorrt/tensorrt/test/Target/TensorRT/translate-to-tensorrt.mlir b/mlir-tensorrt/tensorrt/test/Target/TensorRT/translate-to-tensorrt.mlir
@@ -54,3 +54,14 @@ func.func @trt_reduce(%arg0: tensor<1024x1024xf32>) -> tensor<1024x1xf32> {
 func.func @input_passthrough(%arg0: tensor<1xf32>, %arg1: tensor<1xf16>, %arg2: tensor<1xi32>) -> (tensor<1xf32>, tensor<1xf32>, tensor<1xf16>, tensor<1xi32>) {
   return %arg0, %arg0, %arg1, %arg2: tensor<1xf32>, tensor<1xf32>, tensor<1xf16>, tensor<1xi32>
 }
+
+
+// CHECK-LABEL: @trt_dim_names
+//  CHECK-SAME: tensorrt.engine
+func.func @trt_dim_names(
+  %arg0: tensor<?x?xf32> {tensorrt.dimension_names = {"0" = "batch", "1" = "features"}, tensorrt.shape_profile = #tensorrt.shape_profile<min=[2, 2], opt=[5, 5], max=[10, 10]>},
+  %arg1: tensor<?x?xf32> {tensorrt.dimension_names = {"0" = "batch", "1" = "features"}, tensorrt.shape_profile = #tensorrt.shape_profile<min=[2, 2], opt=[5, 5], max=[10, 10]>},
+  %arg2: tensor<2x10xf32>) -> tensor<?x?xf32> {
+  %0 = tensorrt.identity %arg0 : tensor<?x?xf32> to tensor<?x?xf32>
+  return %0 : tensor<?x?xf32>
+}