Adds support for a dimension names attribute for block arguments

pranavm-nvidia · pranavm-nvidia · commit 006bde8e68cb · 2025-06-09T09:38:04.000-07:00
Adds support for a `tensorrt.dimension_names` argument attribute, which is used to set
dimension names in TensorRT. Dimensions that share a name are treated as equal at runtime.
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
@@ -240,13 +240,17 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
 
   StringRef tensorrtShapeBoundsAttrName =
       mlir::tensorrt::TensorRTDialect::getShapeProfileArgAttrName();
+  StringRef tensorrtDimensionNamesAttrName =
+      mlir::tensorrt::TensorRTDialect::getDimensionNamesArgAttrName();
   func::FuncOp funcContainingCluster =
       cluster.back()->getParentOfType<func::FuncOp>();
   SmallVector<Attribute> profileAttrsPerInput;
+  SmallVector<Attribute> dimensionNamesAttrsPerInput;
   for (Value v : inputs) {
     auto rtt = dyn_cast<RankedTensorType>(v.getType());
     if (!rtt || rtt.hasStaticShape()) {
       profileAttrsPerInput.push_back(Attribute{});
+      dimensionNamesAttrsPerInput.push_back(Attribute{});
       continue;
     }
 
@@ -263,6 +267,10 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
         funcContainingCluster.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
             argIndex, tensorrtShapeBoundsAttrName));
 
+    dimensionNamesAttrsPerInput.push_back(
+        funcContainingCluster.getArgAttrOfType<DictionaryAttr>(
+            argIndex, tensorrtDimensionNamesAttrName));
+
     if (!profileAttrsPerInput.back()) {
       return emitError(blockArg.getLoc())
              << "Profile attribute (" << tensorrtShapeBoundsAttrName
@@ -271,10 +279,12 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
   }
 
   for (unsigned idx = 0; idx < func->getNumArguments(); idx++) {
-    if (!profileAttrsPerInput[idx])
-      continue;
-    func->setArgAttr(idx, tensorrtShapeBoundsAttrName,
-                     profileAttrsPerInput[idx]);
+    if (profileAttrsPerInput[idx])
+      func->setArgAttr(idx, tensorrtShapeBoundsAttrName,
+                       profileAttrsPerInput[idx]);
+    if (dimensionNamesAttrsPerInput[idx])
+      func->setArgAttr(idx, tensorrtDimensionNamesAttrName,
+                       dimensionNamesAttrsPerInput[idx]);
   }
 
   rewriter.setInsertionPoint(inlineGroupOp);
diff --git a/mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTDialect.td b/mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTDialect.td
@@ -38,6 +38,12 @@ def TensorRT_Dialect : Dialect {
       return "tensorrt.shape_profile";
     }
 
+    /// Return the name of the function arg attr that encodes the dimension names.
+    /// It is expected to be a `DictionaryAttr` of dimension index -> name string.
+    static StringRef getDimensionNamesArgAttrName() {
+      return "tensorrt.dimension_names";
+    }
+
     /// TensorRT quantization and dequantization mode markers.
     static constexpr StringRef kTensorRTPerTensorQuantizationMarker = "tensorrt.pt_q";
     static constexpr StringRef kTensorRTPerChannelQuantizationMarker = "tensorrt.pc_q";
diff --git a/mlir-tensorrt/tensorrt/lib/Target/TensorRTEncodingOpInterface/NetworkEncoder.cpp b/mlir-tensorrt/tensorrt/lib/Target/TensorRTEncodingOpInterface/NetworkEncoder.cpp
@@ -922,6 +922,29 @@ LogicalResult NvInferNetworkEncoder::encodeFunc(FunctionOpInterface func) {
       return failure();
     nvinfer1::ITensor *inputTensor =
         getNetworkDefinition()->addInput(name.c_str(), *dtype, trtShape);
+
+    // setDimensionName must be called immediately after addInput, or TensorRT
+    // will not deduplicate equal dimensions, which leads to perf gaps.
+    auto dimNamesAttr = func.getArgAttrOfType<DictionaryAttr>(
+        arg.getArgNumber(), TensorRTDialect::getDimensionNamesArgAttrName());
+    if (dimNamesAttr) {
+
+      for (NamedAttribute namedAttr : dimNamesAttr) {
+        int32_t key;
+        if (namedAttr.getName().getValue().getAsInteger(10, key)) {
+          return func->emitOpError()
+                 << "dimension name key '" << namedAttr.getName()
+                 << "' is not an integer";
+        }
+
+        if (StringAttr strAttr = namedAttr.getValue().dyn_cast<StringAttr>()) {
+          StringRef value = strAttr.getValue();
+          inputTensor->setDimensionName(static_cast<int32_t>(key),
+                                        value.str().c_str());
+        }
+      }
+    }
+
     if (!usesStronglyTyped && dtype == nvinfer1::DataType::kINT8)
       setIdentityInt8DynamicRange(inputTensor);
     this->map(arg, inputTensor);
diff --git a/mlir-tensorrt/tensorrt/test/Target/TensorRT/dimension_names.mlir b/mlir-tensorrt/tensorrt/test/Target/TensorRT/dimension_names.mlir
@@ -0,0 +1,8 @@
+// RUN: %pick-one-gpu tensorrt-opt -split-input-file -pass-pipeline="builtin.module(translate-tensorrt-to-engine)" \
+// RUN:  -mlir-elide-elementsattrs-if-larger=32 -tensorrt-builder-opt-level=0 %s | FileCheck %s
+
+// CHECK-LABEL: @trt_dim_names
+func.func @trt_dim_names(%arg0: tensor<2x10xf32> {tensorrt.dimension_names = {"0" = "batch", "1" = "features"}}) -> tensor<2x10xf32> {
+  %0 = tensorrt.identity %arg0 : tensor<2x10xf32> to tensor<2x10xf32>
+  return %0 : tensor<2x10xf32>
+}