
Commit e00c198

[TensorRT] Copy tensorrt.host_tensor attribute in outline pass

Copies the tensorrt.host_tensor attribute from the parent function to the outlined function, and also adds the plan.memory_space attribute accordingly.

Parent: c1d6e9b
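
In effect, the outline pass now propagates the host-tensor marking. A minimal sketch of the intended result (the function names and the printed form #plan.memory_space<host> are illustrative assumptions, not literal pass output):

// Parent function argument before outlining:
func.func @parent(%arg0: tensor<i32> {tensorrt.host_tensor}) { ... }

// Outlined function argument after the pass; the same plan.memory_space
// attribute is also set on the parent argument:
func.func @outlined(%arg0: tensor<i32> {tensorrt.host_tensor,
    plan.memory_space = #plan.memory_space<host>}) { ... }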

2 files changed: +66 −22 lines changed

mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp

42 additions & 22 deletions
@@ -280,12 +280,17 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
       mlir::tensorrt::TensorRTDialect::getShapeProfileArgAttrName();
   StringRef tensorrtDimensionNamesAttrName =
       mlir::tensorrt::TensorRTDialect::getDimensionNamesArgAttrName();
+  StringRef tensorrtValueBoundsAttrName =
+      mlir::tensorrt::TensorRTDialect::getShapeTensorValueBoundsArgAttrName();
+  StringRef hostTensorAttrName = mlir::getHostTensorArgAttrName();
+  StringRef memorySpaceAttrName =
+      plan::PlanDialect::getMemorySpaceConstraintAttrName();
 
   SmallVector<Attribute> profileAttrsPerInput;
   SmallVector<Attribute> dimensionNamesAttrsPerInput;
   for (Value v : inputs) {
     auto rtt = dyn_cast<RankedTensorType>(v.getType());
-    if (!rtt || rtt.hasStaticShape()) {
+    if (!rtt) {
       profileAttrsPerInput.push_back(Attribute{});
       dimensionNamesAttrsPerInput.push_back(Attribute{});
       continue;
@@ -299,30 +304,45 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
     }
 
     int64_t argIndex = blockArg.getArgNumber();
-    profileAttrsPerInput.push_back(
-        parentFunc.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
-            argIndex, tensorrtShapeBoundsAttrName));
-
-    dimensionNamesAttrsPerInput.push_back(
-        parentFunc.getArgAttrOfType<DictionaryAttr>(
-            argIndex, tensorrtDimensionNamesAttrName));
-
-    if (!profileAttrsPerInput.back()) {
-      return emitError(blockArg.getLoc())
-             << "Profile attribute (" << tensorrtShapeBoundsAttrName
-             << ") of argument " << argIndex << " is not set";
+    // Get the shape profile and dimension name attributes of the input.
+    if (rtt.hasStaticShape()) {
+      // A static-shaped argument can only have a value bounds attribute
+      // (shape input).
+      auto valueBoundAttr =
+          parentFunc.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
+              argIndex, tensorrtValueBoundsAttrName);
+      if (valueBoundAttr) {
+        func->setArgAttr(argIndex, tensorrtValueBoundsAttrName, valueBoundAttr);
+      }
+      // Get the host tensor attribute of the input.
+      auto hostTensorAttr = parentFunc.getArgAttr(argIndex, hostTensorAttrName);
+      if (hostTensorAttr) {
+        func->setArgAttr(argIndex, hostTensorAttrName, hostTensorAttr);
+        // Add the plan.memory_space attribute; it is also required on the
+        // parent function.
+        auto memorySpaceAttr = plan::MemorySpaceAttr::get(
+            rewriter.getContext(), plan::MemorySpace::host);
+        func->setArgAttr(argIndex, memorySpaceAttrName, memorySpaceAttr);
+        parentFunc.setArgAttr(argIndex, memorySpaceAttrName, memorySpaceAttr);
+      }
+    } else {
+      auto shapeBoundAttr =
+          parentFunc.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
+              argIndex, tensorrtShapeBoundsAttrName);
+      if (!shapeBoundAttr) {
+        return emitError(blockArg.getLoc())
+               << "Profile attribute (" << tensorrtShapeBoundsAttrName
+               << ") of argument " << argIndex << " is not set";
+      }
+      func->setArgAttr(argIndex, tensorrtShapeBoundsAttrName, shapeBoundAttr);
+      auto dimensionNameAttr = parentFunc.getArgAttrOfType<DictionaryAttr>(
+          argIndex, tensorrtDimensionNamesAttrName);
+      if (dimensionNameAttr) {
+        func->setArgAttr(argIndex, tensorrtDimensionNamesAttrName,
+                         dimensionNameAttr);
+      }
     }
   }
 
-  for (unsigned idx = 0; idx < func->getNumArguments(); idx++) {
-    if (profileAttrsPerInput[idx])
-      func->setArgAttr(idx, tensorrtShapeBoundsAttrName,
-                       profileAttrsPerInput[idx]);
-    if (dimensionNamesAttrsPerInput[idx])
-      func->setArgAttr(idx, tensorrtDimensionNamesAttrName,
-                       dimensionNamesAttrsPerInput[idx]);
-  }
-
   rewriter.setInsertionPoint(inlineGroupOp);
   auto callOp = rewriter.create<tensorrt::CallAllocOp>(
       inlineGroupOp.getLoc(), inlineGroupOp.getResultTypes(), inputs,
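
The restructured loop now distinguishes two kinds of arguments. A minimal sketch of both (illustrative signature fragments, mirroring the new test below):

// Dynamically shaped input: tensorrt.shape_profile is mandatory and is
// copied to the outlined function; its absence is now the only hard error.
%arg0: tensor<?x4xf32> {tensorrt.shape_profile = #tensorrt.shape_profile<min = [2, 4], opt = [4, 4], max = [6, 4]>}

// Statically shaped shape-tensor input: tensorrt.value_bounds and
// tensorrt.host_tensor are optional and copied only when present.
%arg1: tensor<i32> {tensorrt.host_tensor, tensorrt.value_bounds = #tensorrt.shape_profile<min = [1], opt = [2], max = [3]>}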

mlir-tensorrt/tensorrt/test/Target/TensorRT/translate-to-tensorrt.mlir

24 additions & 0 deletions
@@ -65,3 +65,27 @@ func.func @trt_dim_names(
   %0 = tensorrt.identity %arg0 : tensor<?x?xf32> to tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }
+
+// CHECK-LABEL: @trt_host_input
+// CHECK-SAME: tensorrt.engine
+func.func @trt_host_input(%arg0: tensor<?x4xf32> {tensorrt.dimension_names = {}, tensorrt.shape_profile = #tensorrt.shape_profile<min = [2, 4], opt = [4, 4], max = [6, 4]>}, %arg1: tensor<i32> {tensorrt.host_tensor, tensorrt.value_bounds = #tensorrt.shape_profile<min = [1], opt = [2], max = [3]>}) -> tensor<?x?xf32> {
+  %0 = tensorrt.element_wise <kSUM>(%arg0, %arg0 : tensor<?x4xf32>, tensor<?x4xf32>) -> tensor<?x4xf32>
+  %1 = tensorrt.shape %0 : tensor<?x4xf32> -> tensor<2xi32>
+  %2 = tensorrt.slice %1[0][1][1] : tensor<2xi32> to tensor<1xi32>
+  %3 = tensorrt.collapse_rank %2 : tensor<1xi32> to tensor<i32>
+  %cst_i32 = tensorrt.constant dense<1> : tensor<i32>
+  %4 = tensorrt.element_wise <kPROD>(%3, %cst_i32 : tensor<i32>, tensor<i32>) -> tensor<i32>
+  %5 = tensorrt.slice %1[1][1][1] : tensor<2xi32> to tensor<1xi32>
+  %6 = tensorrt.collapse_rank %5 : tensor<1xi32> to tensor<i32>
+  %7 = tensorrt.element_wise <kPROD>(%4, %6 : tensor<i32>, tensor<i32>) -> tensor<i32>
+  %cst_i32_0 = tensorrt.constant dense<1> : tensor<i32>
+  %8 = tensorrt.element_wise <kPROD>(%arg1, %cst_i32_0 : tensor<i32>, tensor<i32>) -> tensor<i32>
+  %9 = tensorrt.element_wise <kFLOOR_DIV>(%7, %8 : tensor<i32>, tensor<i32>) -> tensor<i32>
+  %cst_i32_1 = tensorrt.constant dense<1> : tensor<1xi32>
+  %10 = tensorrt.reshape %9 shape(%cst_i32_1: tensor<1xi32>) : tensor<i32> to tensor<?xi32>
+  %cst_i32_2 = tensorrt.constant dense<1> : tensor<1xi32>
+  %11 = tensorrt.reshape %arg1 shape(%cst_i32_2: tensor<1xi32>) : tensor<i32> to tensor<?xi32>
+  %12 = tensorrt.concatenation {axis = 0 : i32} ins(%10, %11 : tensor<?xi32>, tensor<?xi32>) -> tensor<2xi32>
+  %13 = tensorrt.reshape %0 shape(%12: tensor<2xi32>) : tensor<?x4xf32> to tensor<?x?xf32>
+  return %13 : tensor<?x?xf32>
+}
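
The new @trt_host_input test exercises this path end to end: %arg1 is a host (shape) tensor whose runtime value, bounded by tensorrt.value_bounds, flows into the shape operand %12 of the final tensorrt.reshape, and the CHECK-SAME: tensorrt.engine line verifies that the function still translates to a serialized TensorRT engine.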
