[TensorRT] Copy tensorrt.host_tensor attribute in outline pass

yizhuoz004 · yizhuoz004 · commit b18d15cce206 · 2025-07-11T14:03:02.000-07:00
WIP: Add tp.DimensionInputInfo, support shape tensor input
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
@@ -280,12 +280,17 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
       mlir::tensorrt::TensorRTDialect::getShapeProfileArgAttrName();
   StringRef tensorrtDimensionNamesAttrName =
       mlir::tensorrt::TensorRTDialect::getDimensionNamesArgAttrName();
+  StringRef tensorrtValueBoundsAttrName =
+      mlir::tensorrt::TensorRTDialect::getShapeTensorValueBoundsArgAttrName();
+  StringRef hostTensorAttrName = mlir::getHostTensorArgAttrName();
+  StringRef memorySpaceAttrName =
+      plan::PlanDialect::getMemorySpaceConstraintAttrName();
 
   SmallVector<Attribute> profileAttrsPerInput;
   SmallVector<Attribute> dimensionNamesAttrsPerInput;
   for (Value v : inputs) {
     auto rtt = dyn_cast<RankedTensorType>(v.getType());
-    if (!rtt || rtt.hasStaticShape()) {
+    if (!rtt) {
       profileAttrsPerInput.push_back(Attribute{});
       dimensionNamesAttrsPerInput.push_back(Attribute{});
       continue;
@@ -299,30 +304,41 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
     }
 
     int64_t argIndex = blockArg.getArgNumber();
-    profileAttrsPerInput.push_back(
-        parentFunc.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
-            argIndex, tensorrtShapeBoundsAttrName));
-
-    dimensionNamesAttrsPerInput.push_back(
-        parentFunc.getArgAttrOfType<DictionaryAttr>(
-            argIndex, tensorrtDimensionNamesAttrName));
-
-    if (!profileAttrsPerInput.back()) {
-      return emitError(blockArg.getLoc())
-             << "Profile attribute (" << tensorrtShapeBoundsAttrName
-             << ") of argument " << argIndex << " is not set";
+    // Get shape profile and dimension name attributes of the input
+    if (rtt.hasStaticShape()) {
+      // static-shaped argument can only have value bound attr (shape input)
+      auto valueBoundAttr =
+          parentFunc.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
+              argIndex, tensorrtValueBoundsAttrName);
+      if (valueBoundAttr) {
+        func->setArgAttr(argIndex, tensorrtValueBoundsAttrName, valueBoundAttr);
+      }
+      // Propagate the memory space constraint attribute of the input, if present
+      auto memorySpaceAttr = parentFunc.getArgAttr(argIndex, memorySpaceAttrName);
+      if (memorySpaceAttr) {
+        func->setArgAttr(argIndex, memorySpaceAttrName, memorySpaceAttr);
+        // Add tensorrt.host_tensor attr, it is needed by NetworkEncoder for now
+        func->setArgAttr(argIndex, hostTensorAttrName, rewriter.getUnitAttr());
+      }
+    } else {
+      auto shapeBoundAttr =
+          parentFunc.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
+              argIndex, tensorrtShapeBoundsAttrName);
+      if (!shapeBoundAttr) {
+        return emitError(blockArg.getLoc())
+               << "Profile attribute (" << tensorrtShapeBoundsAttrName
+               << ") of argument " << argIndex << " is not set";
+      }
+      func->setArgAttr(argIndex, tensorrtShapeBoundsAttrName, shapeBoundAttr);
+      auto dimensionNameAttr = parentFunc.getArgAttrOfType<DictionaryAttr>(
+          argIndex, tensorrtDimensionNamesAttrName);
+      if (dimensionNameAttr) {
+        func->setArgAttr(argIndex, tensorrtDimensionNamesAttrName,
+                         dimensionNameAttr);
+      }
     }
   }
 
-  for (unsigned idx = 0; idx < func->getNumArguments(); idx++) {
-    if (profileAttrsPerInput[idx])
-      func->setArgAttr(idx, tensorrtShapeBoundsAttrName,
-                       profileAttrsPerInput[idx]);
-    if (dimensionNamesAttrsPerInput[idx])
-      func->setArgAttr(idx, tensorrtDimensionNamesAttrName,
-                       dimensionNamesAttrsPerInput[idx]);
-  }
-
   rewriter.setInsertionPoint(inlineGroupOp);
   auto callOp = rewriter.create<tensorrt::CallAllocOp>(
       inlineGroupOp.getLoc(), inlineGroupOp.getResultTypes(), inputs,
diff --git a/tripy/nvtripy/backend/api/compile.py b/tripy/nvtripy/backend/api/compile.py
@@ -20,7 +20,7 @@
 
 from nvtripy import constants, export, utils
 from nvtripy.backend.api.executable import Executable
-from nvtripy.backend.api.input_info import InputInfo
+from nvtripy.backend.api.input_info import InputInfo, DimensionInputInfo
 from nvtripy.backend.mlir import Compiler
 from nvtripy.common.exception import raise_error
 from nvtripy.frontend import Tensor, Trace
@@ -162,6 +162,22 @@ def process_arg(name, arg):
             input_names.add(name)
 
             return tensor
+
+        if isinstance(arg, DimensionInputInfo):
+            from nvtripy.frontend.dimension_size import DimensionSize
+
+            input_infos[name] = arg
+
+            tensor = DimensionSize(arg.value_bounds.opt[0])
+            tensor.name = name
+            tensor.trace_tensor.is_compile_tracer = True
+            assert tensor.trace_tensor.shape == ()
+
+            trace_input_map[name] = tensor
+            input_names.add(name)
+
+            return tensor
+
         return arg
 
     compiled_arg_names = []
diff --git a/tripy/nvtripy/backend/api/executable.py b/tripy/nvtripy/backend/api/executable.py
@@ -191,17 +191,17 @@ def add(a, b):
                 ],
             )
 
-        for tensor in input_tensors:
-            producer = tensor.trace_tensor.producer
-            if not isinstance(producer, Constant) or tensor.device.kind != "gpu":
-                raise_error(
-                    "Inputs to compiled executables must be evaluated tensors on the GPU.",
-                    [
-                        "Got input" + (f" on device '{tensor.device}':" if tensor.device.kind != "gpu" else ":"),
-                        tensor,
-                        "Hint: Try calling `.eval()` on the tensor to ensure it is a GPU constant.",
-                    ],
-                )
+            # for tensor in input_tensors:
+            #     producer = tensor.trace_tensor.producer
+            # if not isinstance(producer, Constant) or tensor.device.kind != "gpu":
+            #     raise_error(
+            #         "Inputs to compiled executables must be evaluated tensors on the GPU.",
+            #         [
+            #             "Got input" + (f" on device '{tensor.device}':" if tensor.device.kind != "gpu" else ":"),
+            #             tensor,
+            #             "Hint: Try calling `.eval()` on the tensor to ensure it is a GPU constant.",
+            #         ],
+            #     )
 
         input_memrefs = [inp.trace_tensor.producer.data for inp in input_tensors]
         try:
diff --git a/tripy/nvtripy/backend/api/input_info.py b/tripy/nvtripy/backend/api/input_info.py
@@ -16,7 +16,7 @@
 
 from nvtripy import export
 from nvtripy.backend.api.named_dimension import NamedDimension
-from nvtripy.backend.api.shape_bounds import ShapeBounds
+from nvtripy.backend.api.shape_bounds import ShapeBounds, ValueBounds
 from nvtripy.frontend.dimension_size import DimensionSize
 from nvtripy.types import IntLike
 from nvtripy.utils import json as json_utils
@@ -74,7 +74,6 @@ def __init__(
         """
         is_int_like = lambda arg: any(isinstance(arg, typ) for typ in {int, DimensionSize})
 
-        # TODO (#252): Allow `shape` to be a shape tensor
         min_shape = []
         opt_shape = []
         max_shape = []
@@ -129,3 +128,48 @@ def decode_input_info(input_info_dict):
     input_info.shape_bounds = input_info_dict["shape_bounds"]
     input_info.dimension_names = {int(k): v for k, v in input_info_dict.get("dimension_names", {}).items()}
     return input_info
+
+
+@export.public_api(document_under="compiling_code")
+class DimensionInputInfo:
+    """
+    Captures information about a dimension size input to a compiled function.
+    """
+
+    def __init__(self, value_bounds: Tuple[IntLike, IntLike, IntLike]) -> None:
+        """
+        Args:
+            value_bounds: The value bounds of the dimension size input, given as (minimum, optimum, maximum).
+
+        .. code-block:: python
+            :linenos:
+            :caption: Dynamic Dimensions
+
+            # The dimension size will support values in the range [1, 3],
+            # optimizing for a value of 2.
+            dim_inp = tp.DimensionInputInfo((1, 2, 3))
+            assert dim_inp.value_bounds.min == (1,)
+            assert dim_inp.value_bounds.opt == (2,)
+            assert dim_inp.value_bounds.max == (3,)
+        """
+        # Unpacking raises ValueError unless exactly three values are given; each bound is stored as a 1-tuple.
+        min_value, opt_value, max_value = value_bounds
+        self.value_bounds = ValueBounds(min=(min_value,), opt=(opt_value,), max=(max_value,))
+
+    def __str__(self) -> str:
+        # The bounds render as 1-tuples, e.g. "min=(1,)".
+        bounds = self.value_bounds
+        return f"DimensionInputInfo(min={bounds.min}, opt={bounds.opt}, max={bounds.max})"
+
+
+@json_utils.Encoder.register(DimensionInputInfo)
+def encode_dim_input_info(dim_input_info):
+    # Serialize the value bounds under the "value_bounds" key.
+    bounds = dim_input_info.value_bounds
+    return {"value_bounds": bounds}
+
+
+@json_utils.Decoder.register(DimensionInputInfo)
+def decode_dim_input_info(dim_input_info_dict):
+    bounds = dim_input_info_dict["value_bounds"]  # assumed already decoded into a ValueBounds — TODO confirm
+    return DimensionInputInfo((bounds.min[0], bounds.opt[0], bounds.max[0]))
diff --git a/tripy/nvtripy/backend/api/shape_bounds.py b/tripy/nvtripy/backend/api/shape_bounds.py
@@ -59,3 +59,28 @@ def decode_shape_bounds(shape_bounds_dict):
         opt=tuple(shape_bounds_dict["opt"]),
         max=tuple(shape_bounds_dict["max"]),
     )
+
+
+@dataclass
+class ValueBounds:  # min/opt/max value bounds, each held as a tuple
+    min: Tuple[IntLike]  # minimum value(s)
+    opt: Tuple[IntLike]  # optimum value(s)
+    max: Tuple[IntLike]  # maximum value(s)
+
+
+@json_utils.Encoder.register(ValueBounds)
+def encode_value_bounds(value_bounds):
+    # Emit each bound as a tuple keyed by its field name, in min/opt/max order.
+    return {
+        field: tuple(getattr(value_bounds, field))
+        for field in ("min", "opt", "max")
+    }
+
+
+@json_utils.Decoder.register(ValueBounds)
+def decode_value_bounds(value_bounds_dict):
+    # Rebuild the dataclass, coercing each stored sequence back to a tuple;
+    # a missing "min"/"opt"/"max" key raises KeyError.
+    fields = ("min", "opt", "max")
+    restored = {field: tuple(value_bounds_dict[field]) for field in fields}
+    return ValueBounds(**restored)
diff --git a/tripy/nvtripy/trace/trace.py b/tripy/nvtripy/trace/trace.py
@@ -16,7 +16,7 @@
 #
 
 from textwrap import indent
-from typing import Dict, List, Optional, Sequence, Set
+from typing import Dict, List, Optional, Sequence, Set, Union
 
 from mlir_tensorrt.compiler import ir
 from mlir_tensorrt.compiler.dialects import func as func_dialect
@@ -43,7 +43,7 @@ def __init__(
         self,
         outputs: Sequence[TraceTensor],
         inputs: Sequence[TraceTensor] = [],
-        input_infos: Optional[Dict[str, "nvtripy.InputInfo"]] = None,
+        input_infos: Optional[Dict[str, Union["nvtripy.InputInfo", "nvtripy.DimensionInputInfo"]]] = None,
         name: str = "main",
     ) -> None:
         # ops/inputs/outputs are populated by `trace()`
@@ -132,6 +132,8 @@ def get_sep(lst):
         return "\n".join(layer_strs)
 
     def to_mlir(self):
+        from nvtripy.backend.api.input_info import InputInfo, DimensionInputInfo
+
         def to_mlir_impl():
 
             with make_ir_context(), ir.Location.unknown():
@@ -195,13 +197,23 @@ def num_known_dims(ranked_tensor_type):
                         attr = {}
                         if self.input_infos:
                             input_info = self.input_infos[inp.name]
-                            shape_bounds = input_info.shape_bounds
-                            attr["tensorrt.shape_profile"] = ir.Attribute.parse(
-                                f"#tensorrt.shape_profile<min={list(shape_bounds.min)}, opt={list(shape_bounds.opt)}, max={list(shape_bounds.max)}>"
-                            )
-                            attr["tensorrt.dimension_names"] = ir.DictAttr.get(
-                                {str(idx): ir.StringAttr.get(name) for idx, name in input_info.dimension_names.items()}
-                            )
+                            if isinstance(input_info, InputInfo):
+                                shape_bounds = input_info.shape_bounds
+                                attr["tensorrt.shape_profile"] = ir.Attribute.parse(
+                                    f"#tensorrt.shape_profile<min={list(shape_bounds.min)}, opt={list(shape_bounds.opt)}, max={list(shape_bounds.max)}>"
+                                )
+                                attr["tensorrt.dimension_names"] = ir.DictAttr.get(
+                                    {
+                                        str(idx): ir.StringAttr.get(name)
+                                        for idx, name in input_info.dimension_names.items()
+                                    }
+                                )
+                            elif isinstance(input_info, DimensionInputInfo):
+                                value_bounds = input_info.value_bounds
+                                attr["tensorrt.value_bounds"] = ir.Attribute.parse(
+                                    f"#tensorrt.shape_profile<min={list(value_bounds.min)}, opt={list(value_bounds.opt)}, max={list(value_bounds.max)}>"
+                                )
+                                attr["plan.memory_space"] = ir.Attribute.parse("#plan.memory_space<host>")
 
                         arg_attrs.append(ir.DictAttr.get(attr))
 
diff --git a/tripy/test_out_of_bound.py b/tripy/test_out_of_bound.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import nvtripy as tp
+import cupy as cp
+# Ad-hoc script: compiles `func` and prints per-argument details from the executable signature.
+from nvtripy.logging import logger
+
+logger.verbosity = "ir"
+import mlir_tensorrt.runtime.api as runtime
+
+
+def func(x):
+    x = x + x
+    return x
+
+
+compiled_func = tp.compile(func, args=[tp.InputInfo(shape=((2, 4, 6), 4), dtype=tp.float32)])
+# Read the private executable signature of the compiled function.
+sig = compiled_func._executable_signature
+
+for idx in range(2):
+    # Print the address space, shape, and min/max bound of signature argument `idx`.
+    arg = sig.get_arg(idx)
+    memref = runtime.MemRefType(arg)
+    print(f"Arg {idx}: ", memref.address_space)
+
+    print("Shape: ", memref.shape)
+    bound = sig.get_arg_bound(idx)
+    print(f"Bound: {bound.min()}, {bound.max()}")
+# Disabled run with an (8, 4) input; presumably the out-of-bound case, since 8 exceeds the (2, 4, 6) given above.
+# inp = cp.ones((8, 4), dtype=cp.float32)
+# inp = tp.Tensor(inp)
+# out = compiled_func(inp)
diff --git a/tripy/test_shape_input.py b/tripy/test_shape_input.py
@@ -0,0 +1,57 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import nvtripy as tp
+import cupy as cp
+# Ad-hoc script: compiles `func` with a tensor input and a DimensionInputInfo input, prints signature details, then runs it.
+from nvtripy.logging import logger
+
+logger.verbosity = "ir"
+import mlir_tensorrt.runtime.api as runtime
+
+
+def func(x, y):
+    x = x + x
+    x = tp.reshape(x, (-1, y))
+    return x
+
+
+compiled_func = tp.compile(
+    func, args=[tp.InputInfo(shape=((2, 4, 6), 4), dtype=tp.float32), tp.DimensionInputInfo(value_bounds=(1, 2, 3))]
+)
+
+print("compilation complete.")
+# Read the private executable signature of the compiled function.
+sig = compiled_func._executable_signature
+
+for idx in range(2):
+    # Print the address space, shape, and min/max bound of signature argument `idx`.
+    arg = sig.get_arg(idx)
+    memref = runtime.MemRefType(arg)
+    print(f"Arg {idx}: ", memref.address_space)
+
+    print("Shape: ", memref.shape)
+    bound = sig.get_arg_bound(idx)
+    print(f"Bound: {bound.min()}, {bound.max()}")
+
+
+# import pdb
+# pdb.set_trace()
+
+# Run the compiled function on a (4, 4) tensor of ones with a dimension size of 2.
+inp = cp.ones((4, 4), dtype=cp.float32)
+inp = tp.Tensor(inp)
+dim_inp = tp.DimensionSize(2)
+out = compiled_func(inp, dim_inp)
+print(out)