NVIDIA
diff --git a/‎tripy/docs/post0_developer_guides/02-debugging.md‎
Lines changed: 1 addition & 0 deletions b/‎tripy/docs/post0_developer_guides/02-debugging.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tripy/examples/segment-anything-model-v2/sam2/build_sam.py‎
Lines changed: 1 addition & 2 deletions b/‎tripy/examples/segment-anything-model-v2/sam2/build_sam.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎tripy/examples/segment-anything-model-v2/sam2/modeling/memory_attention.py‎
Lines changed: 1 addition & 3 deletions b/‎tripy/examples/segment-anything-model-v2/sam2/modeling/memory_attention.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎tripy/examples/segment-anything-model-v2/sam2/modeling/sam2_base.py‎
Lines changed: 1 addition & 5 deletions b/‎tripy/examples/segment-anything-model-v2/sam2/modeling/sam2_base.py‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎tripy/nvtripy/backend/api/shape_bounds.py‎ renamed to ‎tripy/nvtripy/backend/api/bounds.py‎
Lines changed: 15 additions & 15 deletions b/‎tripy/nvtripy/backend/api/shape_bounds.py‎ renamed to ‎tripy/nvtripy/backend/api/bounds.py‎
Lines changed: 15 additions & 15 deletions
diff --git a/‎tripy/nvtripy/backend/api/compile.py‎
Lines changed: 41 additions & 1 deletion b/‎tripy/nvtripy/backend/api/compile.py‎
Lines changed: 41 additions & 1 deletion
diff --git a/‎tripy/nvtripy/backend/api/executable.py‎
Lines changed: 22 additions & 11 deletions b/‎tripy/nvtripy/backend/api/executable.py‎
Lines changed: 22 additions & 11 deletions
diff --git a/‎tripy/nvtripy/backend/api/input_info.py‎
Lines changed: 50 additions & 3 deletions b/‎tripy/nvtripy/backend/api/input_info.py‎
Lines changed: 50 additions & 3 deletions
diff --git a/‎tripy/nvtripy/frontend/__init__.py‎
Lines changed: 2 additions & 1 deletion b/‎tripy/nvtripy/frontend/__init__.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎tripy/nvtripy/frontend/dimension_size.py‎
Lines changed: 28 additions & 4 deletions b/‎tripy/nvtripy/frontend/dimension_size.py‎
Lines changed: 28 additions & 4 deletions
@@ -12,6 +12,7 @@ We include some environment variables to enable extra debugging information from
 - `export TRIPY_MLIR_DEBUG_PATH=<mlir-debug-path>` sets the directory for IR dumps. The default path is `mlir-dumps`.
 - `export TRIPY_TRT_DEBUG_ENABLED=1` will dump TensorRT engines and their layer information.
 - `export TRIPY_TRT_DEBUG_PATH=<trt-debug-path>` sets the directory for TensorRT dumps. Default path is `tensorrt-dumps`.
+- `export MTRT_TENSORRT_NVTX=DETAILED` will enable detailed NVTX profiling verbosity for TensorRT layers.
 
 
 ## Using A Debugger
 
@@ -81,8 +81,7 @@ def get_component_configs(model, cfg):
                     (seq_len, mem_attention_batch, 64),
                     getattr(tp, model_precision),
                 ),
-                # TODO (#594): Remove this hack once we are able to pass in DimensionSizes directly:
-                tp.InputInfo(((4, 16, 64),), tp.int32),
+                tp.DimensionInputInfo(value_bounds=(4, 16, 64)),
             ],
             "skip_dtype_convert": [],
         },
 
@@ -186,10 +186,8 @@ def forward(
         memory: tp.Tensor,  # cross-attention inputs
         curr_pos: Optional[tp.Tensor] = None,  # pos_enc for self-attention inputs
         memory_pos: Optional[tp.Tensor] = None,  # pos_enc for cross-attention inputs
-        num_obj_ptr_tokens: Optional[tp.Tensor] = None,  # number of object pointer *tokens*
+        num_obj_ptr_tokens: Optional[tp.DimensionSize] = None,  # number of object pointer *tokens*
     ):
-        # TODO (#594): Remove this hack once we are able to pass in DimensionSizes directly:
-        num_obj_ptr_tokens = num_obj_ptr_tokens.shape[0]
         output = curr
         if self.pos_enc_at_input and curr_pos is not None:
             output = output + 0.1 * curr_pos
 
@@ -242,8 +242,6 @@ def _build_sam_heads(self):
         else:
             self.obj_ptr_tpos_proj = torch.nn.Identity()
 
-        self.fake_object_ptrs = torch.ones((1,), dtype=torch.int32, device="cuda")
-
     def _forward_sam_heads(
         self,
         backbone_features,
@@ -667,14 +665,12 @@ def _prepare_memory_conditioned_features(
         memory = torch.cat(to_cat_memory, dim=0)
         memory_pos_embed = torch.cat(to_cat_memory_pos_embed, dim=0)
         if isinstance(self.memory_attention, tp.Module) or isinstance(self.memory_attention, tp.Executable):
-            if self.fake_object_ptrs.shape != (num_obj_ptr_tokens,):
-                self.fake_object_ptrs = torch.ones((num_obj_ptr_tokens,), dtype=torch.int32, device="cuda")
             pix_feat_with_mem = self.memory_attention(
                 curr=tp.Tensor(current_vision_feats[0].half().contiguous()),
                 memory=tp.Tensor(memory.half().contiguous()),
                 curr_pos=tp.Tensor(current_vision_pos_embeds[0].half().contiguous()),
                 memory_pos=tp.Tensor(memory_pos_embed.half().contiguous()),
-                num_obj_ptr_tokens=tp.Tensor(self.fake_object_ptrs),
+                num_obj_ptr_tokens=tp.DimensionSize(num_obj_ptr_tokens),
             )
         else:
             pix_feat_with_mem = self.memory_attention(
 
@@ -25,37 +25,37 @@
 
 @export.public_api(document_under="compiling_code/input_info", document_init_sig=False)
 @dataclass
-class ShapeBounds:
+class Bounds:
     min: Tuple[IntLike]
     """
-    The minimum shape.
+    The minimum value.
     """
     opt: Tuple[IntLike]
     """
-    The shape to optimize for.
+    The value to optimize for.
     """
     max: Tuple[IntLike]
     """
-    The maximum shape.
+    The maximum value.
     """
 
     def is_static(self):
         return self.min == self.opt == self.max
 
 
-@json_utils.Encoder.register(ShapeBounds)
-def encode_shape_bounds(shape_bounds):
+@json_utils.Encoder.register(Bounds)
+def encode_bounds(bounds):
     return {
-        "min": shape_bounds.min,
-        "opt": shape_bounds.opt,
-        "max": shape_bounds.max,
+        "min": bounds.min,
+        "opt": bounds.opt,
+        "max": bounds.max,
     }
 
 
-@json_utils.Decoder.register(ShapeBounds)
-def decode_shape_bounds(shape_bounds_dict):
-    return ShapeBounds(
-        min=tuple(shape_bounds_dict["min"]),
-        opt=tuple(shape_bounds_dict["opt"]),
-        max=tuple(shape_bounds_dict["max"]),
+@json_utils.Decoder.register(Bounds)
+def decode_bounds(bounds_dict):
+    return Bounds(
+        min=tuple(bounds_dict["min"]),
+        opt=tuple(bounds_dict["opt"]),
+        max=tuple(bounds_dict["max"]),
     )
@@ -20,7 +20,7 @@
 
 from nvtripy import constants, export, utils
 from nvtripy.backend.api.executable import Executable
-from nvtripy.backend.api.input_info import InputInfo
+from nvtripy.backend.api.input_info import InputInfo, DimensionInputInfo
 from nvtripy.backend.mlir import Compiler
 from nvtripy.common.exception import raise_error
 from nvtripy.frontend import Tensor, Trace
@@ -106,6 +106,30 @@ def add(a, b):
 
         big_out = compiled_add(big_a, big_b)
 
+    .. code-block:: python
+        :linenos:
+        :caption: Dimension Size Input
+
+        def dynamic_reshape(x, s):
+            return tp.reshape(x, (-1, s))
+
+        # doc: no-print-locals compiled_reshape
+
+        # Support a dimension size in the range [1, 4], optimizing for a
+        # value of 2.
+        compiled_reshape = tp.compile(
+            dynamic_reshape,
+            args=[
+                tp.InputInfo(shape=(3, (2, 4, 6)), dtype=tp.float32),
+                tp.DimensionInputInfo(value_bounds=(1, 2, 4)),
+            ],
+        )
+
+        a = tp.ones((3, 4), dtype=tp.float32).eval()
+        s = tp.DimensionSize(2)
+
+        out = compiled_reshape(a, s)
+        assert out.shape == (6, 2)
 
     .. code-block:: python
         :linenos:
@@ -162,6 +186,22 @@ def process_arg(name, arg):
             input_names.add(name)
 
             return tensor
+
+        if isinstance(arg, DimensionInputInfo):
+            from nvtripy.frontend.dimension_size import DimensionSize
+
+            input_infos[name] = arg
+
+            tensor = DimensionSize(arg.value_bounds.opt[0])
+            tensor.name = name
+            tensor.trace_tensor.is_compile_tracer = True
+            assert tensor.trace_tensor.shape == ()
+
+            trace_input_map[name] = tensor
+            input_names.add(name)
+
+            return tensor
+
         return arg
 
     compiled_arg_names = []
 
@@ -18,7 +18,7 @@
 
 import mlir_tensorrt.runtime.api as runtime
 from nvtripy import config, export
-from nvtripy.backend.api.input_info import InputInfo
+from nvtripy.backend.api.input_info import InputInfo, DimensionInputInfo
 from nvtripy.backend.api.stream import default_stream
 from nvtripy.backend.mlir.utils import MLIRRuntimeClient
 from nvtripy.common.exception import raise_error
@@ -41,7 +41,11 @@ class Executable:
     # `return_single_tensor_as_sequence` indicates whether the return type should be a sequence even if
     # there is only one output.
     def __init__(
-        self, executable, arg_names, return_single_tensor_as_sequence: bool, input_infos: Dict[str, InputInfo]
+        self,
+        executable,
+        arg_names,
+        return_single_tensor_as_sequence: bool,
+        input_infos: Dict[str, Union[InputInfo, DimensionInputInfo]],
     ):
         self._executable = executable
 
@@ -69,7 +73,7 @@ def __init__(
 
         self.__signature__ = inspect.Signature(params, return_annotation=return_annotation)
 
-        self.input_infos: Dict[str, InputInfo] = input_infos
+        self.input_infos: Dict[str, Union[InputInfo, DimensionInputInfo]] = input_infos
         """
         Stores metadata, like shapes and data types, for each input to the executable.
         """
@@ -191,15 +195,16 @@ def add(a, b):
                 ],
             )
 
-        for tensor in input_tensors:
+        expected_devices = ["gpu" if isinstance(info, InputInfo) else "cpu" for info in self.input_infos.values()]
+        for tensor, expected_device, arg_name in zip(input_tensors, expected_devices, self._arg_names):
             producer = tensor.trace_tensor.producer
-            if not isinstance(producer, Constant) or tensor.device.kind != "gpu":
+            if not isinstance(producer, Constant):
+                raise_error(f"Tensor `{arg_name}` is not evaluated.", ["Hint: Try calling `.eval()` on the tensor."])
+            if tensor.device.kind != expected_device:
                 raise_error(
-                    "Inputs to compiled executables must be evaluated tensors on the GPU.",
+                    "Unexpected tensor device.",
                     [
-                        "Got input" + (f" on device '{tensor.device}':" if tensor.device.kind != "gpu" else ":"),
-                        tensor,
-                        "Hint: Try calling `.eval()` on the tensor to ensure it is a GPU constant.",
+                        f"For tensor: `{arg_name}`, expected to be on device: {expected_device} but got: {tensor.device.kind}.\n",
                     ],
                 )
 
@@ -212,7 +217,11 @@ def add(a, b):
             # TODO: Evaluate whether this should be moved into the executor
             if "function expects a memref type with element type" in str(err):
                 # If the problem is a mismatched data type, we can provide a better error message than the executor can.
-                expected_input_dtypes = [info.dtype for info in self.input_infos.values()]
+                from nvtripy.common.datatype import int32
+
+                expected_input_dtypes = [
+                    info.dtype if isinstance(info, InputInfo) else int32 for info in self.input_infos.values()
+                ]
                 for tensor, dtype, arg_name in zip(input_tensors, expected_input_dtypes, self._arg_names):
                     if tensor.dtype != dtype:
                         raise_error(
@@ -225,7 +234,9 @@ def add(a, b):
                             ),
                         )
             elif "InternalError: failed to set input shape" in str(err) or "Runtime shape mismatch" in str(err):
-                expected_input_shapes = [info.shape_bounds for info in self.input_infos.values()]
+                expected_input_shapes = [
+                    info.shape_bounds if isinstance(info, InputInfo) else tuple() for info in self.input_infos.values()
+                ]
                 for tensor, expected_bounds, arg_name in zip(input_tensors, expected_input_shapes, self._arg_names):
                     shape = tensor.shape
 
 
@@ -16,7 +16,7 @@
 
 from nvtripy import export
 from nvtripy.backend.api.named_dimension import NamedDimension
-from nvtripy.backend.api.shape_bounds import ShapeBounds
+from nvtripy.backend.api.bounds import Bounds
 from nvtripy.frontend.dimension_size import DimensionSize
 from nvtripy.types import IntLike
 from nvtripy.utils import json as json_utils
@@ -74,7 +74,6 @@ def __init__(
         """
         is_int_like = lambda arg: any(isinstance(arg, typ) for typ in {int, DimensionSize})
 
-        # TODO (#252): Allow `shape` to be a shape tensor
         min_shape = []
         opt_shape = []
         max_shape = []
@@ -98,7 +97,7 @@ def __init__(
         A mapping of dimension indices to their names, if set.
         """
 
-        self.shape_bounds: ShapeBounds = ShapeBounds(tuple(min_shape), tuple(opt_shape), tuple(max_shape))
+        self.shape_bounds: Bounds = Bounds(tuple(min_shape), tuple(opt_shape), tuple(max_shape))
         """
         The shape bounds of the input.
         """
@@ -129,3 +128,51 @@ def decode_input_info(input_info_dict):
     input_info.shape_bounds = input_info_dict["shape_bounds"]
     input_info.dimension_names = {int(k): v for k, v in input_info_dict.get("dimension_names", {}).items()}
     return input_info
+
+
+@export.public_api(document_under="compiling_code")
+class DimensionInputInfo:
+    """
+    Captures information about a dimension size input to a compiled function.
+    """
+
+    def __init__(self, value_bounds: Tuple[IntLike, IntLike, IntLike]) -> None:
+        """
+        Args:
+            value_bounds: The value bounds of the dimension size input, as a tuple of (minimum, optimum, maximum) values.
+
+        .. code-block:: python
+            :linenos:
+            :caption: Dimension Size Input
+
+            # The dimension size will support values in the range [1, 3],
+            # optimizing for a size of 2.
+            dim_inp = tp.DimensionInputInfo((1, 2, 3))
+            assert dim_inp.value_bounds.min == (1,)
+            assert dim_inp.value_bounds.opt == (2,)
+            assert dim_inp.value_bounds.max == (3,)
+        """
+        # Convert any `DimensionSize` bounds to `int` once, up front, to avoid evaluating them repeatedly.
+        value_bounds = tuple(map(int, value_bounds))
+        self.value_bounds = Bounds(
+            min=tuple([value_bounds[0]]), opt=tuple([value_bounds[1]]), max=tuple([value_bounds[2]])
+        )
+
+    def __str__(self) -> str:
+        return (
+            f"DimensionInputInfo(min={self.value_bounds.min}, opt={self.value_bounds.opt}, max={self.value_bounds.max})"
+        )
+
+
+@json_utils.Encoder.register(DimensionInputInfo)
+def encode_dim_input_info(dim_input_info):
+    return {
+        "value_bounds": dim_input_info.value_bounds,
+    }
+
+
+@json_utils.Decoder.register(DimensionInputInfo)
+def decode_dim_input_info(dim_input_info_dict):
+    dim_input_info = DimensionInputInfo((-1, -1, -1))
+    dim_input_info.value_bounds = dim_input_info_dict["value_bounds"]
+    return dim_input_info
@@ -1,5 +1,5 @@
 #
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,4 +16,5 @@
 #
 
 from nvtripy.frontend.tensor import Tensor
+from nvtripy.frontend.dimension_size import DimensionSize
 from nvtripy.trace.trace import Trace
@@ -15,10 +15,9 @@
 # limitations under the License.
 #
 
-from typing import Optional, Union
+from typing import Optional
 
 from nvtripy import export
-from nvtripy.common.datatype import int32
 from nvtripy.frontend.tensor import Tensor
 
 
@@ -47,7 +46,27 @@ def __str__(self) -> str:
         assert isinstance(val, int)
         return str(val)
 
-    def eval(self) -> "nvtripy.Tensor":
+    def eval(self) -> "nvtripy.DimensionSize":
+        """
+        Immediately evaluates this ``DimensionSize`` object.
+
+        .. note:: ``DimensionSize`` always resides on the host, even after it is evaluated.
+
+        Returns:
+            The evaluated ``DimensionSize``.
+
+        .. code-block:: python
+            :linenos:
+
+
+            dim_size = tp.ones((2, 2)).shape[0]
+            dim_size.eval()
+            print(dim_size.device)
+            assert dim_size.device.kind == "cpu"
+
+        """
+        from nvtripy.backend.mlir import memref
+        from nvtripy.trace.ops.constant import Constant
         from nvtripy.trace.ops.shape import GetDimensionSize, Shape
 
         # TODO (#593): Generalize this to any branchy graph:
@@ -62,4 +81,9 @@ def eval(self) -> "nvtripy.Tensor":
             dim_size.outputs[0].is_compile_tracer = self.trace_tensor.is_compile_tracer
             self.trace_tensor = dim_size.outputs[0]
 
-        return super().eval()
+        if not isinstance(producer, Constant):
+            super().eval()
+        dim_value = memref.tolist(self.trace_tensor.producer.data)
+        dim_size = DimensionSize(data=int(dim_value), name=self.name)
+        self.trace_tensor = dim_size.trace_tensor
+        return self
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`#`
`2`		`-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.`
	`2`	`+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.`
`3`	`3`	`# SPDX-License-Identifier: Apache-2.0`
`4`	`4`	`#`
`5`	`5`	`# Licensed under the Apache License, Version 2.0 (the "License");`
`@@ -16,4 +16,5 @@`
`16`	`16`	`#`
`17`	`17`
`18`	`18`	`from nvtripy.frontend.tensor import Tensor`
	`19`	`+from nvtripy.frontend.dimension_size import DimensionSize`
`19`	`20`	`from nvtripy.trace.trace import Trace`