[Tripy] Return the shape immediately if it is statically known instead of producing a trace operator. (#379)

slyubomirsky · web-flow · commit fec98bd7182b · 2024-11-26T18:06:36.000-05:00
Addresses issue #360.
diff --git a/tripy/tests/backend/api/test_compile.py b/tripy/tests/backend/api/test_compile.py
@@ -138,6 +138,19 @@ def test_dynamic_shapes(self):
         out = compiled_add(tp.ones((3, 1), dtype=tp.float32), tp.ones((3, 1), dtype=tp.float32))
         assert cp.array_equal(cp.from_dlpack(out), cp.ones((3, 1), dtype=cp.float32) * 2)
 
+    # if we specify dynamic shapes in compilation, they should not be fixed afterwards
+    def test_dynamic_shapes_not_fixed(self):
+        def func(inp):
+            s = inp.shape[0] + inp.shape[1] + inp.shape[2]
+            return tp.ones([s], dtype=tp.float32)
+
+        compiled_ones = tp.compile(func, args=[tp.InputInfo(((1, 2, 5), (1, 2, 5), (1, 2, 5)), dtype=tp.float32)])
+
+        for shape in ((1, 1, 1), (3, 3, 3), (2, 4, 5), (5, 2, 1)):
+            inp = tp.ones(shape, dtype=tp.float32)
+            out = compiled_ones(inp)
+            assert out.shape == [sum(shape)]
+
     def test_error_if_evaling_input_during_compile(self):
         def func(a):
             print(a)
diff --git a/tripy/tests/frontend/trace/ops/test_binary_elementwise.py b/tripy/tests/frontend/trace/ops/test_binary_elementwise.py
@@ -127,12 +127,13 @@ def test_invalid_broadcast_fails(self):
             c.eval()
 
     def test_dimension_size_inputs(self):
-        a = tp.Tensor([1, 2])
+        d = tp.DimensionSize(1)
 
         # Operations on only DimensionSizes will yield a DimensionSize
-        out = a.shape[0] + a.shape[0]
+        out = d + d
         assert isinstance(out, tp.DimensionSize)
 
         # Otherwise, a Tensor is yielded.
-        out = a + a.shape[0]
+        a = tp.Tensor([1, 2])
+        out = a + d
         assert isinstance(out, tp.Tensor) and not isinstance(out, tp.DimensionSize)
diff --git a/tripy/tests/performance/test_perf.py b/tripy/tests/performance/test_perf.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 import time
 from textwrap import dedent
+from typing import Callable
 
 import pytest
 import torch
@@ -25,6 +26,22 @@
 import tripy as tp
 
 
+def run_timed_trials(thunk: Callable[[], None], warm_up_runs=10, iterations=1000):
+    """
+    Returns the average time measured for calls to the thunk (the function intended to be timed)
+    in microseconds. First performs the specified number of untimed warm-ups.
+    """
+
+    for _ in range(warm_up_runs):
+        thunk()
+
+    start = time.perf_counter_ns()
+    for _ in range(iterations):
+        thunk()
+    end = time.perf_counter_ns()
+    return (end - start) / (iterations * 1000.0)
+
+
 @pytest.mark.parametrize("perf_case", PERF_CASES)
 def test_perf_regression(perf_case, benchmark):
     compiled_tripy_module, _, inputs, _ = perf_case
@@ -115,15 +132,10 @@ def func({arg_str}):
         for input in inputs:
             input.eval()
 
-        for _ in range(warm_up_runs):
-            compiled_one_io(*inputs)
-
-        start = time.perf_counter_ns()
-        for _ in range(iterations):
-            compiled_one_io(*inputs)
-        end = time.perf_counter_ns()
+        def measure_thunk():
+            return compiled_one_io(*inputs)
 
-        return (end - start) / (iterations * 1000.0)
+        return run_timed_trials(measure_thunk, warm_up_runs=warm_up_runs, iterations=iterations)
 
     assert measure_overhead(1) < 60.0
 
@@ -137,3 +149,13 @@ def func({arg_str}):
     # Ensure all deltas are within a few microseconds of each other
     average_delta = sum(deltas) / float(len(deltas))
     assert all(abs(delta - average_delta) < 10 for delta in deltas)
+
+
+def test_tripy_param_update(benchmark):
+    m = tp.Module()
+    m.param = tp.Parameter([1, 2, 3, 4])
+
+    def measure_thunk():
+        m.param = tp.Parameter([5, 6, 7, 8])
+
+    benchmark(measure_thunk)
diff --git a/tripy/tripy/frontend/trace/ops/reduce.py b/tripy/tripy/frontend/trace/ops/reduce.py
@@ -296,6 +296,7 @@ def prod(
 
 
 def mean_impl(tensor: "tripy.Tensor", dim: Union[int, Sequence] = None, keepdim: bool = False, apply_to_divisor=None):
+    from tripy.frontend.tensor import Tensor
     from tripy.frontend.trace.ops.cast import cast
 
     sum_val = sum(tensor, dim=dim, keepdim=keepdim)
@@ -307,7 +308,12 @@ def mean_impl(tensor: "tripy.Tensor", dim: Union[int, Sequence] = None, keepdim:
     if apply_to_divisor:
         num_elements = apply_to_divisor(num_elements)
 
-    return sum_val / (cast(num_elements, sum_val.dtype))
+    num_elements = (
+        cast(num_elements, sum_val.dtype)
+        if isinstance(num_elements, Tensor)
+        else Tensor(num_elements, dtype=sum_val.dtype)
+    )
+    return sum_val / num_elements
 
 
 @export.public_api(document_under="operations/functions")
diff --git a/tripy/tripy/frontend/trace/ops/reshape.py b/tripy/tripy/frontend/trace/ops/reshape.py
@@ -43,14 +43,17 @@ def to_flat_ir(self, inputs, outputs):
 
 
 def infer_dimensions(input: "tripy.Tensor", shape: ShapeLike) -> ShapeLike:
+
     num_unknown_dims = len([dim for dim in shape if op_utils.is_minus_one(dim)])
     if num_unknown_dims > 1:
         raise_error(f"The new shape can have at most one inferred dimension (denoted by -1)", [f"Got shape: {shape}."])
 
     if num_unknown_dims == 1:
         input_volume = math.prod(input.shape)
         known_dims_volume = math.prod(dim for dim in shape if not op_utils.is_minus_one(dim))
-        inferred_dim = input_volume / known_dims_volume
+        inferred_dim = (
+            input_volume // known_dims_volume
+        )  # If we have scalars, the floor div ensures the result is an int.
 
         shape = [inferred_dim if op_utils.is_minus_one(dim) else dim for dim in shape]
 
diff --git a/tripy/tripy/frontend/trace/ops/shape.py b/tripy/tripy/frontend/trace/ops/shape.py
@@ -16,12 +16,12 @@
 #
 
 from dataclasses import dataclass
-from typing import List
 
 from tripy import constraints
 from tripy.common.datatype import DATA_TYPES
 from tripy.frontend.ops.registry import TENSOR_METHOD_REGISTRY
 from tripy.frontend.trace.ops.base import BaseTraceOp
+from tripy.types import ShapeLike
 
 
 @dataclass(repr=False)
@@ -45,7 +45,7 @@ def to_flat_ir(self, inputs, outputs):
 @TENSOR_METHOD_REGISTRY("shape")
 @property
 @constraints.dtypes(constraints={"self": "T1"}, variables={"T1": list(DATA_TYPES.keys())})
-def shape(self: "tripy.Tensor") -> List["tripy.DimensionSize"]:
+def shape(self: "tripy.Tensor") -> ShapeLike:
     """
     Represents the shape of the tensor.
 
@@ -63,4 +63,9 @@ def shape(self: "tripy.Tensor") -> List["tripy.DimensionSize"]:
         assert shape == [8, 2]
     """
 
+    # If the shape is statically known, we do not need to insert any operator calls.
+    # However, when tracing for compilation, the final program may still need these calls, so we build the operators in that case.
+    if all(dim >= 0 for dim in self.trace_tensor.shape) and not self.trace_tensor.is_compile_tracer:
+        return self.trace_tensor.shape
+
     return [GetDimensionSize.build([self], dim=index, always_cast_to_dimension_size=True) for index in range(self.rank)]
diff --git a/tripy/tripy/frontend/trace/ops/slice.py b/tripy/tripy/frontend/trace/ops/slice.py
@@ -164,7 +164,9 @@ def __getitem__(
         assert np.array_equal(cp.from_dlpack(output).get(), np.arange(10)[8:2:-1])
 
     """
+    from tripy.frontend.dimension_size import DimensionSize
     from tripy.frontend.tensor import Tensor
+    from tripy.frontend.trace.ops.binary_elementwise import maximum, minimum
     from tripy.frontend.trace.ops.flip import flip
     from tripy.frontend.trace.ops.gather import gather
     from tripy.frontend.trace.ops.squeeze import squeeze
@@ -198,9 +200,16 @@ def convert_to_positive_idx(index: Union[int, Tensor]) -> Union[int, Tensor]:
         # because out of bounds indices for a *slice* mean that the dim should be empty, not an error
         def clamp_bound(bound: Union[int, Tensor]) -> Union[int, Tensor]:
             if isinstance(bound, int):
-                return 0 if bound < 0 else where(bound > t_shape[i], t_shape[i], Tensor([bound]))
-            else:
-                return where(bound < 0, Tensor([0]), where(bound > t_shape[i], t_shape[i], bound))
+                if bound < 0:
+                    return 0
+
+                if isinstance(t_shape[i], int):
+                    return min(bound, t_shape[i])
+                return minimum(t_shape[i], Tensor([bound]))
+
+            # need the shape dimension to be a tensor to use as an argument to min and max
+            shape_dim = t_shape[i] if isinstance(t_shape[i], Tensor) else DimensionSize(t_shape[i])
+            return maximum(Tensor([0]), minimum(shape_dim, bound))
 
         if isinstance(idx, int) or isinstance(idx, Tensor):
             args.append(convert_to_positive_idx(idx))