diff --git a/tripy/nvtripy/frontend/ops/reduce/utils.py b/tripy/nvtripy/frontend/ops/reduce/utils.py
index be249c043..f2d2b0513 100644
--- a/tripy/nvtripy/frontend/ops/reduce/utils.py
+++ b/tripy/nvtripy/frontend/ops/reduce/utils.py
@@ -42,8 +42,7 @@ def topk_impl(TopKType, input, dim, k):
     from nvtripy.frontend.tensor import Tensor
 
     if input.rank == 0:
-        # TODO (#496): Remove this hack of adding 0 when inputs can be returned directly in compiled functions.
-        return input + 0, Tensor(0)
+        return input, Tensor(0)
 
     dim = op_utils.process_dim(dim, input.rank)
 
diff --git a/tripy/tests/performance/test_perf.py b/tripy/tests/performance/test_perf.py
index 22837c290..109b83c01 100644
--- a/tripy/tests/performance/test_perf.py
+++ b/tripy/tests/performance/test_perf.py
@@ -114,13 +114,11 @@ def measure_overhead(num_io, warm_up_runs=10, iterations=1000):
     assert num_io > 0
     arg_str = ", ".join(f"arg{num}" for num in range(num_io))
 
-    # TODO (#496): Remove tp.relu after no-op functions work correctly and reset
-    # thresholds: 75 -> 45, 30 -> 10.
     exec(
         dedent(
             f"""
             def func({arg_str}):
-                return [tp.relu(x) for x in [{arg_str}]]
+                return [{arg_str}]
             """
         ),
         locals(),
@@ -148,11 +146,11 @@ def measure_thunk():
     deltas = [n - p for p, n in zip(overheads[:-1], overheads[1:])]
     print(f"overheads: {overheads}")
     print(f"deltas: {deltas}")
-    assert all(delta < 75 for delta in deltas)
+    assert all(delta < 45 for delta in deltas)
 
     # Ensure all deltas are within a few microseconds of each other
     average_delta = sum(deltas) / float(len(deltas))
-    assert all(abs(delta - average_delta) < 30 for delta in deltas)
+    assert all(abs(delta - average_delta) < 10 for delta in deltas)
 
 
 def test_tripy_param_update(benchmark):
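For context, a minimal usage sketch of what this patch assumes (it is not part of the diff): with issue #496 resolved, a compiled nvtripy function should be able to return one of its inputs directly, so neither the "input + 0" hack in topk_impl nor the tp.relu wrapper in the overhead benchmark is needed. The function name "identity" and the shape/dtype below are illustrative only.

    # Illustrative sketch, not part of this change. Assumes issue #496 is fixed,
    # i.e. compiled functions may return their inputs directly.
    import nvtripy as tp

    def identity(x):
        # Previously this would have needed to be `return x + 0` when compiled.
        return x

    # Compile for a fixed input shape and run once to confirm the pass-through works.
    compiled_identity = tp.compile(identity, args=[tp.InputInfo((2, 3), dtype=tp.float32)])
    out = compiled_identity(tp.ones((2, 3), dtype=tp.float32))
    print(out)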