From 2a361efc9b005ef8a512b2ab2c604366e4c75abe Mon Sep 17 00:00:00 2001 From: Antoine Simoulin Date: Fri, 20 Jun 2025 13:32:30 -0700 Subject: [PATCH 01/11] Adjust rotated clamping conditions Test Plan: ```bash pytest test/test_transforms_v2.py -k box -v ``` --- test/common_utils.py | 6 +- test/test_transforms_v2.py | 8 ++- .../transforms/v2/functional/_geometry.py | 15 ++--- torchvision/transforms/v2/functional/_meta.py | 60 +++++++++---------- 4 files changed, 47 insertions(+), 42 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 9da3cf52d1c..b0481b1badf 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -469,9 +469,9 @@ def sample_position(values, max_value): raise ValueError(f"Format {format} is not supported") out_boxes = torch.stack(parts, dim=-1).to(dtype=dtype, device=device) if tv_tensors.is_rotated_bounding_format(format): - # The rotated bounding boxes are not guaranteed to be within the canvas by design, - # so we apply clamping. We also add a 2 buffer to the canvas size to avoid - # numerical issues during the testing + # Rotated bounding boxes are not inherently confined within the canvas, so clamping is applied. + # Transform tests allow a 2-pixel tolerance relative to the canvas size. + # To prevent discrepancies when clamping with different canvas sizes, we add a 2-pixel buffer. buffer = 4 out_boxes = clamp_bounding_boxes( out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 7e667586ac1..4ef91cbf605 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -4421,9 +4421,15 @@ def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, h else reference_affine_bounding_boxes_helper ) + bounding_boxes = helper( + bounding_boxes, + affine_matrix=crop_affine_matrix, + new_canvas_size=(height, width) + ) + return helper( bounding_boxes, - affine_matrix=affine_matrix, + affine_matrix=resize_affine_matrix, new_canvas_size=size, ) diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 7e9766bdaf5..b28f2aced28 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -1104,8 +1104,9 @@ def _affine_bounding_boxes_with_expand( original_shape = bounding_boxes.shape dtype = bounding_boxes.dtype - need_cast = not bounding_boxes.is_floating_point() - bounding_boxes = bounding_boxes.float() if need_cast else bounding_boxes.clone() + acceptable_dtypes = [torch.float64] # Ensure consistency between CPU and GPU. 
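+        # float64 is deliberate: float32 kernels can round differently on CPU
+        # and CUDA, which would make the transformed coordinates device-dependent.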
+ need_cast = dtype not in acceptable_dtypes + bounding_boxes = bounding_boxes.to(torch.float64) if need_cast else bounding_boxes.clone() device = bounding_boxes.device is_rotated = tv_tensors.is_rotated_bounding_format(format) intermediate_format = tv_tensors.BoundingBoxFormat.XYXYXYXY if is_rotated else tv_tensors.BoundingBoxFormat.XYXY @@ -2397,11 +2398,11 @@ def elastic_bounding_boxes( original_shape = bounding_boxes.shape # TODO: first cast to float if bbox is int64 before convert_bounding_box_format - intermediate_format = tv_tensors.BoundingBoxFormat.XYXYXYXY if is_rotated else tv_tensors.BoundingBoxFormat.XYXY + intermediate_format = tv_tensors.BoundingBoxFormat.CXCYWHR if is_rotated else tv_tensors.BoundingBoxFormat.XYXY bounding_boxes = ( convert_bounding_box_format(bounding_boxes.clone(), old_format=format, new_format=intermediate_format) - ).reshape(-1, 8 if is_rotated else 4) + ).reshape(-1, 5 if is_rotated else 4) id_grid = _create_identity_grid(canvas_size, device=device, dtype=dtype) # We construct an approximation of inverse grid as inv_grid = id_grid - displacement @@ -2409,7 +2410,7 @@ def elastic_bounding_boxes( inv_grid = id_grid.sub_(displacement) # Get points from bboxes - points = bounding_boxes if is_rotated else bounding_boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]] + points = bounding_boxes[:, :2] if is_rotated else bounding_boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]] points = points.reshape(-1, 2) if points.is_floating_point(): points = points.ceil_() @@ -2421,8 +2422,8 @@ def elastic_bounding_boxes( transformed_points = inv_grid[0, index_y, index_x, :].add_(1).mul_(0.5 * t_size).sub_(0.5) if is_rotated: - transformed_points = transformed_points.reshape(-1, 8) - out_bboxes = _parallelogram_to_bounding_boxes(transformed_points).to(bounding_boxes.dtype) + transformed_points = transformed_points.reshape(-1, 2) + out_bboxes = torch.cat([transformed_points, bounding_boxes[:, 2:]], dim=1).to(bounding_boxes.dtype) else: transformed_points = transformed_points.reshape(-1, 4, 2) out_bbox_mins, out_bbox_maxs = torch.aminmax(transformed_points, dim=1) diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py index 1729aa4bbaf..96ee69c46c0 100644 --- a/torchvision/transforms/v2/functional/_meta.py +++ b/torchvision/transforms/v2/functional/_meta.py @@ -409,23 +409,17 @@ def _order_bounding_boxes_points( if indices is None: output_xyxyxyxy = bounding_boxes.reshape(-1, 8) x, y = output_xyxyxyxy[..., 0::2], output_xyxyxyxy[..., 1::2] - y_max = torch.max(y, dim=1, keepdim=True)[0] - _, x1 = ((y_max - y) / y_max + (x + 1) * 100).min(dim=1) + y_max = torch.max(y.abs(), dim=1, keepdim=True)[0] + _, x1 = (y / y_max + (x + 1) * 100).min(dim=1) indices = torch.ones_like(output_xyxyxyxy) indices[..., 0] = x1.mul(2) indices.cumsum_(1).remainder_(8) return indices, bounding_boxes.gather(1, indices.to(torch.int64)) -def _area(box: torch.Tensor) -> torch.Tensor: - x1, y1, x2, y2, x3, y3, x4, y4 = box.reshape(-1, 8).unbind(-1) - w = torch.sqrt((y2 - y1) ** 2 + (x2 - x1) ** 2) - h = torch.sqrt((y3 - y2) ** 2 + (x3 - x2) ** 2) - return w * h - - def _clamp_along_y_axis( bounding_boxes: torch.Tensor, + canvas_size: tuple[int, int], ) -> torch.Tensor: """ Adjusts bounding boxes along the y-axis based on specific conditions. 
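For orientation, `_order_bounding_boxes_points` exists so the case analysis below always sees the same physical corner first. A rough standalone sketch of that invariant (the helper and its scoring are illustrative, not the library code):

```python
import torch

def order_box_points(boxes_xyxyxyxy: torch.Tensor) -> torch.Tensor:
    """Cyclically shift each box's 4 (x, y) vertices so a canonical corner comes first."""
    pts = boxes_xyxyxyxy.reshape(-1, 4, 2)
    x, y = pts[..., 0], pts[..., 1]
    # Score vertices so that x dominates and y only breaks ties, mirroring the patch's heuristic.
    score = (x + 1) * 100 + y / y.abs().amax(dim=1, keepdim=True)
    first = score.argmin(dim=1)  # canonical first vertex per box
    idx = (first.unsqueeze(1) + torch.arange(4)) % 4  # cyclic shift keeps the winding order
    return pts.gather(1, idx.unsqueeze(-1).expand(-1, 4, 2)).reshape(-1, 8)

box = torch.tensor([[3.0, 1.0, 5.0, 3.0, 3.0, 5.0, 1.0, 3.0]])
print(order_box_points(box))  # tensor([[1., 3., 3., 1., 5., 3., 3., 5.]])
```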
@@ -448,29 +442,33 @@ def _clamp_along_y_axis(
     b2 = y2 + x2 / a
     b3 = y3 - a * x3
     b4 = y4 + x4 / a
-    b23 = (b2 - b3) / 2 * a / (1 + a**2)
-    z = torch.zeros_like(b1)
-    case_a = torch.cat([x.unsqueeze(1) for x in [z, b1, x2, y2, x3, y3, x3 - x2, y3 + b1 - y2]], dim=1)
-    case_b = torch.cat([x.unsqueeze(1) for x in [z, b4, x2 - x1, y2 - y1 + b4, x3, y3, x4, y4]], dim=1)
-    case_c = torch.cat(
-        [x.unsqueeze(1) for x in [z, (b2 + b3) / 2, b23, -b23 / a + b2, x3, y3, b23, b23 * a + b3]], dim=1
+    c = a / (1 + a**2)
+    b1 = b2.clamp(0).clamp(b1, b3)
+    b4 = b3.clamp(max=canvas_size[0]).clamp(b2, b4)
+    case_a = torch.stack(
+        (
+            (b4 - b1) * c,
+            (b4 - b1) * c * a + b1,
+            (b2 - b1) * c,
+            (b1 - b2) * c / a + b2,
+            x3,
+            y3,
+            (b4 - b3) * c,
+            (b3 - b4) * c / a + b4,
+        ),
+        dim=-1,
     )
-    case_d = torch.zeros_like(case_c)
-    case_e = torch.cat([x.unsqueeze(1) for x in [x1.clamp(0), y1, x2.clamp(0), y2, x3, y3, x4, y4]], dim=1)
-
-    cond_a = (x1 < 0).logical_and(x2 >= 0).logical_and(x3 >= 0).logical_and(x4 >= 0)
-    cond_a = cond_a.logical_and(_area(case_a) > _area(case_b))
-    cond_a = cond_a.logical_or((x1 < 0).logical_and(x2 >= 0).logical_and(x3 >= 0).logical_and(x4 <= 0))
-    cond_b = (x1 < 0).logical_and(x2 >= 0).logical_and(x3 >= 0).logical_and(x4 >= 0)
-    cond_b = cond_b.logical_and(_area(case_a) <= _area(case_b))
-    cond_b = cond_b.logical_or((x1 < 0).logical_and(x2 <= 0).logical_and(x3 >= 0).logical_and(x4 >= 0))
-    cond_c = (x1 < 0).logical_and(x2 <= 0).logical_and(x3 >= 0).logical_and(x4 <= 0)
-    cond_d = (x1 < 0).logical_and(x2 <= 0).logical_and(x3 <= 0).logical_and(x4 <= 0)
-    cond_e = x1.isclose(x2)
-
+    case_b = bounding_boxes.clone()
+    case_b[..., 0].clamp_(0)
+    case_b[..., 6].clamp_(0)
+    case_c = torch.zeros_like(case_b)
+
+    cond_a = x1 < 0
+    cond_b = y1.isclose(y2, rtol=1e-05, atol=1e-05)
+    cond_c = (x1 <= 0).logical_and(x2 <= 0).logical_and(x3 <= 0).logical_and(x4 <= 0)
     for cond, case in zip(
-        [cond_a, cond_b, cond_c, cond_d, cond_e],
-        [case_a, case_b, case_c, case_d, case_e],
+        [cond_a, cond_b, cond_c],
+        [case_a, case_b, case_c],
    ):
         bounding_boxes = torch.where(cond.unsqueeze(1).repeat(1, 8), case.reshape(-1, 8), bounding_boxes)
     return bounding_boxes.to(original_dtype).reshape(original_shape)
@@ -512,7 +510,7 @@ def _clamp_rotated_bounding_boxes(
 
     for _ in range(4):  # Iterate over the 4 vertices.
         indices, out_boxes = _order_bounding_boxes_points(out_boxes)
-        out_boxes = _clamp_along_y_axis(out_boxes)
+        out_boxes = _clamp_along_y_axis(out_boxes, canvas_size)
         _, out_boxes = _order_bounding_boxes_points(out_boxes, indices)
         # rotate 90 degrees counterclockwise
         out_boxes[:, ::2], out_boxes[:, 1::2] = (

From 4261ed3201ced834533259d5b361a2d8df0cd2c0 Mon Sep 17 00:00:00 2001
From: Antoine Simoulin
Date: Fri, 20 Jun 2025 13:46:41 -0700
Subject: [PATCH 02/11] apply linting

---
 test/common_utils.py       | 2 +-
 test/test_transforms_v2.py | 7 +------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/test/common_utils.py b/test/common_utils.py
index b0481b1badf..8ecfd81d3a0 100644
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -469,7 +469,7 @@ def sample_position(values, max_value):
         raise ValueError(f"Format {format} is not supported")
     out_boxes = torch.stack(parts, dim=-1).to(dtype=dtype, device=device)
     if tv_tensors.is_rotated_bounding_format(format):
-        # Rotated bounding boxes are not inherently confined within the canvas, so clamping is applied.
+        # Rotated bounding boxes are not inherently confined within the canvas, so clamping is applied.
# Transform tests allow a 2-pixel tolerance relative to the canvas size. # To prevent discrepancies when clamping with different canvas sizes, we add a 2-pixel buffer. buffer = 4 diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 4ef91cbf605..19b832a14bd 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -4413,7 +4413,6 @@ def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, h [0, 0, 1], ], ) - affine_matrix = (resize_affine_matrix @ crop_affine_matrix)[:2, :] helper = ( reference_affine_rotated_bounding_boxes_helper @@ -4421,11 +4420,7 @@ def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, h else reference_affine_bounding_boxes_helper ) - bounding_boxes = helper( - bounding_boxes, - affine_matrix=crop_affine_matrix, - new_canvas_size=(height, width) - ) + bounding_boxes = helper(bounding_boxes, affine_matrix=crop_affine_matrix, new_canvas_size=(height, width)) return helper( bounding_boxes, From 42bae572fd426368354d024346fd59ff8f3086c7 Mon Sep 17 00:00:00 2001 From: Antoine Simoulin Date: Thu, 26 Jun 2025 19:51:36 -0700 Subject: [PATCH 03/11] Fix hard clamping --- test/common_utils.py | 12 -- test/test_transforms_v2.py | 18 ++- torchvision/transforms/v2/functional/_meta.py | 148 ++++++++++++++---- 3 files changed, 124 insertions(+), 54 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 8ecfd81d3a0..61feee4c896 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -468,18 +468,6 @@ def sample_position(values, max_value): else: raise ValueError(f"Format {format} is not supported") out_boxes = torch.stack(parts, dim=-1).to(dtype=dtype, device=device) - if tv_tensors.is_rotated_bounding_format(format): - # Rotated bounding boxes are not inherently confined within the canvas, so clamping is applied. - # Transform tests allow a 2-pixel tolerance relative to the canvas size. - # To prevent discrepancies when clamping with different canvas sizes, we add a 2-pixel buffer. 
- buffer = 4 - out_boxes = clamp_bounding_boxes( - out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer) - ) - if format is tv_tensors.BoundingBoxFormat.XYWHR or format is tv_tensors.BoundingBoxFormat.CXCYWHR: - out_boxes[:, :2] += buffer // 2 - elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY: - out_boxes[:, :] += buffer // 2 return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 19b832a14bd..9b44f0f5a1a 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -1298,7 +1298,7 @@ def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes: tv_tensors.B ) helper = ( - functools.partial(reference_affine_rotated_bounding_boxes_helper, flip=True) + functools.partial(reference_affine_rotated_bounding_boxes_helper, flip=True, clamp=False) if tv_tensors.is_rotated_bounding_format(bounding_boxes.format) else reference_affine_bounding_boxes_helper ) @@ -1907,7 +1907,7 @@ def _reference_vertical_flip_bounding_boxes(self, bounding_boxes: tv_tensors.Bou ) helper = ( - functools.partial(reference_affine_rotated_bounding_boxes_helper, flip=True) + functools.partial(reference_affine_rotated_bounding_boxes_helper, flip=True, clamp=False) if tv_tensors.is_rotated_bounding_format(bounding_boxes.format) else reference_affine_bounding_boxes_helper ) @@ -2196,7 +2196,7 @@ def _recenter_bounding_boxes_after_expand(self, bounding_boxes, *, recenter_xy): (bounding_boxes.to(torch.float64) - torch.tensor(translate)).to(bounding_boxes.dtype), like=bounding_boxes ) - def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, center): + def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, center, canvas_size=None): if center is None: center = [s * 0.5 for s in bounding_boxes.canvas_size[::-1]] cx, cy = center @@ -2222,7 +2222,7 @@ def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, cen output = helper( bounding_boxes, affine_matrix=affine_matrix, - new_canvas_size=new_canvas_size, + new_canvas_size=new_canvas_size if canvas_size is None else canvas_size, clamp=False, ) @@ -2239,9 +2239,10 @@ def test_functional_bounding_boxes_correctness(self, format, angle, expand, cent actual = F.rotate(bounding_boxes, angle=angle, expand=expand, center=center) expected = self._reference_rotate_bounding_boxes(bounding_boxes, angle=angle, expand=expand, center=center) + torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0) + expected = self._reference_rotate_bounding_boxes(bounding_boxes, angle=angle, expand=expand, center=center, canvas_size=actual.canvas_size) torch.testing.assert_close(actual, expected) - torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0) @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) @pytest.mark.parametrize("expand", [False, True]) @@ -2259,9 +2260,10 @@ def test_transform_bounding_boxes_correctness(self, format, expand, center, seed actual = transform(bounding_boxes) expected = self._reference_rotate_bounding_boxes(bounding_boxes, **params, expand=expand, center=center) - - torch.testing.assert_close(actual, expected) torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0) + + expected = self._reference_rotate_bounding_boxes(bounding_boxes, **params, expand=expand, center=center, canvas_size=actual.canvas_size) + 
torch.testing.assert_close(actual, expected) def _recenter_keypoints_after_expand(self, keypoints, *, recenter_xy): x, y = recenter_xy @@ -4437,7 +4439,7 @@ def test_functional_bounding_boxes_correctness(self, format): bounding_boxes, **self.CROP_KWARGS, size=self.OUTPUT_SIZE ) - torch.testing.assert_close(actual, expected) + torch.testing.assert_close(actual, expected, atol=1e-5, rtol=1e-5) assert_equal(F.get_size(actual), F.get_size(expected)) def _reference_resized_crop_keypoints(self, keypoints, *, top, left, height, width, size): diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py index 96ee69c46c0..7d14d2eb084 100644 --- a/torchvision/transforms/v2/functional/_meta.py +++ b/torchvision/transforms/v2/functional/_meta.py @@ -410,16 +410,87 @@ def _order_bounding_boxes_points( output_xyxyxyxy = bounding_boxes.reshape(-1, 8) x, y = output_xyxyxyxy[..., 0::2], output_xyxyxyxy[..., 1::2] y_max = torch.max(y.abs(), dim=1, keepdim=True)[0] - _, x1 = (y / y_max + (x + 1) * 100).min(dim=1) + x_max = torch.max(x.abs(), dim=1, keepdim=True)[0] + _, x1 = (y / y_max + (x / x_max) * 100).min(dim=1) indices = torch.ones_like(output_xyxyxyxy) indices[..., 0] = x1.mul(2) indices.cumsum_(1).remainder_(8) return indices, bounding_boxes.gather(1, indices.to(torch.int64)) +def _get_slope_and_intercept(box: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Calculate the slope and y-intercept of the lines defined by consecutive vertices in a bounding box. + This function computes the slope (a) and y-intercept (b) for each line segment in a bounding box, + where each line is defined by two consecutive vertices. + """ + x, y = box[..., ::2], box[..., 1::2] + a = y.diff(append=y[..., 0:1]) / x.diff(append=x[..., 0:1]) + b = y - a * x + return a, b + + +def _get_intersection_point(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: + """ + Calculate the intersection point of two lines defined by their slopes and y-intercepts. + This function computes the intersection points between pairs of lines, where each line + is defined by the equation y = ax + b (slope and y-intercept form). + """ + batch_size = a.shape[0] + x = b.diff(prepend=b[..., 3:4]).neg() / a.diff(prepend=a[..., 3:4]) + y = a * x + b + return torch.cat((x.unsqueeze(-1), y.unsqueeze(-1)), dim=-1).view(batch_size, 8) + + +def _clamp_y_intercept( + bounding_boxes: torch.Tensor, + original_bounding_boxes: torch.Tensor, + canvas_size: tuple[int, int], + clamping: str = "hard", +) -> torch.Tensor: + """ + Apply clamping to bounding box y-intercepts. This function handles two clamping strategies: + - Hard clamping: Ensures all box vertices stay within canvas boundaries, finding the largest + angle-preserving box enclosed within the original box and the image canvas. + - Soft clamping: Allows some vertices to extend beyond the canvas, finding the smallest + angle-preserving box that encloses the intersection of the original box and the image canvas. + + The function first calculates the slopes and y-intercepts of the lines forming the bounding box, + then applies various constraints to ensure the clamping conditions are respected. 
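+
+    Concretely, each side of the box is treated as a line y = a * x + b: two
+    vertices (x1, y1) and (x2, y2) give a = (y2 - y1) / (x2 - x1) and
+    b = y1 - a * x1, and two adjacent sides y = a1 * x + b1 and y = a2 * x + b2
+    intersect at x = (b2 - b1) / (a1 - a2), y = a1 * x + b1.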
+ """ + + a, b = _get_slope_and_intercept(bounding_boxes) + a1, a2, a3, a4 = a.unbind(-1) + b1, b2, b3, b4 = b.unbind(-1) + + # Clamp y-intercepts (soft clamping) + b1 = b2.clamp(0).clamp(b1, b3) + b4 = b3.clamp(max=canvas_size[0]).clamp(b2, b4) + + if clamping == "hard": + # Get y-intercepts from original bounding boxes + _, b = _get_slope_and_intercept(original_bounding_boxes) + _, b2, b3, _ = b.unbind(-1) + + # Set b1 and b4 to the average of their clamped values + b1 = b4 = (b1.clamp(0, canvas_size[0]) + b4.clamp(0, canvas_size[0])) / 2 + + # Ensure b2 and b3 defined the box of maximum area after clamping b1 and b4 + b2.clamp_(b1 * a2 / a1, b4).clamp_((a1 - a2) * canvas_size[1] + b1) + b2.clamp_(b3 * a2 / a3, b4).clamp_((a3 - a2) * canvas_size[1] + b3) + b3.clamp_(max=canvas_size[0] * (1 - a3 / a4) + b4 * a3 / a4) + b3.clamp_(max=canvas_size[0] * (1 - a3 / a2) + b2 * a3 / a2) + b3.clamp_(b1, (a2 - a3) * canvas_size[1] + b2) + b3.clamp_(b1, (a4 - a3) * canvas_size[1] + b4) + + return torch.stack([b1, b2, b3, b4], dim=-1) + + def _clamp_along_y_axis( bounding_boxes: torch.Tensor, + original_bounding_boxes: torch.Tensor, canvas_size: tuple[int, int], + clamping: str = "hard", ) -> torch.Tensor: """ Adjusts bounding boxes along the y-axis based on specific conditions. @@ -430,52 +501,53 @@ def _clamp_along_y_axis( Args: bounding_boxes (torch.Tensor): A tensor containing bounding box coordinates. + original_bounding_boxes (torch.Tensor): The original bounding boxes before any clamping is applied. + canvas_size (tuple[int, int]): The size of the canvas as (height, width). + clamping (str, optional): The clamping strategy to use. Defaults to "hard". Returns: torch.Tensor: The adjusted bounding boxes. """ - original_dtype = bounding_boxes.dtype + dtype = bounding_boxes.dtype + acceptable_dtypes = [torch.float64] # Ensure consistency between CPU and GPU. + need_cast = dtype not in acceptable_dtypes + eps = 1e-06 # Ensure consistency between CPU and GPU. 
original_shape = bounding_boxes.shape - x1, y1, x2, y2, x3, y3, x4, y4 = bounding_boxes.reshape(-1, 8).unbind(-1) - a = (y2 - y1) / (x2 - x1) - b1 = y1 - a * x1 - b2 = y2 + x2 / a - b3 = y3 - a * x3 - b4 = y4 + x4 / a - c = a / (1 + a**2) - b1 = b2.clamp(0).clamp(b1, b3) - b4 = b3.clamp(max=canvas_size[0]).clamp(b2, b4) - case_a = torch.stack( - ( - (b4 - b1) * c, - (b4 - b1) * c * a + b1, - (b2 - b1) * c, - (b1 - b2) * c / a + b2, - x3, - y3, - (b4 - b3) * c, - (b3 - b4) * c / a + b4, - ), - dim=-1, - ) + bounding_boxes = bounding_boxes.reshape(-1, 8) + original_bounding_boxes = original_bounding_boxes.reshape(-1, 8) + + # Calculate slopes (a) and y-intercepts (b) for all lines in the bounding boxes + a, b = _get_slope_and_intercept(bounding_boxes) + x1, y1, x2, y2, x3, y3, x4, y4 = bounding_boxes.unbind(-1) + b = _clamp_y_intercept(bounding_boxes, original_bounding_boxes, canvas_size, clamping) + + case_a = _get_intersection_point(a, b) case_b = bounding_boxes.clone() - case_b[..., 0].clamp_(0) - case_b[..., 6].clamp_(0) + case_b[..., 0].clamp_(0) # Clamp x1 to 0 + case_b[..., 6].clamp_(0) # Clamp x4 to 0 case_c = torch.zeros_like(case_b) - cond_a = x1 < 0 - cond_b = y1.isclose(y2, rtol=1e-05, atol=1e-05) - cond_c = (x1 <= 0).logical_and(x2 <= 0).logical_and(x3 <= 0).logical_and(x4 <= 0) - for cond, case in zip( + cond_a = (x1 < eps) & ~case_a.isnan().any(-1) # First point is outside left boundary + cond_b = y1.isclose(y2, rtol=eps, atol=eps) | y3.isclose(y4, rtol=eps, atol=eps) # First line is nearly vertical + cond_c = (x1 <= 0) & (x2 <= 0) & (x3 <= 0) & (x4 <= 0) # All points outside left boundary + cond_c = cond_c | y1.isclose(y4, rtol=eps, atol=eps) | y2.isclose(y3, rtol=eps, atol=eps) | (cond_b & x1.isclose(x2, rtol=eps, atol=eps)) # First line is nearly horizontal + + for (cond, case) in zip( [cond_a, cond_b, cond_c], [case_a, case_b, case_c], ): bounding_boxes = torch.where(cond.unsqueeze(1).repeat(1, 8), case.reshape(-1, 8), bounding_boxes) - return bounding_boxes.to(original_dtype).reshape(original_shape) + bounding_boxes[..., 0].clamp_(0) # Clamp x1 to 0 + + if need_cast: + if dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64): + bounding_boxes.round_() + bounding_boxes = bounding_boxes.to(dtype) + return bounding_boxes.reshape(original_shape) def _clamp_rotated_bounding_boxes( - bounding_boxes: torch.Tensor, format: BoundingBoxFormat, canvas_size: tuple[int, int] + bounding_boxes: torch.Tensor, format: BoundingBoxFormat, canvas_size: tuple[int, int], clamping_mode: str = "soft" ) -> torch.Tensor: """ Clamp rotated bounding boxes to ensure they stay within the canvas boundaries. @@ -508,15 +580,22 @@ def _clamp_rotated_bounding_boxes( ) ).reshape(-1, 8) + original_boxes = out_boxes.clone() for _ in range(4): # Iterate over the 4 vertices. 
indices, out_boxes = _order_bounding_boxes_points(out_boxes)
-        out_boxes = _clamp_along_y_axis(out_boxes, canvas_size)
+        _, original_boxes = _order_bounding_boxes_points(original_boxes, indices)
+        out_boxes = _clamp_along_y_axis(out_boxes, original_boxes, canvas_size, clamping_mode)
         _, out_boxes = _order_bounding_boxes_points(out_boxes, indices)
+        _, original_boxes = _order_bounding_boxes_points(original_boxes, indices)
         # rotate 90 degrees counterclockwise
         out_boxes[:, ::2], out_boxes[:, 1::2] = (
             out_boxes[:, 1::2].clone(),
             canvas_size[1] - out_boxes[:, ::2].clone(),
         )
+        original_boxes[:, ::2], original_boxes[:, 1::2] = (
+            original_boxes[:, 1::2].clone(),
+            canvas_size[1] - original_boxes[:, ::2].clone(),
+        )
         canvas_size = (canvas_size[1], canvas_size[0])
 
     out_boxes = convert_bounding_box_format(
@@ -525,7 +604,8 @@ def _clamp_rotated_bounding_boxes(
 
     if need_cast:
         if dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64):
-            out_boxes.round_()
+            # Adding epsilon to ensure consistency between CPU and GPU rounding.
+            out_boxes.add_(1e-7).round_()
         out_boxes = out_boxes.to(dtype)
     return out_boxes
 

From 9e3f7c03e86e8e84a39e79a8f7512a24386a4fd5 Mon Sep 17 00:00:00 2001
From: Antoine Simoulin
Date: Thu, 26 Jun 2025 20:00:18 -0700
Subject: [PATCH 04/11] Adjust soft clamping

---
 torchvision/transforms/v2/functional/_meta.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py
index 7d14d2eb084..1ea73a7cf8e 100644
--- a/torchvision/transforms/v2/functional/_meta.py
+++ b/torchvision/transforms/v2/functional/_meta.py
@@ -446,7 +446,7 @@ def _clamp_y_intercept(
     bounding_boxes: torch.Tensor,
     original_bounding_boxes: torch.Tensor,
     canvas_size: tuple[int, int],
-    clamping: str = "hard",
+    clamping_mode: str = "hard",
 ) -> torch.Tensor:
     """
     Apply clamping to bounding box y-intercepts. This function handles two clamping strategies:
@@ -464,10 +464,10 @@ def _clamp_y_intercept(
     b1, b2, b3, b4 = b.unbind(-1)
 
     # Clamp y-intercepts (soft clamping)
-    b1 = b2.clamp(0).clamp(b1, b3)
-    b4 = b3.clamp(max=canvas_size[0]).clamp(b2, b4)
+    b1 = b2.clamp(b1, b3).clamp(0, canvas_size[0])
+    b4 = b3.clamp(b2, b4).clamp(0, canvas_size[0])
 
-    if clamping == "hard":
+    if clamping_mode == "hard":
         # Get y-intercepts from original bounding boxes
         _, b = _get_slope_and_intercept(original_bounding_boxes)
         _, b2, b3, _ = b.unbind(-1)
@@ -490,7 +490,7 @@ def _clamp_along_y_axis(
     bounding_boxes: torch.Tensor,
     original_bounding_boxes: torch.Tensor,
     canvas_size: tuple[int, int],
-    clamping: str = "hard",
+    clamping_mode: str = "hard",
 ) -> torch.Tensor:
     """
     Adjusts bounding boxes along the y-axis based on specific conditions.
@@ -503,7 +503,7 @@ def _clamp_along_y_axis(
         bounding_boxes (torch.Tensor): A tensor containing bounding box coordinates.
         original_bounding_boxes (torch.Tensor): The original bounding boxes before any clamping is applied.
         canvas_size (tuple[int, int]): The size of the canvas as (height, width).
-        clamping (str, optional): The clamping strategy to use. Defaults to "hard".
+        clamping_mode (str, optional): The clamping strategy to use. Defaults to "hard".
 
     Returns:
         torch.Tensor: The adjusted bounding boxes.
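The loop above only ever clamps against the left canvas edge (the y-axis); each of the four passes then rotates every vertex a quarter turn counterclockwise so the next edge takes its place. A minimal self-contained sketch of that bookkeeping (the helper name and values here are ours, not the library's):

```python
import torch

def quarter_turn_ccw(points: torch.Tensor, canvas_size: tuple[int, int]) -> torch.Tensor:
    # Mirrors the in-place swap in _clamp_rotated_bounding_boxes:
    # (x, y) -> (y, W - x), where W is the current canvas width.
    x, y = points[..., 0], points[..., 1]
    return torch.stack((y, canvas_size[1] - x), dim=-1)

pts = torch.tensor([[10.0, 20.0]])
canvas = (100, 200)  # (height, width)
for _ in range(4):
    pts = quarter_turn_ccw(pts, canvas)
    canvas = (canvas[1], canvas[0])  # height and width trade places each turn
print(pts)  # tensor([[10., 20.]]) -- four quarter turns restore the input
```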
@@ -519,7 +519,7 @@ def _clamp_along_y_axis( # Calculate slopes (a) and y-intercepts (b) for all lines in the bounding boxes a, b = _get_slope_and_intercept(bounding_boxes) x1, y1, x2, y2, x3, y3, x4, y4 = bounding_boxes.unbind(-1) - b = _clamp_y_intercept(bounding_boxes, original_bounding_boxes, canvas_size, clamping) + b = _clamp_y_intercept(bounding_boxes, original_bounding_boxes, canvas_size, clamping_mode) case_a = _get_intersection_point(a, b) case_b = bounding_boxes.clone() @@ -537,7 +537,8 @@ def _clamp_along_y_axis( [case_a, case_b, case_c], ): bounding_boxes = torch.where(cond.unsqueeze(1).repeat(1, 8), case.reshape(-1, 8), bounding_boxes) - bounding_boxes[..., 0].clamp_(0) # Clamp x1 to 0 + if clamping_mode == "hard": + bounding_boxes[..., 0].clamp_(0) # Clamp x1 to 0 if need_cast: if dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64): From be3619feb4a90dac2938836768e0c6f6611679d3 Mon Sep 17 00:00:00 2001 From: Antoine Simoulin Date: Thu, 26 Jun 2025 20:08:52 -0700 Subject: [PATCH 05/11] lint --- test/common_utils.py | 2 +- test/test_transforms_v2.py | 10 +++++++--- torchvision/transforms/v2/functional/_meta.py | 7 ++++++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 61feee4c896..afb48dce541 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -21,7 +21,7 @@ from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair from torchvision import io, tv_tensors from torchvision.transforms._functional_tensor import _max_value as get_max_value -from torchvision.transforms.v2.functional import clamp_bounding_boxes, to_image, to_pil_image +from torchvision.transforms.v2.functional import to_image, to_pil_image IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"]) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 9b44f0f5a1a..bea3fe8976d 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -2241,7 +2241,9 @@ def test_functional_bounding_boxes_correctness(self, format, angle, expand, cent expected = self._reference_rotate_bounding_boxes(bounding_boxes, angle=angle, expand=expand, center=center) torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0) - expected = self._reference_rotate_bounding_boxes(bounding_boxes, angle=angle, expand=expand, center=center, canvas_size=actual.canvas_size) + expected = self._reference_rotate_bounding_boxes( + bounding_boxes, angle=angle, expand=expand, center=center, canvas_size=actual.canvas_size + ) torch.testing.assert_close(actual, expected) @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) @@ -2261,8 +2263,10 @@ def test_transform_bounding_boxes_correctness(self, format, expand, center, seed expected = self._reference_rotate_bounding_boxes(bounding_boxes, **params, expand=expand, center=center) torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0) - - expected = self._reference_rotate_bounding_boxes(bounding_boxes, **params, expand=expand, center=center, canvas_size=actual.canvas_size) + + expected = self._reference_rotate_bounding_boxes( + bounding_boxes, **params, expand=expand, center=center, canvas_size=actual.canvas_size + ) torch.testing.assert_close(actual, expected) def _recenter_keypoints_after_expand(self, keypoints, *, recenter_xy): diff --git a/torchvision/transforms/v2/functional/_meta.py 
b/torchvision/transforms/v2/functional/_meta.py index 1ea73a7cf8e..c30838905fd 100644 --- a/torchvision/transforms/v2/functional/_meta.py +++ b/torchvision/transforms/v2/functional/_meta.py @@ -530,7 +530,12 @@ def _clamp_along_y_axis( cond_a = (x1 < eps) & ~case_a.isnan().any(-1) # First point is outside left boundary cond_b = y1.isclose(y2, rtol=eps, atol=eps) | y3.isclose(y4, rtol=eps, atol=eps) # First line is nearly vertical cond_c = (x1 <= 0) & (x2 <= 0) & (x3 <= 0) & (x4 <= 0) # All points outside left boundary - cond_c = cond_c | y1.isclose(y4, rtol=eps, atol=eps) | y2.isclose(y3, rtol=eps, atol=eps) | (cond_b & x1.isclose(x2, rtol=eps, atol=eps)) # First line is nearly horizontal + cond_c = ( + cond_c + | y1.isclose(y4, rtol=eps, atol=eps) + | y2.isclose(y3, rtol=eps, atol=eps) + | (cond_b & x1.isclose(x2, rtol=eps, atol=eps)) + ) # First line is nearly horizontal for (cond, case) in zip( [cond_a, cond_b, cond_c], From 62f5f78347a335b30442ca27869e185303c63706 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Jun 2025 10:31:49 +0100 Subject: [PATCH 06/11] remove debug stuff --- test/test_transforms_v2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 87abd7516b2..c4209c5e05c 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -5615,7 +5615,6 @@ class TestSetClampingMode: def test_setter(self, format, constructor_clamping_mode, desired_clamping_mode): in_boxes = make_bounding_boxes(format=format, clamping_mode=constructor_clamping_mode) - assert in_boxes.clamping_mode == constructor_clamping_mode # input is unchanged: no leak out_boxes = transforms.SetClampingMode(clamping_mode=desired_clamping_mode)(in_boxes) assert in_boxes.clamping_mode == constructor_clamping_mode # input is unchanged: no leak From 9e0c2ddcc483d3a47589a544d2aa0b7ec190686e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Jun 2025 13:48:34 +0100 Subject: [PATCH 07/11] Make soft the default clamping_mode, and add a test --- test/common_utils.py | 2 +- test/test_tv_tensors.py | 5 +++++ torchvision/transforms/v2/functional/_geometry.py | 14 +++++++------- torchvision/tv_tensors/_bounding_boxes.py | 2 +- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index c4e950997ca..ee3a2d5cbde 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -410,7 +410,7 @@ def make_bounding_boxes( canvas_size=DEFAULT_SIZE, *, format=tv_tensors.BoundingBoxFormat.XYXY, - clamping_mode="hard", # TODOBB + clamping_mode="soft", num_boxes=1, dtype=None, device="cpu", diff --git a/test/test_tv_tensors.py b/test/test_tv_tensors.py index 43efceba5c9..bed419b312c 100644 --- a/test/test_tv_tensors.py +++ b/test/test_tv_tensors.py @@ -406,3 +406,8 @@ def test_return_type_input(): tv_tensors.set_return_type("typo") tv_tensors.set_return_type("tensor") + + +def test_box_clamping_mode_default(): + assert tv_tensors.BoundingBoxes([0, 0, 10, 10], format="XYXY", canvas_size=(100, 100)).clamping_mode == "soft" + assert tv_tensors.BoundingBoxes([0, 0, 10, 10, 0], format="XYWHR", canvas_size=(100, 100)).clamping_mode == "soft" diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 8fd7a776de9..57646d957aa 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -522,7 +522,7 @@ def resize_bounding_boxes( size: Optional[list[int]], max_size: Optional[int] = None, 
format: tv_tensors.BoundingBoxFormat = tv_tensors.BoundingBoxFormat.XYXY, - clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft + clamping_mode: CLAMPING_MODE_TYPE = "soft", ) -> tuple[torch.Tensor, tuple[int, int]]: # We set the default format as `tv_tensors.BoundingBoxFormat.XYXY` # to ensure backward compatibility. @@ -1108,7 +1108,7 @@ def _affine_bounding_boxes_with_expand( shear: list[float], center: Optional[list[float]] = None, expand: bool = False, - clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft + clamping_mode: CLAMPING_MODE_TYPE = "soft", ) -> tuple[torch.Tensor, tuple[int, int]]: if bounding_boxes.numel() == 0: return bounding_boxes, canvas_size @@ -1211,7 +1211,7 @@ def affine_bounding_boxes( scale: float, shear: list[float], center: Optional[list[float]] = None, - clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft + clamping_mode: CLAMPING_MODE_TYPE = "soft", ) -> torch.Tensor: out_box, _ = _affine_bounding_boxes_with_expand( bounding_boxes, @@ -1740,7 +1740,7 @@ def pad_bounding_boxes( canvas_size: tuple[int, int], padding: list[int], padding_mode: str = "constant", - clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft + clamping_mode: CLAMPING_MODE_TYPE = "soft", ) -> tuple[torch.Tensor, tuple[int, int]]: if padding_mode not in ["constant"]: # TODO: add support of other padding modes @@ -1858,7 +1858,7 @@ def crop_bounding_boxes( left: int, height: int, width: int, - clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft + clamping_mode: CLAMPING_MODE_TYPE = "soft", ) -> tuple[torch.Tensor, tuple[int, int]]: # Crop or implicit pad if left and/or top have negative values: @@ -2098,7 +2098,7 @@ def perspective_bounding_boxes( startpoints: Optional[list[list[int]]], endpoints: Optional[list[list[int]]], coefficients: Optional[list[float]] = None, - clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft + clamping_mode: CLAMPING_MODE_TYPE = "soft", ) -> torch.Tensor: if bounding_boxes.numel() == 0: return bounding_boxes @@ -2413,7 +2413,7 @@ def elastic_bounding_boxes( format: tv_tensors.BoundingBoxFormat, canvas_size: tuple[int, int], displacement: torch.Tensor, - clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft + clamping_mode: CLAMPING_MODE_TYPE = "soft", ) -> torch.Tensor: expected_shape = (1, canvas_size[0], canvas_size[1], 2) if not isinstance(displacement, torch.Tensor): diff --git a/torchvision/tv_tensors/_bounding_boxes.py b/torchvision/tv_tensors/_bounding_boxes.py index 22a32b7dfa5..72a2825aad1 100644 --- a/torchvision/tv_tensors/_bounding_boxes.py +++ b/torchvision/tv_tensors/_bounding_boxes.py @@ -105,7 +105,7 @@ def __new__( *, format: BoundingBoxFormat | str, canvas_size: tuple[int, int], - clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB change default to soft! 
+ clamping_mode: CLAMPING_MODE_TYPE = "soft", dtype: torch.dtype | None = None, device: torch.device | str | int | None = None, requires_grad: bool | None = None, From 64e104e3ab5d939af750cba3f6957d1db35df095 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Jun 2025 14:37:26 +0100 Subject: [PATCH 08/11] set clamp=False in reference for both rotated and non-rotated --- test/test_transforms_v2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index c4209c5e05c..69f5def29c5 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -1301,11 +1301,11 @@ def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes: tv_tensors.B ) helper = ( - functools.partial(reference_affine_rotated_bounding_boxes_helper, flip=True, clamp=False) + functools.partial(reference_affine_rotated_bounding_boxes_helper, flip=True) if tv_tensors.is_rotated_bounding_format(bounding_boxes.format) else reference_affine_bounding_boxes_helper ) - return helper(bounding_boxes, affine_matrix=affine_matrix) + return helper(bounding_boxes, affine_matrix=affine_matrix, clamp=False) @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) @pytest.mark.parametrize( @@ -1910,11 +1910,11 @@ def _reference_vertical_flip_bounding_boxes(self, bounding_boxes: tv_tensors.Bou ) helper = ( - functools.partial(reference_affine_rotated_bounding_boxes_helper, flip=True, clamp=False) + functools.partial(reference_affine_rotated_bounding_boxes_helper, flip=True) if tv_tensors.is_rotated_bounding_format(bounding_boxes.format) else reference_affine_bounding_boxes_helper ) - return helper(bounding_boxes, affine_matrix=affine_matrix) + return helper(bounding_boxes, affine_matrix=affine_matrix, clamp=False) @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) From 064eb9ff7cc7601220fbf272beebb9b4a3a40d05 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Jun 2025 15:27:49 +0100 Subject: [PATCH 09/11] Simplify test and fix some kernels that I forgot --- test/test_transforms_v2.py | 21 ++++++------ .../transforms/v2/functional/_geometry.py | 33 ++++++++++++++++--- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 69f5def29c5..77c39f6a414 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -551,6 +551,7 @@ def affine_bounding_boxes(bounding_boxes): ), format=format, canvas_size=canvas_size, + clamping_mode=clamping_mode, ) @@ -639,6 +640,7 @@ def affine_rotated_bounding_boxes(bounding_boxes): ).reshape(bounding_boxes.shape), format=format, canvas_size=canvas_size, + clamping_mode=clamping_mode, ) @@ -4355,7 +4357,6 @@ def test_functional(self, make_input): (F.resized_crop_image, torch.Tensor), (F._geometry._resized_crop_image_pil, PIL.Image.Image), (F.resized_crop_image, tv_tensors.Image), - (F.resized_crop_bounding_boxes, tv_tensors.BoundingBoxes), (F.resized_crop_mask, tv_tensors.Mask), (F.resized_crop_video, tv_tensors.Video), (F.resized_crop_keypoints, tv_tensors.KeyPoints), @@ -4422,30 +4423,30 @@ def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, h ], ) + affine_matrix = (resize_affine_matrix @ crop_affine_matrix)[:2, :] + helper = ( reference_affine_rotated_bounding_boxes_helper if tv_tensors.is_rotated_bounding_format(bounding_boxes.format) else 
reference_affine_bounding_boxes_helper
         )
 
-        bounding_boxes = helper(bounding_boxes, affine_matrix=crop_affine_matrix, new_canvas_size=(height, width))
-
-        return helper(
-            bounding_boxes,
-            affine_matrix=resize_affine_matrix,
-            new_canvas_size=size,
-        )
+        return helper(bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=size, clamp=False)
 
     @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
     def test_functional_bounding_boxes_correctness(self, format):
-        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)
+        # Note that we don't want to clamp because in
+        # _reference_resized_crop_bounding_boxes we are fusing the crop and the
+        # resize operation, where none of the clamping happens - particularly,
+        # the intermediate one.
+        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, clamping_mode="none")
 
         actual = F.resized_crop(bounding_boxes, **self.CROP_KWARGS, size=self.OUTPUT_SIZE)
         expected = self._reference_resized_crop_bounding_boxes(
             bounding_boxes, **self.CROP_KWARGS, size=self.OUTPUT_SIZE
         )
 
-        torch.testing.assert_close(actual, expected, atol=1e-5, rtol=1e-5)
+        torch.testing.assert_close(actual, expected)
         assert_equal(F.get_size(actual), F.get_size(expected))
 
     def _reference_resized_crop_keypoints(self, keypoints, *, top, left, height, width, size):
diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py
index 57646d957aa..4a3b20b7fb3 100644
--- a/torchvision/transforms/v2/functional/_geometry.py
+++ b/torchvision/transforms/v2/functional/_geometry.py
@@ -2620,11 +2620,18 @@ def center_crop_bounding_boxes(
     format: tv_tensors.BoundingBoxFormat,
     canvas_size: tuple[int, int],
     output_size: list[int],
+    clamping_mode: CLAMPING_MODE_TYPE = "soft",
 ) -> tuple[torch.Tensor, tuple[int, int]]:
     crop_height, crop_width = _center_crop_parse_output_size(output_size)
     crop_top, crop_left = _center_crop_compute_crop_anchor(crop_height, crop_width, *canvas_size)
     return crop_bounding_boxes(
-        bounding_boxes, format, top=crop_top, left=crop_left, height=crop_height, width=crop_width
+        bounding_boxes,
+        format,
+        top=crop_top,
+        left=crop_left,
+        height=crop_height,
+        width=crop_width,
+        clamping_mode=clamping_mode,
     )
 
 
@@ -2633,7 +2640,11 @@ def _center_crop_bounding_boxes_dispatch(
     inpt: tv_tensors.BoundingBoxes, output_size: list[int]
 ) -> tv_tensors.BoundingBoxes:
     output, canvas_size = center_crop_bounding_boxes(
-        inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, output_size=output_size
+        inpt.as_subclass(torch.Tensor),
+        format=inpt.format,
+        canvas_size=inpt.canvas_size,
+        output_size=output_size,
+        clamping_mode=inpt.clamping_mode,
     )
     return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size)
 
@@ -2780,9 +2791,14 @@ def resized_crop_bounding_boxes(
     height: int,
     width: int,
     size: list[int],
+    clamping_mode: CLAMPING_MODE_TYPE = "soft",
 ) -> tuple[torch.Tensor, tuple[int, int]]:
-    bounding_boxes, canvas_size = crop_bounding_boxes(bounding_boxes, format, top, left, height, width)
-    return resize_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size, size=size)
+    bounding_boxes, canvas_size = crop_bounding_boxes(
+        bounding_boxes, format, top, left, height, width, clamping_mode=clamping_mode
+    )
+    return resize_bounding_boxes(
+        bounding_boxes, format=format, canvas_size=canvas_size, size=size, clamping_mode=clamping_mode
+    )
 
 
 @_register_kernel_internal(resized_crop, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False)
@@ 
def _resized_crop_bounding_boxes_dispatch( inpt: tv_tensors.BoundingBoxes, top: int, left: int, height: int, width: int, size: list[int], **kwargs ) -> tv_tensors.BoundingBoxes: output, canvas_size = resized_crop_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, top=top, left=left, height=height, width=width, size=size + inpt.as_subclass(torch.Tensor), + format=inpt.format, + top=top, + left=left, + height=height, + width=width, + size=size, + clamping_mode=inpt.clamping_mode, ) return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size) From 90a578bc66dc9b306ded73a77923414ebf509b12 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Jun 2025 15:43:19 +0100 Subject: [PATCH 10/11] Add clamping_mode to rotate and set it to none in test --- test/test_transforms_v2.py | 21 +++++-------------- .../transforms/v2/functional/_geometry.py | 3 +++ 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 77c39f6a414..416b2e4facb 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -2081,7 +2081,6 @@ def test_functional(self, make_input): (F.rotate_image, torch.Tensor), (F._geometry._rotate_image_pil, PIL.Image.Image), (F.rotate_image, tv_tensors.Image), - (F.rotate_bounding_boxes, tv_tensors.BoundingBoxes), (F.rotate_mask, tv_tensors.Mask), (F.rotate_video, tv_tensors.Video), (F.rotate_keypoints, tv_tensors.KeyPoints), @@ -2201,7 +2200,7 @@ def _recenter_bounding_boxes_after_expand(self, bounding_boxes, *, recenter_xy): (bounding_boxes.to(torch.float64) - torch.tensor(translate)).to(bounding_boxes.dtype), like=bounding_boxes ) - def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, center, canvas_size=None): + def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, center): if center is None: center = [s * 0.5 for s in bounding_boxes.canvas_size[::-1]] cx, cy = center @@ -2227,28 +2226,22 @@ def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, cen output = helper( bounding_boxes, affine_matrix=affine_matrix, - new_canvas_size=new_canvas_size if canvas_size is None else canvas_size, + new_canvas_size=new_canvas_size, clamp=False, ) - return F.clamp_bounding_boxes(self._recenter_bounding_boxes_after_expand(output, recenter_xy=recenter_xy)).to( - bounding_boxes - ) + return self._recenter_bounding_boxes_after_expand(output, recenter_xy=recenter_xy).to(bounding_boxes) @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"]) @pytest.mark.parametrize("expand", [False, True]) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) def test_functional_bounding_boxes_correctness(self, format, angle, expand, center): - bounding_boxes = make_bounding_boxes(format=format) + bounding_boxes = make_bounding_boxes(format=format, clamping_mode="none") actual = F.rotate(bounding_boxes, angle=angle, expand=expand, center=center) expected = self._reference_rotate_bounding_boxes(bounding_boxes, angle=angle, expand=expand, center=center) torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0) - - expected = self._reference_rotate_bounding_boxes( - bounding_boxes, angle=angle, expand=expand, center=center, canvas_size=actual.canvas_size - ) torch.testing.assert_close(actual, expected) @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) @@ -2256,7 +2249,7 @@ def 
test_functional_bounding_boxes_correctness(self, format, angle, expand, cent @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) @pytest.mark.parametrize("seed", list(range(5))) def test_transform_bounding_boxes_correctness(self, format, expand, center, seed): - bounding_boxes = make_bounding_boxes(format=format) + bounding_boxes = make_bounding_boxes(format=format, clamping_mode="none") transform = transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, expand=expand, center=center) @@ -2268,10 +2261,6 @@ def test_transform_bounding_boxes_correctness(self, format, expand, center, seed expected = self._reference_rotate_bounding_boxes(bounding_boxes, **params, expand=expand, center=center) torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0) - - expected = self._reference_rotate_bounding_boxes( - bounding_boxes, **params, expand=expand, center=center, canvas_size=actual.canvas_size - ) torch.testing.assert_close(actual, expected) def _recenter_keypoints_after_expand(self, keypoints, *, recenter_xy): diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 4a3b20b7fb3..f109247dc6b 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -1449,6 +1449,7 @@ def rotate_bounding_boxes( angle: float, expand: bool = False, center: Optional[list[float]] = None, + clamping_mode: CLAMPING_MODE_TYPE = "soft", ) -> tuple[torch.Tensor, tuple[int, int]]: return _affine_bounding_boxes_with_expand( bounding_boxes, @@ -1460,6 +1461,7 @@ def rotate_bounding_boxes( shear=[0.0, 0.0], center=center, expand=expand, + clamping_mode=clamping_mode, ) @@ -1474,6 +1476,7 @@ def _rotate_bounding_boxes_dispatch( angle=angle, expand=expand, center=center, + clamping_mode=inpt.clamping_mode, ) return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size) From 9b91d9448a7c132457173f6441f81d55214e3881 Mon Sep 17 00:00:00 2001 From: Antoine Simoulin Date: Mon, 30 Jun 2025 07:57:06 -0700 Subject: [PATCH 11/11] Adjust hard clamping Test Plan: ```bash pytest test/test_transforms_v2.py -k box -v ``` --- torchvision/transforms/v2/functional/_meta.py | 55 ++++++++++++++----- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py index 4cc3c2f3f8e..bca7a6de088 100644 --- a/torchvision/transforms/v2/functional/_meta.py +++ b/torchvision/transforms/v2/functional/_meta.py @@ -466,29 +466,54 @@ def _clamp_y_intercept( then applies various constraints to ensure the clamping conditions are respected. 
""" + # Calculate slopes and y-intercepts for bounding boxes a, b = _get_slope_and_intercept(bounding_boxes) a1, a2, a3, a4 = a.unbind(-1) b1, b2, b3, b4 = b.unbind(-1) - # Clamp y-intercepts (soft clamping) + # Get y-intercepts from original bounding boxes + _, bm = _get_slope_and_intercept(original_bounding_boxes) + b1m, b2m, b3m, b4m = bm.unbind(-1) + + # Soft clamping: Clamp y-intercepts within canvas boundaries b1 = b2.clamp(b1, b3).clamp(0, canvas_size[0]) b4 = b3.clamp(b2, b4).clamp(0, canvas_size[0]) if clamping_mode == "hard": - # Get y-intercepts from original bounding boxes - _, b = _get_slope_and_intercept(original_bounding_boxes) - _, b2, b3, _ = b.unbind(-1) - - # Set b1 and b4 to the average of their clamped values - b1 = b4 = (b1.clamp(0, canvas_size[0]) + b4.clamp(0, canvas_size[0])) / 2 - - # Ensure b2 and b3 defined the box of maximum area after clamping b1 and b4 - b2.clamp_(b1 * a2 / a1, b4).clamp_((a1 - a2) * canvas_size[1] + b1) - b2.clamp_(b3 * a2 / a3, b4).clamp_((a3 - a2) * canvas_size[1] + b3) - b3.clamp_(max=canvas_size[0] * (1 - a3 / a4) + b4 * a3 / a4) - b3.clamp_(max=canvas_size[0] * (1 - a3 / a2) + b2 * a3 / a2) - b3.clamp_(b1, (a2 - a3) * canvas_size[1] + b2) - b3.clamp_(b1, (a4 - a3) * canvas_size[1] + b4) + # Hard clamping: Average b1 and b4, and adjust b2 and b3 for maximum area + b1 = b4 = (b1 + b4) / 2 + + # Calculate candidate values for b2 based on geometric constraints + b2_candidates = torch.stack( + [ + b1 * a2 / a1, # Constraint at y=0 + b3 * a2 / a3, # Constraint at y=0 + (a1 - a2) * canvas_size[1] + b1, # Constraint at x=canvas_width + (a3 - a2) * canvas_size[1] + b3, # Constraint at x=canvas_width + ], + dim=1, + ) + # Take maximum value that doesn't exceed original b2 + b2 = torch.max(b2_candidates, dim=1)[0].clamp(max=b2) + + # Calculate candidate values for b3 based on geometric constraints + b3_candidates = torch.stack( + [ + canvas_size[0] * (1 - a3 / a4) + b4 * a3 / a4, # Constraint at y=canvas_height + canvas_size[0] * (1 - a3 / a2) + b2 * a3 / a2, # Constraint at y=canvas_height + (a2 - a3) * canvas_size[1] + b2, # Constraint at x=canvas_width + (a4 - a3) * canvas_size[1] + b4, # Constraint at x=canvas_width + ], + dim=1, + ) + # Take minimum value that doesn't go below original b3 + b3 = torch.min(b3_candidates, dim=1)[0].clamp(min=b3) + + # Final clamping to ensure y-intercepts are within original box bounds + b1.clamp_(b1m, b3m) + b3.clamp_(b1m, b3m) + b2.clamp_(b2m, b4m) + b4.clamp_(b2m, b4m) return torch.stack([b1, b2, b3, b4], dim=-1)