pytorch · NicolasHug · Feb 20, 2025 · Jan 2, 2025 · Jan 22, 2025 · Jan 23, 2025
diff --git a/test/common_utils.py b/test/common_utils.py
@@ -423,6 +423,7 @@ def sample_position(values, max_value):
     h, w = [torch.randint(1, s, (num_boxes,)) for s in canvas_size]
     y = sample_position(h, canvas_size[0])
     x = sample_position(w, canvas_size[1])
+    r = -360 * torch.rand((num_boxes,)) + 180
 
     if format is tv_tensors.BoundingBoxFormat.XYWH:
         parts = (x, y, w, h)
@@ -435,6 +436,23 @@ def sample_position(values, max_value):
         cx = x + w / 2
         cy = y + h / 2
         parts = (cx, cy, w, h)
+    elif format is tv_tensors.BoundingBoxFormat.XYWHR:
+        parts = (x, y, w, h, r)
+    elif format is tv_tensors.BoundingBoxFormat.CXCYWHR:
+        cx = x + w / 2
+        cy = y + h / 2
+        parts = (cx, cy, w, h, r)
+    elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
+        r_rad = r * torch.pi / 180.0
+        cos, sin = torch.cos(r_rad), torch.sin(r_rad)
+        x1, y1 = x, y
+        x3 = x1 + w * cos
+        y3 = y1 - w * sin
+        x2 = x3 + h * sin
+        y2 = y3 + h * cos
+        x4 = x1 + h * sin
+        y4 = y1 + h * cos
+        parts = (x1, y1, x3, y3, x2, y2, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
 

diff --git a/test/test_ops.py b/test/test_ops.py
@@ -1339,8 +1339,61 @@ def test_bbox_xywh_cxcywh(self):
         box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
         assert_equal(box_xywh, box_tensor)
 
-    @pytest.mark.parametrize("inv_infmt", ["xwyh", "cxwyh"])
-    @pytest.mark.parametrize("inv_outfmt", ["xwcx", "xhwcy"])
+    def test_bbox_xywhr_cxcywhr(self):
+        box_tensor = torch.tensor(
+            [
+                [0, 0, 100, 100, 0],
+                [0, 0, 0, 0, 0],
+                [10, 15, 20, 20, 0],
+                [23, 35, 70, 60, 0],
+                [4, 2, 4, 2, 0],
+                [5, 5, 4, 2, 90],
+                [8, 4, 4, 2, 180],
+                [7, 1, 4, 2, -90],
+            ],
+            dtype=torch.float,
+        )
+
+        exp_cxcywhr = torch.tensor(
+            [
+                [50, 50, 100, 100, 0],
+                [0, 0, 0, 0, 0],
+                [20, 25, 20, 20, 0],
+                [58, 65, 70, 60, 0],
+                [6, 3, 4, 2, 0],
+                [6, 3, 4, 2, 90],
+                [6, 3, 4, 2, 180],
+                [6, 3, 4, 2, -90],
+            ],
+            dtype=torch.float,
+        )
+
+        assert exp_cxcywhr.size() == torch.Size([8, 5])
+        box_cxcywhr = ops.box_convert(box_tensor, in_fmt="xywhr", out_fmt="cxcywhr")
+        torch.testing.assert_close(box_cxcywhr, exp_cxcywhr)
+
+        # Reverse conversion
+        box_xywhr = ops.box_convert(box_cxcywhr, in_fmt="cxcywhr", out_fmt="xywhr")
+        torch.testing.assert_close(box_xywhr, box_tensor)
+
+    def test_bbox_cxcywhr_to_xyxyxyxy(self):
+        box_tensor = torch.tensor([[5, 3, 4, 2, 90]], dtype=torch.float)
+        exp_xyxyxyxy = torch.tensor([[4, 5, 4, 1, 6, 1, 6, 5]], dtype=torch.float)
+
+        assert exp_xyxyxyxy.size() == torch.Size([1, 8])
+        box_xyxyxyxy = ops.box_convert(box_tensor, in_fmt="cxcywhr", out_fmt="xyxyxyxy")
+        torch.testing.assert_close(box_xyxyxyxy, exp_xyxyxyxy)
+
+    def test_bbox_xywhr_to_xyxyxyxy(self):
+        box_tensor = torch.tensor([[4, 5, 4, 2, 90]], dtype=torch.float)
+        exp_xyxyxyxy = torch.tensor([[4, 5, 4, 1, 6, 1, 6, 5]], dtype=torch.float)
+
+        assert exp_xyxyxyxy.size() == torch.Size([1, 8])
+        box_xyxyxyxy = ops.box_convert(box_tensor, in_fmt="xywhr", out_fmt="xyxyxyxy")
+        torch.testing.assert_close(box_xyxyxyxy, exp_xyxyxyxy)
+
+    @pytest.mark.parametrize("inv_infmt", ["xwyh", "cxwyh", "xwyhr", "cxwyhr", "xxxxyyyy"])
+    @pytest.mark.parametrize("inv_outfmt", ["xwcx", "xhwcy", "xwcxr", "xhwcyr", "xyxyxxyy"])
     def test_bbox_invalid(self, inv_infmt, inv_outfmt):
         box_tensor = torch.tensor(
             [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float

diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -53,6 +53,15 @@
 from torchvision.transforms.v2.functional._utils import _get_kernel, _register_kernel_internal
 
 
+# While we are working on adjusting transform functions
+# for rotated and oriented bounding boxes formats,
+# we limit the perimeter of tests to formats
+#  for which transform functions are already implemented.
+# In the future, this global variable will be replaced with `list(tv_tensors.BoundingBoxFormat)`
+# to support all available formats.
+SUPPORTED_BOX_FORMATS = [tv_tensors.BoundingBoxFormat[x] for x in ["XYXY", "XYWH", "CXCYWH"]]
+NEW_BOX_FORMATS = [tv_tensors.BoundingBoxFormat[x] for x in ["XYWHR", "CXCYWHR", "XYXYXYXY"]]
+
 # turns all warnings into errors for this module
 pytestmark = [pytest.mark.filterwarnings("error")]
 
@@ -626,7 +635,7 @@ def test_kernel_image(self, size, interpolation, use_max_size, antialias, dtype,
             check_scripted_vs_eager=not isinstance(size, int),
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("size", OUTPUT_SIZES)
     @pytest.mark.parametrize("use_max_size", [True, False])
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@@ -757,7 +766,7 @@ def _reference_resize_bounding_boxes(self, bounding_boxes, *, size, max_size=Non
             new_canvas_size=(new_height, new_width),
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("size", OUTPUT_SIZES)
     @pytest.mark.parametrize("use_max_size", [True, False])
     @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
@@ -1003,7 +1012,7 @@ class TestHorizontalFlip:
     def test_kernel_image(self, dtype, device):
         check_kernel(F.horizontal_flip_image, make_image(dtype=dtype, device=device))
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
@@ -1072,7 +1081,7 @@ def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes):
 
         return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix)
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize(
         "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)]
     )
@@ -1169,7 +1178,7 @@ def test_kernel_image(self, param, value, dtype, device):
         shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"],
         center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"],
     )
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
@@ -1318,7 +1327,7 @@ def _reference_affine_bounding_boxes(self, bounding_boxes, *, angle, translate,
             ),
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
     @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"])
     @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"])
@@ -1346,7 +1355,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, translate, s
 
         torch.testing.assert_close(actual, expected)
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
     @pytest.mark.parametrize("seed", list(range(5)))
     def test_transform_bounding_boxes_correctness(self, format, center, seed):
@@ -1453,7 +1462,7 @@ class TestVerticalFlip:
     def test_kernel_image(self, dtype, device):
         check_kernel(F.vertical_flip_image, make_image(dtype=dtype, device=device))
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
@@ -1520,7 +1529,7 @@ def _reference_vertical_flip_bounding_boxes(self, bounding_boxes):
 
         return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix)
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)])
     def test_bounding_boxes_correctness(self, format, fn):
         bounding_boxes = make_bounding_boxes(format=format)
@@ -1589,7 +1598,7 @@ def test_kernel_image(self, param, value, dtype, device):
         expand=[False, True],
         center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"],
     )
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
@@ -1760,7 +1769,7 @@ def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, cen
             bounding_boxes
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
     @pytest.mark.parametrize("expand", [False, True])
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
@@ -1773,7 +1782,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, expand, cent
         torch.testing.assert_close(actual, expected)
         torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0)
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("expand", [False, True])
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
     @pytest.mark.parametrize("seed", list(range(5)))
@@ -2694,7 +2703,7 @@ def test_kernel_image(self, param, value, dtype, device):
             check_cuda_vs_cpu=dtype is not torch.float16,
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
@@ -2821,7 +2830,7 @@ def test_kernel_image(self, kwargs, dtype, device):
         check_kernel(F.crop_image, make_image(self.INPUT_SIZE, dtype=dtype, device=device), **kwargs)
 
     @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_box(self, kwargs, format, dtype, device):
@@ -2971,7 +2980,7 @@ def _reference_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, w
         )
 
     @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device):
@@ -2984,7 +2993,7 @@ def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device
         assert_equal(F.get_size(actual), F.get_size(expected))
 
     @pytest.mark.parametrize("output_size", [(17, 11), (11, 17), (11, 11)])
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("seed", list(range(5)))
@@ -3507,7 +3516,8 @@ def test_aug_mix_severity_error(self, severity):
 
 
 class TestConvertBoundingBoxFormat:
-    old_new_formats = list(itertools.permutations(iter(tv_tensors.BoundingBoxFormat), 2))
+    old_new_formats = list(itertools.permutations(SUPPORTED_BOX_FORMATS, 2))
+    old_new_formats += list(itertools.permutations(NEW_BOX_FORMATS, 2))
 
     @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
     def test_kernel(self, old_format, new_format):
@@ -3518,7 +3528,7 @@ def test_kernel(self, old_format, new_format):
             old_format=old_format,
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("inplace", [False, True])
     def test_kernel_noop(self, format, inplace):
         input = make_bounding_boxes(format=format).as_subclass(torch.Tensor)
@@ -3542,9 +3552,13 @@ def test_kernel_inplace(self, old_format, new_format):
         output_inplace = F.convert_bounding_box_format(
             input, old_format=old_format, new_format=new_format, inplace=True
         )
-        assert output_inplace.data_ptr() == input.data_ptr()
-        assert output_inplace._version > input_version
-        assert output_inplace is input
+        if old_format != tv_tensors.BoundingBoxFormat.XYXYXYXY and new_format != tv_tensors.BoundingBoxFormat.XYXYXYXY:
+            # NOTE: BoundingBox format conversion from and to XYXYXYXY format
+            # cannot modify the input tensor inplace as it requires a dimension
+            # change.
+            assert output_inplace.data_ptr() == input.data_ptr()
+            assert output_inplace._version > input_version
+            assert output_inplace is input
 
         assert_equal(output_inplace, output_out_of_place)
 
@@ -3563,7 +3577,7 @@ def test_transform(self, old_format, new_format, format_type):
     @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
     def test_strings(self, old_format, new_format):
         # Non-regression test for https://github.com/pytorch/vision/issues/8258
-        input = tv_tensors.BoundingBoxes(torch.tensor([[10, 10, 20, 20]]), format=old_format, canvas_size=(50, 50))
+        input = make_bounding_boxes(format=old_format, canvas_size=(50, 50))
         expected = self._reference_convert_bounding_box_format(input, new_format)
 
         old_format = old_format.name
@@ -3728,7 +3742,7 @@ def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, h
             new_canvas_size=size,
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     def test_functional_bounding_boxes_correctness(self, format):
         bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)
 
@@ -3796,7 +3810,7 @@ def test_kernel_image(self, param, value, dtype, device):
             ),
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     def test_kernel_bounding_boxes(self, format):
         bounding_boxes = make_bounding_boxes(format=format)
         check_kernel(
@@ -3915,7 +3929,7 @@ def _reference_pad_bounding_boxes(self, bounding_boxes, *, padding):
         )
 
     @pytest.mark.parametrize("padding", CORRECTNESS_PADDINGS)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)])
@@ -3944,7 +3958,7 @@ def test_kernel_image(self, output_size, dtype, device):
         )
 
     @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     def test_kernel_bounding_boxes(self, output_size, format):
         bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)
         check_kernel(
@@ -4023,7 +4037,7 @@ def _reference_center_crop_bounding_boxes(self, bounding_boxes, output_size):
         )
 
     @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)])
@@ -4090,7 +4104,7 @@ def test_kernel_image_error(self):
         coefficients=COEFFICIENTS,
         start_end_points=START_END_POINTS,
     )
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     def test_kernel_bounding_boxes(self, param, value, format):
         if param == "start_end_points":
             kwargs = dict(zip(["startpoints", "endpoints"], value))
@@ -4266,7 +4280,7 @@ def perspective_bounding_boxes(bounding_boxes):
         )
 
     @pytest.mark.parametrize(("startpoints", "endpoints"), START_END_POINTS)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_correctness_perspective_bounding_boxes(self, startpoints, endpoints, format, dtype, device):
@@ -4473,7 +4487,7 @@ def test_correctness_image(self, mean, std, dtype, fn):
 
 
 class TestClampBoundingBoxes:
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel(self, format, dtype, device):
@@ -4485,7 +4499,7 @@ def test_kernel(self, format, dtype, device):
             canvas_size=bounding_boxes.canvas_size,
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     def test_functional(self, format):
         check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format))