Commit 1d03fba (parent d5447fd)

Support OpenCV convention for transforms

11 files changed: +278 -46 lines changed

README.md

Lines changed: 2 additions & 1 deletion

@@ -71,7 +71,7 @@ pip install torchhull
 torchhull gets as input mask images with camera information:
 
 - `masks`: Single-channel images `M` with binary values {0, 1}.
-- `transforms`: Fused extrinsic and intrinsic matrix `K * T`, i.e. transformation from world coordinates to OpenGL clip space (right before perspective division).
+- `transforms`: Fused extrinsic and intrinsic matrix `K * T`, i.e. from world coordinates to image coordinates (right before perspective division), either in OpenGL or OpenCV convention.
 
 The visual hull is then evaluated inside a cube with bottom-front-left corner `cube_corner_bfl` and extent `cube_length`, extracted at octree level `level`. The remaining flags control how the output mesh `(verts, faces)` should look.
 
@@ -91,6 +91,7 @@ verts, faces = torchhull.visual_hull(masks, # [B, H, W, 1]
                                      cube_corner_bfl,
                                      cube_length,
                                      masks_partial=False,
+                                     transforms_convention="opengl",
                                      unique_verts=True,
                                      )
 ```
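To make the new option concrete, here is a minimal usage sketch on the caller's side. It follows the README snippet above and the parameter list in the bindings; the tensor contents, the `level`, and the cube parameters are placeholders rather than values from this commit.

```python
import torch

import torchhull

# Placeholder inputs: B binary masks plus per-view OpenCV-style cameras on the GPU
# (only a CUDA backend is dispatched). masks: [B, H, W, 1] with values {0, 1};
# K: [B, 4, 4] intrinsics; T: [B, 4, 4] world-to-camera extrinsics.
B, H, W = 4, 480, 640
masks = torch.zeros([B, H, W, 1], device="cuda")
masks[:, 180:300, 260:380, :] = 1.0
K = torch.eye(4, device="cuda").repeat(B, 1, 1)
T = torch.eye(4, device="cuda").repeat(B, 1, 1)

transforms = K @ T  # fused intrinsic and extrinsic matrices, as described above

verts, faces = torchhull.visual_hull(masks,
                                     transforms,
                                     level=7,
                                     cube_corner_bfl=(-1.0, -1.0, -1.0),
                                     cube_length=2.0,
                                     masks_partial=False,
                                     transforms_convention="opencv",  # new in this commit
                                     unique_verts=True,
                                     )
```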

benchmarks/test_bench_gaussian_blur.py

Lines changed: 1 addition & 1 deletion

@@ -42,7 +42,7 @@ def test_gaussian_blur(
     data_dir = pathlib.Path(__file__).parents[1] / "data"
     file = "Armadillo.ply"
 
-    _, _, masks = generate_dataset(
+    _, _, masks, _, _, _ = generate_dataset(
         mesh_file=data_dir / file,
         number_cameras=number_cameras,
         device=DEVICE,

benchmarks/test_bench_visual_hull.py

Lines changed: 3 additions & 1 deletion

@@ -27,7 +27,7 @@ def test_visual_hull(benchmark, level: int, number_cameras: int) -> None: # noq
     data_dir = pathlib.Path(__file__).parents[1] / "data"
     file = "Armadillo.ply"
 
-    projection_matrices, view_matrices, masks = generate_dataset(
+    projection_matrices, view_matrices, masks, _, _, _ = generate_dataset(
         mesh_file=data_dir / file,
         number_cameras=number_cameras,
         device=DEVICE,
@@ -47,6 +47,7 @@ def test_visual_hull(benchmark, level: int, number_cameras: int) -> None: # noq
         cube_corner_bfl=(-scale, -scale, -scale),
         cube_length=2.0 * scale,
         masks_partial=False,
+        transforms_convention="opengl",
         unique_verts=True,
     )
 
@@ -58,5 +59,6 @@ def test_visual_hull(benchmark, level: int, number_cameras: int) -> None: # noq
         cube_corner_bfl=(-scale, -scale, -scale),
         cube_length=2.0 * scale,
         masks_partial=False,
+        transforms_convention="opengl",
         unique_verts=True,
     )

data/generate_dataset.py

Lines changed: 37 additions & 4 deletions

@@ -30,6 +30,26 @@ def perspective(
     )
 
 
+def perspective_cv(
+    fovy: float,
+    aspect: float,
+    height: int,
+    width: int,
+    dtype: torch.dtype,
+    device: torch.device,
+) -> torch.Tensor:
+    return torch.tensor(
+        [
+            [(width / 2.0) / (np.tan(fovy / 2.0) * aspect), 0.0, width / 2.0, 0.0],
+            [0.0, (height / 2.0) / np.tan(fovy / 2.0), height / 2.0, 0.0],
+            [0.0, 0.0, 1.0, 0.0],
+            [0.0, 0.0, 0.0, 1.0],
+        ],
+        dtype=dtype,
+        device=device,
+    )
+
+
 def rotate(
     angle: float,
     x: float,
@@ -77,6 +97,7 @@ def generate_random_camera(
     device: torch.device,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     projection_matrix = perspective(fovy, width / height, near, far, dtype, device)
+    projection_matrix_cv = perspective_cv(fovy, width / height, height, width, dtype, device)
 
     random_axis = scipy.stats.uniform_direction.rvs(3)
     rng = np.random.default_rng(1337)
@@ -87,8 +108,13 @@ def generate_random_camera(
     random_t = translate(random_translate[0], random_translate[1], random_translate[2], dtype, device)
 
     view_matrix = translate(0, 0, -camera_origin_distance, dtype, device) @ random_t @ random_r
+    view_matrix_cv = (
+        torch.tensor([1, -1, -1, 1], dtype=dtype, device=device).diag()
+        @ view_matrix
+        @ torch.tensor([1, -1, -1, 1], dtype=dtype, device=device).diag()
+    )
 
-    return projection_matrix, view_matrix
+    return projection_matrix, view_matrix, projection_matrix_cv, view_matrix_cv
 
 
 def render_masks(
@@ -180,8 +206,15 @@ def generate_dataset(
 
     projection_matrices = torch.empty([number_cameras, 4, 4], dtype=dtype, device=device)
     view_matrices = torch.empty([number_cameras, 4, 4], dtype=dtype, device=device)
+    projection_matrices_cv = torch.empty([number_cameras, 4, 4], dtype=dtype, device=device)
+    view_matrices_cv = torch.empty([number_cameras, 4, 4], dtype=dtype, device=device)
     for i in range(number_cameras):
-        projection_matrices[i, :, :], view_matrices[i, :, :] = generate_random_camera(
+        (
+            projection_matrices[i, :, :],
+            view_matrices[i, :, :],
+            projection_matrices_cv[i, :, :],
+            view_matrices_cv[i, :, :],
+        ) = generate_random_camera(
             fovy,
             near,
             far,
@@ -204,7 +237,7 @@ def generate_dataset(
         device=device,
     )
 
-    return projection_matrices, view_matrices, masks
+    return projection_matrices, view_matrices, masks, projection_matrices_cv, view_matrices_cv, masks.flip((1,))
 
 
 def main() -> None:
@@ -215,7 +248,7 @@ def main() -> None:
 
     output_dir.mkdir(exist_ok=True)
 
-    projection_matrices, view_matrices, masks = generate_dataset(
+    projection_matrices, view_matrices, masks, _, _, _ = generate_dataset(
         mesh_file=data_dir / file,
         dtype=torch.float32,
         device=torch.device("cuda"),
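The conversion above has three ingredients: `perspective_cv` rewrites the field of view as pixel-unit intrinsics (`fx`, `fy`, `cx = W/2`, `cy = H/2`), the left `diag(1, -1, -1, 1)` factor flips the camera-space y and z axes (OpenGL looks down -z with y up, OpenCV looks down +z with y down), and `masks.flip((1,))` moves the image origin from bottom-left to top-left. The stand-alone check below confirms that both routes hit the same pixel for a camera-space point; it assumes the usual gluPerspective-style matrix for `perspective()`, which is not shown in this diff, and only exercises the camera-space side of the conjugation.

```python
import numpy as np

# Assumed standard OpenGL projection terms (gluPerspective form); only the x/y rows matter here.
fovy, aspect, H, W = np.deg2rad(60.0), 640.0 / 480.0, 480, 640
f = 1.0 / np.tan(fovy / 2.0)
P00, P11 = f / aspect, f

# OpenCV intrinsics exactly as built by perspective_cv() above.
fx = (W / 2.0) / (np.tan(fovy / 2.0) * aspect)
fy = (H / 2.0) / np.tan(fovy / 2.0)
cx, cy = W / 2.0, H / 2.0

# A point in OpenGL camera space (camera looks down -z, y points up).
x, y, z = 0.3, -0.2, -2.5

# OpenGL route: NDC, then viewport transform (origin at the bottom-left).
x_ndc, y_ndc = P00 * x / -z, P11 * y / -z
u_gl, v_gl_bottom = (x_ndc + 1.0) * W / 2.0, (y_ndc + 1.0) * H / 2.0

# OpenCV route: flip camera y and z (the left diag(1, -1, -1, 1) factor), then K @ p / z.
x_cv, y_cv, z_cv = x, -y, -z
u_cv = fx * x_cv / z_cv + cx
v_cv_top = fy * y_cv / z_cv + cy

assert np.isclose(u_gl, u_cv)
assert np.isclose(H - v_gl_bottom, v_cv_top)  # vertical flip, matching masks.flip((1,))
```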

src/torchhull/_C/include/torchhull/image_utils.h

Lines changed: 11 additions & 0 deletions

@@ -70,6 +70,17 @@ unnormalize_ndc_false(const float coordinate, const int64_t size)
     return (coordinate + 1.f) * static_cast<float>(size) / 2.f - 0.5f;
 }
 
+// NOTE
+// ----
+// false refers to torch.nn.functional.grid_sample()'s align_corners=false
+//
+// A pixel with integer coordinates (y, x) covers the area inside [y - 0.5, y + 0.5] and [x - 0.5, x + 0.5].
+inline C10_HOST_DEVICE float
+align_cv_false(const float coordinate)
+{
+    return coordinate - 0.5f;
+}
+
 template <typename ValueT>
 inline C10_DEVICE ValueT
 sample_zeros_padding(const torch::PackedTensorAccessor64<ValueT, 4, torch::RestrictPtrTraits> image,
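`unnormalize_ndc_false` maps an OpenGL NDC coordinate into the continuous pixel-index space that `torch.nn.functional.grid_sample` uses with `align_corners=False` (pixel centres at integer indices). The new `align_cv_false` only needs a half-pixel shift because, with `cx = width / 2` as in `perspective_cv` above, the OpenCV-convention image coordinate of the centre of pixel `i` is `i + 0.5`. A small stand-alone check of both mappings (not repository code):

```python
import torch
import torch.nn.functional as F

H, W = 4, 8
# Each pixel stores its own column index, so a sample returns the x coordinate it hit.
image = torch.arange(W, dtype=torch.float32).expand(1, 1, H, W).contiguous()

def unnormalize_ndc_false(coordinate, size):
    # Same formula as the C++ helper: NDC in [-1, 1] -> continuous pixel index.
    return (coordinate + 1.0) * size / 2.0 - 0.5

def align_cv_false(coordinate):
    # OpenCV-style image coordinate (centre of pixel i at i + 0.5) -> continuous pixel index.
    return coordinate - 0.5

# The centre of pixel column 3 sits at NDC x = 2 * (3 + 0.5) / W - 1 and at OpenCV x = 3.5.
x_ndc = 2.0 * (3 + 0.5) / W - 1.0
y_ndc = 2.0 * (1 + 0.5) / H - 1.0
assert unnormalize_ndc_false(x_ndc, W) == 3.0
assert align_cv_false(3.5) == 3.0

# grid_sample with align_corners=False agrees: it reads back the value stored at column 3.
grid = torch.tensor([[[[x_ndc, y_ndc]]]], dtype=torch.float32)  # (x, y) in NDC
sample = F.grid_sample(image, grid, mode="bilinear", align_corners=False)
assert torch.isclose(sample.squeeze(), torch.tensor(3.0))
```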

src/torchhull/_C/include/torchhull/visual_hull.h

Lines changed: 4 additions & 1 deletion

@@ -15,6 +15,7 @@ visual_hull(const torch::Tensor& masks,
             const std::array<float, 3>& cube_corner_bfl,
             const float cube_length,
             const bool masks_partial,
+            const std::string& transforms_convention,
             const bool unique_verts);
 
 std::tuple<torch::Tensor, torch::Tensor, std::vector<torch::Tensor>>
@@ -24,6 +25,7 @@ visual_hull_with_candidate_voxels(const torch::Tensor& masks,
                                   const std::array<float, 3>& cube_corner_bfl,
                                   const float cube_length,
                                   const bool masks_partial,
+                                  const std::string& transforms_convention,
                                   const bool unique_verts);
 
 std::vector<std::tuple<torch::Tensor, torch::Tensor>>
@@ -37,6 +39,7 @@ sparse_visual_hull_field(const torch::Tensor& masks,
                          const int level,
                          const std::array<float, 3>& cube_corner_bfl,
                          const float cube_length,
-                         const bool masks_partial);
+                         const bool masks_partial,
+                         const std::string& transforms_convention);
 
 } // namespace torchhull

src/torchhull/_C/python/bindings.cpp

Lines changed: 12 additions & 3 deletions

@@ -17,6 +17,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
           "cube_corner_bfl"_a,
           "cube_length"_a,
           "masks_partial"_a,
+          "transforms_convention"_a,
           "unique_verts"_a = true,
           R"(
 Compute the visual hull of the given masks in terms of a mesh.
@@ -34,7 +35,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
 masks
     Single-channel mask images with binary values {0, 1}. B x H x W x 1.
 transforms
-    The combined transformations from world coordinates to OpenGL clip space (right before perspective division). B x 4 x 4.
+    The combined transformations, i.e. intrinsics * extrinsics, from world coordinates to image coordinates (right before perspective division). B x 4 x 4.
 level
     The hierarchy level to compute the visual hull at.
 cube_corner_bfl
@@ -43,6 +44,8 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
     The length of the cube in world space.
 masks_partial
     Whether some masks may only contain the object partially.
+transforms_convention
+    Convention used to specify the transformations. Options: `opengl`, `opencv`.
 unique_verts
     Whether a compact mesh without duplicate vertices (\|F\| approx. 2 * \|V\|) if true, or a triangle soup
     (\|F\| = (1/3) * \|V\|) if false should be returned.
@@ -61,6 +64,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
           "cube_corner_bfl"_a,
           "cube_length"_a,
           "masks_partial"_a,
+          "transforms_convention"_a,
           "unique_verts"_a = true,
           R"(
 Compute the visual hull of the given masks in terms of a mesh.
@@ -78,7 +82,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
 masks
     Single-channel mask images with binary values {0, 1}. B x H x W x 1.
 transforms
-    The combined transformations from world coordinates to OpenGL clip space (right before perspective division). B x 4 x 4.
+    The combined transformations, i.e. intrinsics * extrinsics, from world coordinates to image coordinates (right before perspective division). B x 4 x 4.
 level
     The hierarchy level to compute the visual hull at.
 cube_corner_bfl
@@ -87,6 +91,8 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
     The length of the cube in world space.
 masks_partial
     Whether some masks may only contain the object partially.
+transforms_convention
+    Convention used to specify the transformations. Options: `opengl`, `opencv`.
 unique_verts
     Whether a compact mesh without duplicate vertices (\|F\| approx. 2 * \|V\|) if true, or a triangle soup
     (\|F\| = (1/3) * \|V\|) if false should be returned.
@@ -136,6 +142,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
           "cube_corner_bfl"_a,
           "cube_length"_a,
           "masks_partial"_a,
+          "transforms_convention"_a,
           R"(
 Compute a sparse scalar field of the sum of projected foreground pixels per detected candidate voxel. In this
 field, the visual hull is located at isolevel \|M\| - 0.5.
@@ -153,7 +160,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
 masks
     Single-channel mask images with binary values {0, 1}. B x H x W x 1.
 transforms
-    The combined transformations from world coordinates to OpenGL clip space (right before perspective division). B x 4 x 4.
+    The combined transformations, i.e. intrinsics * extrinsics, from world coordinates to image coordinates (right before perspective division). B x 4 x 4.
 level
     The hierarchy level to compute the counts at.
 cube_corner_bfl
@@ -162,6 +169,8 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
     The length of the cube in world space.
 masks_partial
     Whether some masks may only contain the object partially.
+transforms_convention
+    Convention used to specify the transformations. Options: `opengl`, `opencv`.
 
 Returns
 -------
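The isolevel in the last docstring follows from what the field stores: each candidate voxel accumulates how many of the `|M|` masks project it onto foreground, so with `masks_partial=False` a threshold of `|M| - 0.5` separates voxels seen as foreground in every mask from all others. A toy illustration of that rule, not library code:

```python
import torch

M = 5                                   # number of masks |M|
counts = torch.tensor([0, 3, 4, 5, 5])  # foreground hits per candidate voxel
inside_hull = counts > (M - 0.5)        # the isolevel at which the hull is extracted
print(inside_hull)                      # tensor([False, False, False,  True,  True])
```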

src/torchhull/_C/src/visual_hull.cpp

Lines changed: 30 additions & 7 deletions

@@ -16,7 +16,8 @@ sparse_visual_hull_field_cuda_ravelled(const torch::Tensor& masks,
                                        const int level,
                                        const std::array<float, 3>& cube_corner_bfl,
                                        const float cube_length,
-                                       const bool masks_partial);
+                                       const bool masks_partial,
+                                       const std::string& transforms_convention);
 
 std::tuple<torch::Tensor, torch::Tensor>
 marching_cubes_cuda_sparse(const RavelledSparseTensor& sparse_volume,
@@ -37,14 +38,16 @@ visual_hull_cuda(const torch::Tensor& masks,
                  const std::array<float, 3>& cube_corner_bfl,
                  const float cube_length,
                  const bool masks_partial,
+                 const std::string& transforms_convention,
                  const bool unique_verts)
 {
     auto [volume, _] = sparse_visual_hull_field_cuda_ravelled(masks,
                                                               transforms,
                                                               level,
                                                               cube_corner_bfl,
                                                               cube_length,
-                                                              masks_partial);
+                                                              masks_partial,
+                                                              transforms_convention);
 
     auto isolevel = 0.5f;
     auto mesh = marching_cubes_cuda_sparse(volume, isolevel, false, unique_verts);
@@ -61,11 +64,19 @@ visual_hull(const torch::Tensor& masks,
             const std::array<float, 3>& cube_corner_bfl,
             const float cube_length,
             const bool masks_partial,
+            const std::string& transforms_convention,
             const bool unique_verts)
 {
     if (masks.is_cuda())
     {
-        return visual_hull_cuda(masks, transforms, level, cube_corner_bfl, cube_length, masks_partial, unique_verts);
+        return visual_hull_cuda(masks,
+                                transforms,
+                                level,
+                                cube_corner_bfl,
+                                cube_length,
+                                masks_partial,
+                                transforms_convention,
+                                unique_verts);
     }
 
     TORCH_CHECK(false, "No backend implementation available for device \"" + masks.device().str() + "\".");
@@ -78,14 +89,16 @@ visual_hull_cuda_with_candidate_voxels_cuda(const torch::Tensor& masks,
                                             const std::array<float, 3>& cube_corner_bfl,
                                             const float cube_length,
                                             const bool masks_partial,
+                                            const std::string& transforms_convention,
                                             const bool unique_verts)
 {
     auto [volume, candidates_octree] = sparse_visual_hull_field_cuda_ravelled(masks,
                                                                               transforms,
                                                                               level,
                                                                               cube_corner_bfl,
                                                                               cube_length,
-                                                                              masks_partial);
+                                                                              masks_partial,
+                                                                              transforms_convention);
 
     auto isolevel = 0.5f;
     auto mesh = marching_cubes_cuda_sparse(volume, isolevel, false, unique_verts);
@@ -102,6 +115,7 @@ visual_hull_with_candidate_voxels(const torch::Tensor& masks,
                                   const std::array<float, 3>& cube_corner_bfl,
                                   const float cube_length,
                                   const bool masks_partial,
+                                  const std::string& transforms_convention,
                                   const bool unique_verts)
 {
     if (masks.is_cuda())
@@ -112,6 +126,7 @@ visual_hull_with_candidate_voxels(const torch::Tensor& masks,
                                                            cube_corner_bfl,
                                                            cube_length,
                                                            masks_partial,
+                                                           transforms_convention,
                                                            unique_verts);
     }
 
@@ -145,19 +160,27 @@ sparse_visual_hull_field_cuda(const torch::Tensor& masks,
                               const int level,
                               const std::array<float, 3>& cube_corner_bfl,
                               const float cube_length,
-                              const bool masks_partial);
+                              const bool masks_partial,
+                              const std::string& transforms_convention);
 
 torch::Tensor
 sparse_visual_hull_field(const torch::Tensor& masks,
                          const torch::Tensor& transforms,
                          const int level,
                          const std::array<float, 3>& cube_corner_bfl,
                          const float cube_length,
-                         const bool masks_partial)
+                         const bool masks_partial,
+                         const std::string& transforms_convention)
 {
     if (masks.is_cuda())
     {
-        return sparse_visual_hull_field_cuda(masks, transforms, level, cube_corner_bfl, cube_length, masks_partial);
+        return sparse_visual_hull_field_cuda(masks,
                                             transforms,
                                             level,
                                             cube_corner_bfl,
                                             cube_length,
                                             masks_partial,
                                             transforms_convention);
     }
 
     TORCH_CHECK(false, "No backend implementation available for device \"" + masks.device().str() + "\".");
