diff --git a/Cargo.lock b/Cargo.lock index fddd308..cc7683c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,37 @@ version = "1.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677" +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "glam" version = "0.14.0" @@ -134,6 +165,7 @@ name = "meshbool" version = "0.0.0" dependencies = [ "nalgebra", + "rayon", ] [[package]] @@ -227,6 +259,26 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "safe_arch" version = "0.7.4" diff --git a/Cargo.toml b/Cargo.toml index d7ef28e..46cd6f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,4 +6,5 @@ edition = "2024" [features] [dependencies] +rayon = "1.11.0" nalgebra = { version = "0.34", default-features = false, features = ["std"] } diff --git a/src/collider.rs b/src/collider.rs index 3d6fcdb..8464fe1 100644 --- a/src/collider.rs +++ b/src/collider.rs @@ -2,6 +2,7 @@ use crate::common::{AABB, AABBOverlap}; use crate::utils::atomic_add_i32; use crate::vec::vec_uninit; use nalgebra::{Matrix3x4, Point3, Vector3}; +use rayon::prelude::*; use std::fmt::Debug; use std::mem; @@ -313,9 +314,11 @@ impl Collider { ); // copy in leaf node Boxes - for i in 0..leaf_bb.len() { - self.node_bbox[i * 2] = leaf_bb[i]; - } + self.node_bbox + .par_iter_mut() + .step_by(2) + .enumerate() + .for_each(|(i, b)| *b = leaf_bb[i]); // create global counters let mut counter = vec![0; self.num_internal()]; diff --git a/src/meshboolimpl.rs b/src/meshboolimpl.rs index e7099c6..1b4f0a6 100644 --- a/src/meshboolimpl.rs +++ b/src/meshboolimpl.rs @@ -8,6 +8,7 @@ use crate::utils::{atomic_add_i32, mat3, mat4, next3_i32, next3_usize}; use crate::vec::{vec_resize, vec_resize_nofill, vec_uninit}; use crate::{ManifoldError, MeshGL}; use nalgebra::{Matrix3x4, Point3, Vector3, Vector4}; +use rayon::prelude::*; use std::cmp::Ordering as CmpOrdering; use std::collections::{BTreeMap, HashMap}; use std::f64; @@ -160,7 +161,7 @@ impl<'a, const USE_PROP: bool, F: FnMut(i32, i32, i32)> PrepHalfedges<'a, USE_PR } else { self.tri_vert[tri as usize][j as usize] }; - debug_assert!(v0 != v1, "topological degeneracy"); + debug_assert_ne!(v0, v1, "topological degeneracy"); self.halfedges[e as usize] = Halfedge { start_vert: v0, end_vert: v1, @@ -556,26 +557,32 @@ impl MeshBoolImpl { tri: i32, } let mut tri_priority = unsafe { vec_uninit(num_tri) }; - for tri in 0..num_tri { - self.mesh_relation.tri_ref[tri].coplanar_id = -1; - if self.halfedge[3 * tri].start_vert < 0 { - tri_priority[tri] = TriPriority { - area2: 0.0, - tri: tri as i32, - }; - continue; - } - - let v = self.vert_pos[self.halfedge[3 * tri].start_vert as usize]; - tri_priority[tri] = TriPriority { - area2: (self.vert_pos[self.halfedge[3 * tri].end_vert as usize] - v) - .cross(&(self.vert_pos[self.halfedge[3 * tri + 1].end_vert as usize] - v)) - .magnitude_squared(), - tri: tri as i32, - }; - } + self.mesh_relation.tri_ref[0..num_tri] + .par_iter_mut() + .enumerate() + .map(|(tri, mesh_relation_tri_ref)| { + mesh_relation_tri_ref.coplanar_id = -1; + if self.halfedge[3 * tri].start_vert < 0 { + TriPriority { + area2: 0.0, + tri: tri as i32, + } + } else { + let v = self.vert_pos[self.halfedge[3 * tri].start_vert as usize]; + TriPriority { + area2: (self.vert_pos[self.halfedge[3 * tri].end_vert as usize] - v) + .cross( + &(self.vert_pos[self.halfedge[3 * tri + 1].end_vert as usize] - v), + ) + .magnitude_squared(), + tri: tri as i32, + } + } + }) + .collect_into_vec(&mut tri_priority); - tri_priority.sort_by(|a, b| b.area2.partial_cmp(&a.area2).unwrap_or(CmpOrdering::Equal)); + tri_priority + .par_sort_by(|a, b| b.area2.partial_cmp(&a.area2).unwrap_or(CmpOrdering::Equal)); let mut interior_halfedges: Vec = Vec::default(); for tp in &tri_priority { @@ -675,8 +682,8 @@ impl MeshBoolImpl { } } - let mut ids: Vec = (0..num_halfedge).collect(); - ids.sort_by_key(|&i| edge[i as usize]); + let mut ids: Vec = (0..num_halfedge).into_par_iter().collect(); + ids.par_sort_by_key(|&i| edge[i as usize]); ids } else { // For larger vertex count, we separate the ids into slices for halfedges @@ -748,7 +755,7 @@ impl MeshBoolImpl { ids[i as usize] = i; } - ids.sort_unstable_by_key(|&i| { + ids[start as usize..end as usize].sort_unstable_by_key(|&i| { let entry = &entries[i as usize]; (entry.large_vert, entry.tri) }); @@ -952,6 +959,7 @@ impl MeshBoolImpl { vec_resize(&mut self.vert_normal, num_vert); let vert_halfedge_map: Vec = (0..self.num_vert()) + .into_par_iter() .map(|_| AtomicI32::new(i32::MAX)) .collect(); @@ -1005,10 +1013,13 @@ impl MeshBoolImpl { } } } else { - for i in 0..self.halfedge.len() { - let i = i as i32; - atomic_min(i, self.halfedge[i as usize].start_vert); - } + self.halfedge + .par_iter_mut() + .enumerate() + .for_each(|(i, halfedge)| { + let i = i as i32; + atomic_min(i, halfedge.start_vert); + }); } for vert in 0..self.num_vert() { diff --git a/src/parallel.rs b/src/parallel.rs index 140ce30..623f3cb 100644 --- a/src/parallel.rs +++ b/src/parallel.rs @@ -1,4 +1,5 @@ use crate::{common::LossyInto, vec::vec_uninit}; +use rayon::prelude::*; use std::ops::{Add, AddAssign}; ///Compute the inclusive prefix sum for the range `[first, last)` @@ -66,7 +67,7 @@ where ///must be equal or non-overlapping. pub fn exclusive_scan_in_place(io: &mut [IO], init: IO) where - IO: Copy + AddAssign, + IO: Copy + AddAssign + Send + Sync, { let mut acc = init; for i in 0..io.len() { @@ -128,12 +129,16 @@ where ///The map range, input range and the output range must not overlap. pub fn gather(map: &[Map], input: &[IO], output: &mut [IO]) where - IO: Copy, - Map: Copy + LossyInto, + IO: Copy + Send + Sync, + Map: Copy + LossyInto + Send + Sync, { - for i in 0..map.len() { - output[i] = input[map[i].lossy_into()]; - } + output + .par_iter_mut() + .zip(map.par_iter()) + .for_each(|(o, m)| { + let i: usize = (*m).lossy_into(); + *o = input[i]; + }); } ///`gather` copies elements from a source array into a destination range diff --git a/src/sort.rs b/src/sort.rs index f11e2e6..a2cf0ba 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -6,6 +6,7 @@ use crate::parallel::{inclusive_scan, scatter}; use crate::utils::permute; use crate::vec::{vec_resize, vec_resize_nofill, vec_uninit}; use nalgebra::Point3; +use rayon::prelude::*; use std::mem; const K_NO_CODE: u32 = 0xFFFFFFFF; @@ -71,12 +72,13 @@ impl MeshBoolImpl { fn sort_verts(&mut self) { let num_vert = self.num_vert(); let mut vert_morton: Vec = unsafe { vec_uninit(num_vert) }; - for vert in 0..num_vert { - vert_morton[vert] = morton_code(self.vert_pos[vert], self.bbox); - } + self.vert_pos + .par_iter() + .map(|vert_p| morton_code(*vert_p, self.bbox)) + .collect_into_vec(&mut vert_morton); - let mut vert_new2old: Vec<_> = (0..num_vert as i32).collect(); - vert_new2old.sort_by_key(|&i| vert_morton[i as usize]); + let mut vert_new2old: Vec<_> = (0..num_vert as i32).into_par_iter().collect(); + vert_new2old.par_sort_by_key(|&i| vert_morton[i as usize]); self.reindex_verts(&vert_new2old, num_vert); @@ -100,16 +102,16 @@ impl MeshBoolImpl { let mut vert_old2new: Vec = unsafe { vec_uninit(old_num_vert) }; scatter(0..self.num_vert() as i32, vert_new2old, &mut vert_old2new); let has_prop = self.num_prop() > 0; - for edge in &mut self.halfedge { + self.halfedge.par_iter_mut().for_each(|edge| { if edge.start_vert < 0 { - continue; + return; } edge.start_vert = vert_old2new[edge.start_vert as usize]; edge.end_vert = vert_old2new[edge.end_vert as usize]; if !has_prop { edge.prop_vert = edge.start_vert; } - } + }); } fn compact_props(&mut self) { @@ -143,9 +145,9 @@ impl MeshBoolImpl { } } - for edge in &mut self.halfedge { + self.halfedge.par_iter_mut().for_each(|edge| { edge.prop_vert = prop_old2new[edge.prop_vert as usize]; - } + }); } ///Fills the faceBox and faceMorton input with the bounding boxes and Morton @@ -157,34 +159,39 @@ impl MeshBoolImpl { unsafe { vec_resize_nofill(face_morton, self.num_tri()); } - for face in 0..self.num_tri() { - // Removed tris are marked by all halfedges having pairedHalfedge - // = -1, and this will sort them to the end (the Morton code only - // uses the first 30 of 32 bits). - if self.halfedge[(3 * face) as usize].paired_halfedge < 0 { - face_morton[face] = K_NO_CODE; - continue; - } - - let mut center = Point3::::new(0.0, 0.0, 0.0); - - for i in 0..3 { - let pos = self.vert_pos[self.halfedge[(3 * face + i) as usize].start_vert as usize]; - center += pos.coords; - face_box[face].union_point(pos); - } - - center /= 3.; - - face_morton[face] = morton_code(center, self.bbox); - } + face_box + .par_iter_mut() + .zip_eq(face_morton.par_iter_mut()) + .enumerate() + .for_each(|(face, (face_box_v, face_morton_v))| { + // Removed tris are marked by all halfedges having pairedHalfedge + // = -1, and this will sort them to the end (the Morton code only + // uses the first 30 of 32 bits). + if self.halfedge[(3 * face) as usize].paired_halfedge < 0 { + *face_morton_v = K_NO_CODE; + return; + } + + let mut center = Point3::::new(0.0, 0.0, 0.0); + + for i in 0..3 { + let pos = + self.vert_pos[self.halfedge[(3 * face + i) as usize].start_vert as usize]; + center += pos.coords; + face_box_v.union_point(pos); + } + + center /= 3.; + + *face_morton_v = morton_code(center, self.bbox); + }); } ///Sorts the faces of this manifold according to their input Morton code. The ///bounding box and Morton code arrays are also sorted accordingly. fn sort_faces(&mut self, face_box: &mut Vec, face_morton: &mut Vec) { - let mut face_new2old: Vec<_> = (0..self.num_tri() as i32).collect(); - face_new2old.sort_by_key(|&i| face_morton[i as usize]); + let mut face_new2old: Vec<_> = (0..self.num_tri() as i32).into_par_iter().collect(); + face_new2old.par_sort_by_key(|&i| face_morton[i as usize]); // Tris were flagged for removal with pairedHalfedge = -1 and assigned kNoCode // to sort them to the end, which allows them to be removed. @@ -217,18 +224,20 @@ impl MeshBoolImpl { let mut face_old2new = unsafe { vec_uninit(old_halfedge.len() / 3) }; scatter(0..num_tri as i32, face_new2old, &mut face_old2new); - for new_face in 0..num_tri { - let new_face = new_face as i32; - let old_face = face_new2old[new_face as usize]; - for i in 0..3 { - let old_edge = 3 * old_face + i; - let mut edge = old_halfedge[old_edge as usize]; - let paired_face = edge.paired_halfedge / 3; - let offset = edge.paired_halfedge - 3 * paired_face; - edge.paired_halfedge = 3 * face_old2new[paired_face as usize] + offset; - let new_edge = 3 * new_face + i; - self.halfedge[new_edge as usize] = edge; - } - } + self.halfedge + .par_chunks_mut(3) + .enumerate() + .for_each(|(new_face, halfedge_chunk)| { + let new_face = new_face as i32; + let old_face = face_new2old[new_face as usize]; + for i in 0..3 { + let old_edge = 3 * old_face + i; + let mut edge = old_halfedge[old_edge as usize]; + let paired_face = edge.paired_halfedge / 3; + let offset = edge.paired_halfedge - 3 * paired_face; + edge.paired_halfedge = 3 * face_old2new[paired_face as usize] + offset; + halfedge_chunk[i as usize] = edge; + } + }); } } diff --git a/src/utils.rs b/src/utils.rs index a4e827c..8e8938a 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -46,8 +46,8 @@ pub const fn prev3_i32(i: i32) -> i32 { pub fn permute(in_out: &mut Vec, new2old: &[Map]) where - IO: Copy, - Map: Copy + LossyInto, + IO: Copy + Send + Sync, + Map: Copy + LossyInto + Send + Sync, { let mut tmp = unsafe { vec_uninit(new2old.len()) }; mem::swap(&mut tmp, in_out);