From 30ce603e60ff64fe1a731872abe6ccf3bf32357a Mon Sep 17 00:00:00 2001 From: jmackay2 Date: Wed, 28 May 2025 00:08:34 -0400 Subject: [PATCH 1/2] Add OpenMP threading to search --- benchmarks/search/radius_search.cpp | 18 ++++++++++++++++-- search/CMakeLists.txt | 2 +- search/include/pcl/search/impl/search.hpp | 4 ++++ search/include/pcl/search/search.h | 18 +++++++++++++++++- 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/benchmarks/search/radius_search.cpp b/benchmarks/search/radius_search.cpp index d6b391e1f6f..ae376d0a107 100644 --- a/benchmarks/search/radius_search.cpp +++ b/benchmarks/search/radius_search.cpp @@ -46,10 +46,14 @@ static void BM_KdTreeAll(benchmark::State& state, const pcl::PointCloud::Ptr cloudIn, const double searchRadius, - const size_t neighborLimit) + const size_t neighborLimit, + const bool threaded) { pcl::search::KdTree kdtree(false); kdtree.setInputCloud(cloudIn); + if (threaded) { + kdtree.setNumberOfThreads(0); + } // Leaving indices empty to have it search through all points pcl::Indices indices; @@ -123,7 +127,17 @@ main(int argc, char** argv) ->Unit(benchmark::kMicrosecond); benchmark::RegisterBenchmark( - "KdTreeAll", &BM_KdTreeAll, cloudFiltered, searchRadius, neighborLimit) + "KdTreeAll", &BM_KdTreeAll, cloudFiltered, searchRadius, neighborLimit, false) + ->Unit(benchmark::kMicrosecond) + ->UseManualTime() + ->Iterations(1); + + benchmark::RegisterBenchmark("KdTreeAllThreaded", + &BM_KdTreeAll, + cloudFiltered, + searchRadius, + neighborLimit, + true) ->Unit(benchmark::kMicrosecond) ->UseManualTime() ->Iterations(1); diff --git a/search/CMakeLists.txt b/search/CMakeLists.txt index bf0c499fb39..51f6ebcd947 100644 --- a/search/CMakeLists.txt +++ b/search/CMakeLists.txt @@ -3,7 +3,7 @@ set(SUBSYS_DESC "Point cloud generic search library") set(SUBSYS_DEPS common kdtree octree) PCL_SUBSYS_OPTION(build "${SUBSYS_NAME}" "${SUBSYS_DESC}" ON) -PCL_SUBSYS_DEPEND(build NAME ${SUBSYS_NAME} DEPS ${SUBSYS_DEPS} EXT_DEPS flann) +PCL_SUBSYS_DEPEND(build NAME ${SUBSYS_NAME} DEPS ${SUBSYS_DEPS} OPT_DEPS OpenMP EXT_DEPS flann) PCL_ADD_DOC("${SUBSYS_NAME}") diff --git a/search/include/pcl/search/impl/search.hpp b/search/include/pcl/search/impl/search.hpp index 9f24d244acd..0c82e1f3c16 100644 --- a/search/include/pcl/search/impl/search.hpp +++ b/search/include/pcl/search/impl/search.hpp @@ -120,6 +120,7 @@ pcl::search::Search::nearestKSearch ( { k_indices.resize (cloud.size ()); k_sqr_distances.resize (cloud.size ()); + #pragma omp parallel for num_threads(num_threads_) for (std::size_t i = 0; i < cloud.size (); i++) nearestKSearch (cloud, static_cast (i), k, k_indices[i], k_sqr_distances[i]); } @@ -127,6 +128,7 @@ pcl::search::Search::nearestKSearch ( { k_indices.resize (indices.size ()); k_sqr_distances.resize (indices.size ()); + #pragma omp parallel for num_threads(num_threads_) for (std::size_t i = 0; i < indices.size (); i++) nearestKSearch (cloud, indices[i], k, k_indices[i], k_sqr_distances[i]); } @@ -172,6 +174,7 @@ pcl::search::Search::radiusSearch ( { k_indices.resize (cloud.size ()); k_sqr_distances.resize (cloud.size ()); + #pragma omp parallel for num_threads(num_threads_) for (std::size_t i = 0; i < cloud.size (); i++) radiusSearch (cloud, static_cast (i), radius,k_indices[i], k_sqr_distances[i], max_nn); } @@ -179,6 +182,7 @@ pcl::search::Search::radiusSearch ( { k_indices.resize (indices.size ()); k_sqr_distances.resize (indices.size ()); + #pragma omp parallel for num_threads(num_threads_) for (std::size_t i = 0; i < indices.size (); i++) radiusSearch (cloud,indices[i],radius,k_indices[i],k_sqr_distances[i], max_nn); } diff --git a/search/include/pcl/search/search.h b/search/include/pcl/search/search.h index 0991c98a967..76a55c50917 100644 --- a/search/include/pcl/search/search.h +++ b/search/include/pcl/search/search.h @@ -384,7 +384,7 @@ namespace pcl pc.resize (cloud.size ()); for (std::size_t i = 0; i < cloud.size (); ++i) pcl::for_each_type (pcl::NdConcatenateFunctor (cloud[i], pc[i])); - radiusSearch (pc, Indices (), radius, k_indices, k_sqr_distances, max_nn); + radiusSearch (pc, Indices(), radius, k_indices, k_sqr_distances, max_nn); } else { @@ -395,6 +395,19 @@ namespace pcl } } + /** \brief Set the number of threads to use for searching over multiple points or indices + * \param[in] nr_threads the number of threads to use (0 automatically sets the threads based on the hardware) + */ + void setNumberOfThreads(unsigned int nr_threads) { + #ifdef _OPENMP + num_threads_ = nr_threads != 0 ? nr_threads : omp_get_num_procs(); + #else + if (nr_threads != 1) { + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } + #endif + } + protected: void sortResults (Indices& indices, std::vector& distances) const; @@ -403,6 +416,9 @@ namespace pcl IndicesConstPtr indices_; bool sorted_results_; std::string name_; + + /** \brief The number of threads to use when searching over multiple points or indices */ + unsigned int num_threads_{1}; private: struct Compare From 87948e2904a6fae66722f931f3ff46d5e17ca83a Mon Sep 17 00:00:00 2001 From: jmackay2 Date: Sat, 31 May 2025 23:41:49 -0400 Subject: [PATCH 2/2] switch to ptrdiff_t --- search/include/pcl/search/impl/search.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/search/include/pcl/search/impl/search.hpp b/search/include/pcl/search/impl/search.hpp index 0c82e1f3c16..bf9cde67204 100644 --- a/search/include/pcl/search/impl/search.hpp +++ b/search/include/pcl/search/impl/search.hpp @@ -120,16 +120,16 @@ pcl::search::Search::nearestKSearch ( { k_indices.resize (cloud.size ()); k_sqr_distances.resize (cloud.size ()); - #pragma omp parallel for num_threads(num_threads_) - for (std::size_t i = 0; i < cloud.size (); i++) + #pragma omp parallel for num_threads(num_threads_) default(none) shared(cloud, k, k_indices, k_sqr_distances) + for (std::ptrdiff_t i = 0; i < static_cast(cloud.size ()); i++) nearestKSearch (cloud, static_cast (i), k, k_indices[i], k_sqr_distances[i]); } else { k_indices.resize (indices.size ()); k_sqr_distances.resize (indices.size ()); - #pragma omp parallel for num_threads(num_threads_) - for (std::size_t i = 0; i < indices.size (); i++) + #pragma omp parallel for num_threads(num_threads_) default(none) shared(cloud, indices, k, k_indices, k_sqr_distances) + for (std::ptrdiff_t i = 0; i < static_cast(indices.size ()); i++) nearestKSearch (cloud, indices[i], k, k_indices[i], k_sqr_distances[i]); } } @@ -174,16 +174,16 @@ pcl::search::Search::radiusSearch ( { k_indices.resize (cloud.size ()); k_sqr_distances.resize (cloud.size ()); - #pragma omp parallel for num_threads(num_threads_) - for (std::size_t i = 0; i < cloud.size (); i++) + #pragma omp parallel for num_threads(num_threads_) default(none) shared(cloud, radius, k_indices, k_sqr_distances, max_nn) + for (std::ptrdiff_t i = 0; i < static_cast(cloud.size ()); i++) radiusSearch (cloud, static_cast (i), radius,k_indices[i], k_sqr_distances[i], max_nn); } else { k_indices.resize (indices.size ()); k_sqr_distances.resize (indices.size ()); - #pragma omp parallel for num_threads(num_threads_) - for (std::size_t i = 0; i < indices.size (); i++) + #pragma omp parallel for num_threads(num_threads_) default(none) shared(cloud, indices, radius, k_indices, k_sqr_distances, max_nn) + for (std::ptrdiff_t i = 0; i < static_cast(indices.size ()); i++) radiusSearch (cloud,indices[i],radius,k_indices[i],k_sqr_distances[i], max_nn); } }