diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 999a303201..58dd9fcb6d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -349,6 +349,7 @@ if(NOT BUILD_CPU_ONLY) src/cluster/kmeans_transform_double.cu src/cluster/kmeans_transform_float.cu src/cluster/single_linkage_float.cu + src/cluster/spectral.cu src/core/bitset.cu src/core/omp_wrapper.cpp src/distance/detail/kernels/gram_matrix.cu diff --git a/cpp/include/cuvs/cluster/spectral.hpp b/cpp/include/cuvs/cluster/spectral.hpp new file mode 100644 index 0000000000..8ab45aa180 --- /dev/null +++ b/cpp/include/cuvs/cluster/spectral.hpp @@ -0,0 +1,33 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include +#include +#include +#include + +namespace cuvs::cluster::spectral { + +struct params { + int n_clusters; + int n_components; + int n_init; + int n_neighbors; + raft::random::RngState rng_state{0}; +}; + +// TODO: int64_t nnz support (see https://github.com/rapidsai/cuvs/issues/1484) +void fit_predict(raft::resources const& handle, + params config, + raft::device_coo_matrix_view connectivity_graph, + raft::device_vector_view labels); + +void fit_predict(raft::resources const& handle, + params config, + raft::device_coo_matrix_view connectivity_graph, + raft::device_vector_view labels); + +} // namespace cuvs::cluster::spectral diff --git a/cpp/include/cuvs/preprocessing/spectral_embedding.hpp b/cpp/include/cuvs/preprocessing/spectral_embedding.hpp index e8604aa9ae..1c23b9ace0 100644 --- a/cpp/include/cuvs/preprocessing/spectral_embedding.hpp +++ b/cpp/include/cuvs/preprocessing/spectral_embedding.hpp @@ -159,8 +159,22 @@ void transform(raft::resources const& handle, raft::device_coo_matrix_view connectivity_graph, raft::device_matrix_view embedding); +void transform(raft::resources const& handle, + params config, + raft::device_coo_matrix_view connectivity_graph, + raft::device_matrix_view embedding); + /** * @} */ } // namespace cuvs::preprocessing::spectral_embedding + +namespace cuvs::preprocessing::spectral_embedding::helpers { + +void create_connectivity_graph(raft::resources const& handle, + params spectral_embedding_config, + raft::device_matrix_view dataset, + raft::device_coo_matrix& connectivity_graph); + +} // namespace cuvs::preprocessing::spectral_embedding::helpers diff --git a/cpp/src/cluster/detail/spectral.cuh b/cpp/src/cluster/detail/spectral.cuh new file mode 100644 index 0000000000..235fcb6565 --- /dev/null +++ b/cpp/src/cluster/detail/spectral.cuh @@ -0,0 +1,63 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace cuvs::cluster::spectral::detail { + +template +void fit_predict(raft::resources const& handle, + params config, + raft::device_coo_matrix_view connectivity_graph, + raft::device_vector_view labels) +{ + int n_samples = connectivity_graph.structure_view().get_n_rows(); + DataT inertia; + int n_iter; + auto embedding_col_major = + raft::make_device_matrix(handle, n_samples, config.n_components); + auto embedding_row_major = + raft::make_device_matrix(handle, n_samples, config.n_components); + cuvs::preprocessing::spectral_embedding::params spectral_embedding_config; + spectral_embedding_config.n_components = config.n_components; + spectral_embedding_config.n_neighbors = config.n_neighbors; + spectral_embedding_config.norm_laplacian = true; + spectral_embedding_config.drop_first = false; + spectral_embedding_config.seed = config.rng_state.seed; + + cuvs::cluster::kmeans::params kmeans_config; + kmeans_config.n_clusters = config.n_clusters; + kmeans_config.rng_state = config.rng_state; + kmeans_config.n_init = config.n_init; + kmeans_config.oversampling_factor = 0.0; + + cuvs::preprocessing::spectral_embedding::transform( + handle, spectral_embedding_config, connectivity_graph, embedding_col_major.view()); + + raft::linalg::transpose(handle, + embedding_col_major.data_handle(), + embedding_row_major.data_handle(), + n_samples, + config.n_components, + raft::resource::get_cuda_stream(handle)); + + cuvs::cluster::kmeans::fit_predict(handle, + kmeans_config, + embedding_row_major.view(), + std::nullopt, + std::nullopt, + labels, + raft::make_host_scalar_view(&inertia), + raft::make_host_scalar_view(&n_iter)); +} + +} // namespace cuvs::cluster::spectral::detail diff --git a/cpp/src/cluster/spectral.cu b/cpp/src/cluster/spectral.cu new file mode 100644 index 0000000000..b4dfdafbb5 --- /dev/null +++ b/cpp/src/cluster/spectral.cu @@ -0,0 +1,26 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "./detail/spectral.cuh" + +#include + +namespace cuvs::cluster::spectral { + +#define CUVS_INST_SPECTRAL(DataT) \ + void fit_predict(raft::resources const& handle, \ + params config, \ + raft::device_coo_matrix_view connectivity_graph, \ + raft::device_vector_view labels) \ + { \ + detail::fit_predict(handle, config, connectivity_graph, labels); \ + } + +CUVS_INST_SPECTRAL(float); +CUVS_INST_SPECTRAL(double); + +#undef CUVS_INST_SPECTRAL + +} // namespace cuvs::cluster::spectral diff --git a/cpp/src/preprocessing/spectral/detail/spectral_embedding.cuh b/cpp/src/preprocessing/spectral/detail/spectral_embedding.cuh new file mode 100644 index 0000000000..f779c0c2cd --- /dev/null +++ b/cpp/src/preprocessing/spectral/detail/spectral_embedding.cuh @@ -0,0 +1,273 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace cuvs::preprocessing::spectral_embedding::detail { + +template +raft::device_csr_matrix_view coo_to_csr_matrix( + raft::resources const& handle, + const int n_samples, + raft::device_vector_view sym_coo_row_ind, + raft::device_coo_matrix_view sym_coo_matrix_view) +{ + auto stream = raft::resource::get_cuda_stream(handle); + + raft::sparse::op::coo_sort(n_samples, + n_samples, + sym_coo_matrix_view.structure_view().get_nnz(), + sym_coo_matrix_view.structure_view().get_rows().data(), + sym_coo_matrix_view.structure_view().get_cols().data(), + sym_coo_matrix_view.get_elements().data(), + stream); + + raft::sparse::convert::sorted_coo_to_csr(sym_coo_matrix_view.structure_view().get_rows().data(), + sym_coo_matrix_view.structure_view().get_nnz(), + sym_coo_row_ind.data_handle(), + n_samples, + stream); + + auto sym_coo_nnz = sym_coo_matrix_view.structure_view().get_nnz(); + raft::copy(sym_coo_row_ind.data_handle() + sym_coo_row_ind.size() - 1, &sym_coo_nnz, 1, stream); + + auto csr_matrix_view = raft::make_device_csr_matrix_view( + const_cast(sym_coo_matrix_view.get_elements().data()), + raft::make_device_compressed_structure_view( + const_cast(sym_coo_row_ind.data_handle()), + const_cast(sym_coo_matrix_view.structure_view().get_cols().data()), + n_samples, + n_samples, + sym_coo_matrix_view.structure_view().get_nnz())); + return csr_matrix_view; +} + +template +raft::device_csr_matrix create_laplacian( + raft::resources const& handle, + params spectral_embedding_config, + raft::device_csr_matrix_view csr_matrix_view, + raft::device_vector_view diagonal) +{ + auto laplacian = spectral_embedding_config.norm_laplacian + ? raft::sparse::linalg::laplacian_normalized(handle, csr_matrix_view, diagonal) + : raft::sparse::linalg::compute_graph_laplacian(handle, csr_matrix_view); + + auto laplacian_elements_view = raft::make_device_vector_view( + laplacian.get_elements().data(), laplacian.structure_view().get_nnz()); + + raft::linalg::unary_op(handle, + raft::make_const_mdspan(laplacian_elements_view), + laplacian_elements_view, + [] __device__(DataT x) { return -x; }); + + return laplacian; +} + +template +void compute_eigenpairs(raft::resources const& handle, + params spectral_embedding_config, + const int n_samples, + raft::device_csr_matrix& laplacian, + raft::device_vector_view diagonal, + raft::device_matrix_view embedding) +{ + auto config = raft::sparse::solver::lanczos_solver_config(); + config.n_components = spectral_embedding_config.n_components; + config.max_iterations = 10 * n_samples; + config.ncv = std::min(n_samples, std::max(2 * config.n_components + 1, 20)); + config.tolerance = 0.0f; + config.which = raft::sparse::solver::LANCZOS_WHICH::LA; + config.seed = spectral_embedding_config.seed; + + auto eigenvalues = + raft::make_device_vector(handle, config.n_components); + auto eigenvectors = + raft::make_device_matrix(handle, n_samples, config.n_components); + + raft::sparse::solver::lanczos_compute_smallest_eigenvectors( + handle, + config, + raft::make_device_csr_matrix_view(laplacian.get_elements().data(), + laplacian.structure_view()), + std::nullopt, + eigenvalues.view(), + eigenvectors.view()); + + if (spectral_embedding_config.norm_laplacian) { + raft::linalg::matrix_vector_op( + handle, + raft::make_const_mdspan(eigenvectors.view()), // input matrix view + raft::make_const_mdspan(diagonal), // input vector view + eigenvectors.view(), // output matrix view (in-place) + [] __device__(DataT elem, DataT diag) { return elem / diag; }); + } + + // Create a sequence of reversed column indices + config.n_components = + spectral_embedding_config.drop_first ? config.n_components - 1 : config.n_components; + auto col_indices = raft::make_device_vector(handle, config.n_components); + + // TODO: https://github.com/rapidsai/raft/issues/2661 + thrust::sequence(thrust::device, + col_indices.data_handle(), + col_indices.data_handle() + config.n_components, + config.n_components - 1, // Start from the last column index + -1 // Decrement (move backward) + ); + + // Create row-major views of the column-major matrices + // This is just a view re-interpretation, no data movement + auto eigenvectors_row_view = raft::make_device_matrix_view( + eigenvectors.data_handle(), + eigenvectors.extent(1), // Swap dimensions for the view + eigenvectors.extent(0)); + + auto embedding_row_view = raft::make_device_matrix_view( + embedding.data_handle(), + embedding.extent(1), // Swap dimensions for the view + embedding.extent(0)); + + raft::matrix::gather( + handle, + raft::make_const_mdspan(eigenvectors_row_view), // Source matrix (as row-major view) + raft::make_const_mdspan(col_indices.view()), // Column indices to gather + embedding_row_view // Destination matrix (as row-major view) + ); +} + +template +void transform(raft::resources const& handle, + params spectral_embedding_config, + raft::device_coo_matrix_view connectivity_graph, + raft::device_matrix_view embedding) +{ + const int n_samples = connectivity_graph.structure_view().get_n_rows(); + + auto sym_coo_row_ind = raft::make_device_vector(handle, n_samples + 1); + auto diagonal = raft::make_device_vector(handle, n_samples); + + auto csr_matrix_view = + coo_to_csr_matrix(handle, n_samples, sym_coo_row_ind.view(), connectivity_graph); + auto laplacian = + create_laplacian(handle, spectral_embedding_config, csr_matrix_view, diagonal.view()); + compute_eigenpairs( + handle, spectral_embedding_config, n_samples, laplacian, diagonal.view(), embedding); +} + +void create_connectivity_graph( + raft::resources const& handle, + cuvs::preprocessing::spectral_embedding::params spectral_embedding_config, + raft::device_matrix_view dataset, + raft::device_coo_matrix& connectivity_graph) +{ + const int n_samples = dataset.extent(0); + const int n_features = dataset.extent(1); + const int k_search = spectral_embedding_config.n_neighbors; + const size_t nnz = n_samples * k_search; + + auto stream = raft::resource::get_cuda_stream(handle); + + cuvs::neighbors::brute_force::search_params search_params; + cuvs::neighbors::brute_force::index_params index_params; + index_params.metric = cuvs::distance::DistanceType::L2SqrtExpanded; + + auto d_indices = raft::make_device_matrix(handle, n_samples, k_search); + auto d_distances = raft::make_device_matrix(handle, n_samples, k_search); + + auto index = + cuvs::neighbors::brute_force::build(handle, index_params, raft::make_const_mdspan(dataset)); + + cuvs::neighbors::brute_force::search( + handle, search_params, index, dataset, d_indices.view(), d_distances.view()); + + auto knn_rows = raft::make_device_vector(handle, nnz); + auto knn_cols = raft::make_device_vector(handle, nnz); + + raft::linalg::unary_op( + handle, make_const_mdspan(d_indices.view()), knn_cols.view(), [] __device__(int64_t x) { + return static_cast(x); + }); + + thrust::tabulate(raft::resource::get_thrust_policy(handle), + knn_rows.data_handle(), + knn_rows.data_handle() + nnz, + [k_search] __device__(int idx) { return idx / k_search; }); + + // set all distances to 1.0f (connectivity KNN graph) + raft::matrix::fill(handle, raft::make_device_vector_view(d_distances.data_handle(), nnz), 1.0f); + + auto coo_matrix_view = raft::make_device_coo_matrix_view( + d_distances.data_handle(), + raft::make_device_coordinate_structure_view( + knn_rows.data_handle(), knn_cols.data_handle(), n_samples, n_samples, nnz)); + + auto sym_coo1_matrix = + raft::make_device_coo_matrix(handle, n_samples, n_samples); + raft::sparse::linalg::coo_symmetrize<128, float, int, int>( + handle, coo_matrix_view, sym_coo1_matrix, [] __device__(int row, int col, float a, float b) { + return 0.5f * (a + b); + }); + + raft::sparse::op::coo_sort(n_samples, + n_samples, + sym_coo1_matrix.structure_view().get_nnz(), + sym_coo1_matrix.structure_view().get_rows().data(), + sym_coo1_matrix.structure_view().get_cols().data(), + sym_coo1_matrix.get_elements().data(), + stream); + + raft::sparse::op::coo_remove_scalar<128, float, int, int>( + handle, + raft::make_device_coo_matrix_view( + sym_coo1_matrix.get_elements().data(), sym_coo1_matrix.structure_view()), + raft::make_host_scalar(0.0f).view(), + connectivity_graph); +} + +void transform(raft::resources const& handle, + params spectral_embedding_config, + raft::device_matrix_view dataset, + raft::device_matrix_view embedding) +{ + const int n_samples = dataset.extent(0); + + auto sym_coo_matrix = + raft::make_device_coo_matrix(handle, n_samples, n_samples); + auto sym_coo_row_ind = raft::make_device_vector(handle, n_samples + 1); + auto diagonal = raft::make_device_vector(handle, n_samples); + + create_connectivity_graph(handle, spectral_embedding_config, dataset, sym_coo_matrix); + auto csr_matrix_view = + coo_to_csr_matrix(handle, n_samples, sym_coo_row_ind.view(), sym_coo_matrix.view()); + auto laplacian = + create_laplacian(handle, spectral_embedding_config, csr_matrix_view, diagonal.view()); + compute_eigenpairs( + handle, spectral_embedding_config, n_samples, laplacian, diagonal.view(), embedding); +} + +} // namespace cuvs::preprocessing::spectral_embedding::detail diff --git a/cpp/src/preprocessing/spectral/spectral_embedding.cu b/cpp/src/preprocessing/spectral/spectral_embedding.cu index f950b9ca7c..37a07e59ca 100644 --- a/cpp/src/preprocessing/spectral/spectral_embedding.cu +++ b/cpp/src/preprocessing/spectral/spectral_embedding.cu @@ -3,265 +3,45 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "./detail/spectral_embedding.cuh" -#include -#include +#include namespace cuvs::preprocessing::spectral_embedding { -void create_connectivity_graph(raft::resources const& handle, - params spectral_embedding_config, - raft::device_matrix_view dataset, - raft::device_matrix_view embedding, - raft::device_coo_matrix& connectivity_graph) -{ - const int n_samples = dataset.extent(0); - const int n_features = dataset.extent(1); - const int k_search = spectral_embedding_config.n_neighbors; - const size_t nnz = n_samples * k_search; - - auto stream = raft::resource::get_cuda_stream(handle); - - cuvs::neighbors::brute_force::search_params search_params; - cuvs::neighbors::brute_force::index_params index_params; - index_params.metric = cuvs::distance::DistanceType::L2SqrtExpanded; - - auto d_indices = raft::make_device_matrix(handle, n_samples, k_search); - auto d_distances = raft::make_device_matrix(handle, n_samples, k_search); - - auto index = - cuvs::neighbors::brute_force::build(handle, index_params, raft::make_const_mdspan(dataset)); - - cuvs::neighbors::brute_force::search( - handle, search_params, index, dataset, d_indices.view(), d_distances.view()); - - auto knn_rows = raft::make_device_vector(handle, nnz); - auto knn_cols = raft::make_device_vector(handle, nnz); - - raft::linalg::unary_op( - handle, make_const_mdspan(d_indices.view()), knn_cols.view(), [] __device__(int64_t x) { - return static_cast(x); - }); - - thrust::tabulate(raft::resource::get_thrust_policy(handle), - knn_rows.data_handle(), - knn_rows.data_handle() + nnz, - [k_search] __device__(int idx) { return idx / k_search; }); - - // set all distances to 1.0f (connectivity KNN graph) - raft::matrix::fill(handle, raft::make_device_vector_view(d_distances.data_handle(), nnz), 1.0f); - - auto coo_matrix_view = raft::make_device_coo_matrix_view( - d_distances.data_handle(), - raft::make_device_coordinate_structure_view( - knn_rows.data_handle(), knn_cols.data_handle(), n_samples, n_samples, nnz)); - - auto sym_coo1_matrix = - raft::make_device_coo_matrix(handle, n_samples, n_samples); - raft::sparse::linalg::coo_symmetrize<128, float, int, int>( - handle, coo_matrix_view, sym_coo1_matrix, [] __device__(int row, int col, float a, float b) { - return 0.5f * (a + b); - }); - - raft::sparse::op::coo_sort(n_samples, - n_samples, - sym_coo1_matrix.structure_view().get_nnz(), - sym_coo1_matrix.structure_view().get_rows().data(), - sym_coo1_matrix.structure_view().get_cols().data(), - sym_coo1_matrix.get_elements().data(), - stream); - - raft::sparse::op::coo_remove_scalar<128, float, int, int>( - handle, - raft::make_device_coo_matrix_view( - sym_coo1_matrix.get_elements().data(), sym_coo1_matrix.structure_view()), - raft::make_host_scalar(0.0f).view(), - connectivity_graph); -} - -raft::device_csr_matrix_view coo_to_csr_matrix( - raft::resources const& handle, - const int n_samples, - raft::device_vector_view sym_coo_row_ind, - raft::device_coo_matrix_view sym_coo_matrix_view) -{ - auto stream = raft::resource::get_cuda_stream(handle); - - raft::sparse::op::coo_sort(n_samples, - n_samples, - sym_coo_matrix_view.structure_view().get_nnz(), - sym_coo_matrix_view.structure_view().get_rows().data(), - sym_coo_matrix_view.structure_view().get_cols().data(), - sym_coo_matrix_view.get_elements().data(), - stream); - - raft::sparse::convert::sorted_coo_to_csr(sym_coo_matrix_view.structure_view().get_rows().data(), - sym_coo_matrix_view.structure_view().get_nnz(), - sym_coo_row_ind.data_handle(), - n_samples, - stream); - - auto sym_coo_nnz = sym_coo_matrix_view.structure_view().get_nnz(); - raft::copy(sym_coo_row_ind.data_handle() + sym_coo_row_ind.size() - 1, &sym_coo_nnz, 1, stream); - - auto csr_matrix_view = raft::make_device_csr_matrix_view( - const_cast(sym_coo_matrix_view.get_elements().data()), - raft::make_device_compressed_structure_view( - const_cast(sym_coo_row_ind.data_handle()), - const_cast(sym_coo_matrix_view.structure_view().get_cols().data()), - n_samples, - n_samples, - sym_coo_matrix_view.structure_view().get_nnz())); - return csr_matrix_view; -} - -raft::device_csr_matrix create_laplacian( - raft::resources const& handle, - params spectral_embedding_config, - raft::device_csr_matrix_view csr_matrix_view, - raft::device_vector_view diagonal) -{ - auto laplacian = spectral_embedding_config.norm_laplacian - ? raft::sparse::linalg::laplacian_normalized(handle, csr_matrix_view, diagonal) - : raft::sparse::linalg::compute_graph_laplacian(handle, csr_matrix_view); - - auto laplacian_elements_view = raft::make_device_vector_view( - laplacian.get_elements().data(), laplacian.structure_view().get_nnz()); - - raft::linalg::unary_op(handle, - raft::make_const_mdspan(laplacian_elements_view), - laplacian_elements_view, - [] __device__(float x) { return -x; }); - - return laplacian; -} - -void compute_eigenpairs(raft::resources const& handle, - params spectral_embedding_config, - const int n_samples, - raft::device_csr_matrix& laplacian, - raft::device_vector_view diagonal, - raft::device_matrix_view embedding) -{ - auto config = raft::sparse::solver::lanczos_solver_config(); - config.n_components = spectral_embedding_config.n_components; - config.max_iterations = 1000; - config.ncv = std::min(n_samples, std::max(2 * config.n_components + 1, 20)); - config.tolerance = 1e-5; - config.which = raft::sparse::solver::LANCZOS_WHICH::LA; - config.seed = spectral_embedding_config.seed; - - auto eigenvalues = - raft::make_device_vector(handle, config.n_components); - auto eigenvectors = - raft::make_device_matrix(handle, n_samples, config.n_components); - - raft::sparse::solver::lanczos_compute_smallest_eigenvectors( - handle, - config, - raft::make_device_csr_matrix_view(laplacian.get_elements().data(), - laplacian.structure_view()), - std::nullopt, - eigenvalues.view(), - eigenvectors.view()); - - if (spectral_embedding_config.norm_laplacian) { - raft::linalg::matrix_vector_op( - handle, - raft::make_const_mdspan(eigenvectors.view()), // input matrix view - raft::make_const_mdspan(diagonal), // input vector view - eigenvectors.view(), // output matrix view (in-place) - [] __device__(float elem, float diag) { return elem / diag; }); +#define CUVS_INST_SPECTRAL_EMBEDDING(DataT) \ + void transform(raft::resources const& handle, \ + params config, \ + raft::device_coo_matrix_view connectivity_graph, \ + raft::device_matrix_view embedding) \ + { \ + detail::transform(handle, config, connectivity_graph, embedding); \ } - // Create a sequence of reversed column indices - config.n_components = - spectral_embedding_config.drop_first ? config.n_components - 1 : config.n_components; - auto col_indices = raft::make_device_vector(handle, config.n_components); - - // TODO: https://github.com/rapidsai/raft/issues/2661 - thrust::sequence(thrust::device, - col_indices.data_handle(), - col_indices.data_handle() + config.n_components, - config.n_components - 1, // Start from the last column index - -1 // Decrement (move backward) - ); - - // Create row-major views of the column-major matrices - // This is just a view re-interpretation, no data movement - auto eigenvectors_row_view = raft::make_device_matrix_view( - eigenvectors.data_handle(), - eigenvectors.extent(1), // Swap dimensions for the view - eigenvectors.extent(0)); +CUVS_INST_SPECTRAL_EMBEDDING(float); +CUVS_INST_SPECTRAL_EMBEDDING(double); - auto embedding_row_view = raft::make_device_matrix_view( - embedding.data_handle(), - embedding.extent(1), // Swap dimensions for the view - embedding.extent(0)); - - raft::matrix::gather( - handle, - raft::make_const_mdspan(eigenvectors_row_view), // Source matrix (as row-major view) - raft::make_const_mdspan(col_indices.view()), // Column indices to gather - embedding_row_view // Destination matrix (as row-major view) - ); -} +#undef CUVS_INST_SPECTRAL_EMBEDDING +// Non-template functions void transform(raft::resources const& handle, - params spectral_embedding_config, + params config, raft::device_matrix_view dataset, raft::device_matrix_view embedding) { - const int n_samples = dataset.extent(0); - - auto sym_coo_matrix = - raft::make_device_coo_matrix(handle, n_samples, n_samples); - auto sym_coo_row_ind = raft::make_device_vector(handle, n_samples + 1); - auto diagonal = raft::make_device_vector(handle, n_samples); - - create_connectivity_graph(handle, spectral_embedding_config, dataset, embedding, sym_coo_matrix); - auto csr_matrix_view = - coo_to_csr_matrix(handle, n_samples, sym_coo_row_ind.view(), sym_coo_matrix.view()); - auto laplacian = - create_laplacian(handle, spectral_embedding_config, csr_matrix_view, diagonal.view()); - compute_eigenpairs( - handle, spectral_embedding_config, n_samples, laplacian, diagonal.view(), embedding); + detail::transform(handle, config, dataset, embedding); } -void transform(raft::resources const& handle, - params spectral_embedding_config, - raft::device_coo_matrix_view connectivity_graph, - raft::device_matrix_view embedding) -{ - const int n_samples = connectivity_graph.structure_view().get_n_rows(); +} // namespace cuvs::preprocessing::spectral_embedding - auto sym_coo_row_ind = raft::make_device_vector(handle, n_samples + 1); - auto diagonal = raft::make_device_vector(handle, n_samples); +namespace cuvs::preprocessing::spectral_embedding::helpers { - auto csr_matrix_view = - coo_to_csr_matrix(handle, n_samples, sym_coo_row_ind.view(), connectivity_graph); - auto laplacian = - create_laplacian(handle, spectral_embedding_config, csr_matrix_view, diagonal.view()); - compute_eigenpairs( - handle, spectral_embedding_config, n_samples, laplacian, diagonal.view(), embedding); +void create_connectivity_graph(raft::resources const& handle, + params spectral_embedding_config, + raft::device_matrix_view dataset, + raft::device_coo_matrix& connectivity_graph) +{ + detail::create_connectivity_graph(handle, spectral_embedding_config, dataset, connectivity_graph); } -} // namespace cuvs::preprocessing::spectral_embedding +} // namespace cuvs::preprocessing::spectral_embedding::helpers diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 9e43c8da5c..56b53ef697 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -117,7 +117,7 @@ ConfigureTest( ConfigureTest( NAME CLUSTER_TEST PATH cluster/kmeans.cu cluster/kmeans_balanced.cu cluster/kmeans_find_k.cu cluster/linkage.cu - cluster/connect_knn.cu + cluster/connect_knn.cu cluster/spectral.cu GPUS 1 PERCENT 100 ) diff --git a/cpp/tests/cluster/spectral.cu b/cpp/tests/cluster/spectral.cu new file mode 100644 index 0000000000..39cb44f4ec --- /dev/null +++ b/cpp/tests/cluster/spectral.cu @@ -0,0 +1,202 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +namespace cuvs { + +struct SpectralClusteringInputs { + int n_row; + int n_col; + int n_clusters; + int n_components; + int n_neighbors; + int n_init; + float cluster_std; + uint64_t seed; +}; + +template +class SpectralClusteringTest : public ::testing::TestWithParam { + public: + SpectralClusteringTest() + : d_labels(0, raft::resource::get_cuda_stream(handle)), + d_labels_ref(0, raft::resource::get_cuda_stream(handle)) + { + } + + void basicTest() + { + testparams = ::testing::TestWithParam::GetParam(); + + int n_samples = testparams.n_row; + int n_features = testparams.n_col; + + cluster::spectral::params params; + params.n_clusters = testparams.n_clusters; + params.n_components = testparams.n_components; + params.n_neighbors = testparams.n_neighbors; + params.n_init = testparams.n_init; + params.rng_state = raft::random::RngState(testparams.seed); + + auto X = raft::make_device_matrix(handle, n_samples, n_features); + auto labels = raft::make_device_vector(handle, n_samples); + auto stream = raft::resource::get_cuda_stream(handle); + + raft::random::make_blobs(X.data_handle(), + labels.data_handle(), + n_samples, + n_features, + params.n_clusters, + stream, + true, + nullptr, + nullptr, + testparams.cluster_std, + false, + -10.0f, + 10.0f, + testparams.seed); + + d_labels.resize(n_samples, stream); + d_labels_ref.resize(n_samples, stream); + + raft::copy(d_labels_ref.data(), labels.data_handle(), n_samples, stream); + + auto connectivity_graph_float = + raft::make_device_coo_matrix(handle, n_samples, n_samples); + + cuvs::preprocessing::spectral_embedding::params embed_params; + embed_params.n_neighbors = params.n_neighbors; + embed_params.seed = params.rng_state.seed; + + cuvs::preprocessing::spectral_embedding::helpers::create_connectivity_graph( + handle, embed_params, X.view(), connectivity_graph_float); + + // For double precision test, convert the graph from float to double + if constexpr (std::is_same_v) { + auto nnz = connectivity_graph_float.structure_view().get_nnz(); + auto connectivity_graph_double = + raft::make_device_coo_matrix(handle, n_samples, n_samples, nnz); + + raft::copy(connectivity_graph_double.structure_view().get_rows().data(), + connectivity_graph_float.structure_view().get_rows().data(), + nnz, + stream); + raft::copy(connectivity_graph_double.structure_view().get_cols().data(), + connectivity_graph_float.structure_view().get_cols().data(), + nnz, + stream); + + auto float_elements_view = raft::make_device_vector_view( + connectivity_graph_float.view().get_elements().data(), nnz); + auto double_elements_view = raft::make_device_vector_view( + connectivity_graph_double.view().get_elements().data(), nnz); + + raft::linalg::unary_op( + handle, float_elements_view, double_elements_view, [] __device__(float x) { + return static_cast(x); + }); + + cluster::spectral::fit_predict( + handle, + params, + connectivity_graph_double.view(), + raft::make_device_vector_view(d_labels.data(), n_samples)); + } else { + cluster::spectral::fit_predict( + handle, + params, + connectivity_graph_float.view(), + raft::make_device_vector_view(d_labels.data(), n_samples)); + } + + raft::resource::sync_stream(handle, stream); + + score = + raft::stats::adjusted_rand_index(d_labels_ref.data(), d_labels.data(), n_samples, stream); + + if (score < 0.8) { + std::stringstream ss; + ss << "Expected: " << raft::arr2Str(d_labels_ref.data(), 25, "d_labels_ref", stream); + std::cout << (ss.str().c_str()) << '\n'; + ss.str(std::string()); + ss << "Actual: " << raft::arr2Str(d_labels.data(), 25, "d_labels", stream); + std::cout << (ss.str().c_str()) << '\n'; + std::cout << "Score = " << score << '\n'; + } + } + + void SetUp() override { basicTest(); } + + protected: + raft::resources handle; + SpectralClusteringInputs testparams; + rmm::device_uvector d_labels; + rmm::device_uvector d_labels_ref; + double score; + cluster::spectral::params params; +}; + +const std::vector inputs = { + // Small datasets with well-separated clusters + {100, 10, 2, 2, 10, 3, 0.3f, 42ULL}, // Tighter clusters for better separation + {200, 20, 3, 3, 15, 3, 0.3f, 123ULL}, + {500, 15, 4, 4, 20, 3, 0.3f, 456ULL}, + + // Medium datasets + {1000, 32, 5, 5, 25, 5, 0.3f, 789ULL}, + {2000, 50, 6, 6, 30, 5, 0.3f, 111ULL}, + + // Larger datasets with more clusters + {5000, 100, 8, 8, 40, 5, 0.3f, 222ULL}, + {10000, 50, 10, 10, 50, 5, 0.3f, 333ULL}, + + {1000, 30, 5, 5, 20, 3, 0.3f, 444ULL}, + {1000, 30, 3, 3, 20, 3, 0.3f, 555ULL}, + + // Varying cluster separation + {500, 20, 3, 3, 15, 3, 0.2f, 666ULL}, // Very tight clusters + {500, 20, 3, 3, 15, 3, 0.5f, 777ULL}, // More spread but still reasonable +}; + +typedef SpectralClusteringTest SpectralClusteringTestF; +typedef SpectralClusteringTest SpectralClusteringTestD; + +TEST_P(SpectralClusteringTestF, Result) +{ + ASSERT_GT(score, 0.7) << "Adjusted Rand Index is too low: " << score; +} + +TEST_P(SpectralClusteringTestD, Result) +{ + ASSERT_GT(score, 0.7) << "Adjusted Rand Index (double) is too low: " << score; +} + +INSTANTIATE_TEST_CASE_P(SpectralClusteringTests, + SpectralClusteringTestF, + ::testing::ValuesIn(inputs)); + +INSTANTIATE_TEST_CASE_P(SpectralClusteringTests, + SpectralClusteringTestD, + ::testing::ValuesIn(inputs)); + +} // namespace cuvs