diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 05388c3682..bdab48e630 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -217,12 +217,21 @@ if(NOT BUILD_CPU_ONLY) src/neighbors/detail/cagra/compute_distance_standard_BitwiseHamming_u8_uint32_dim128_t8.cu src/neighbors/detail/cagra/compute_distance_standard_BitwiseHamming_u8_uint32_dim256_t16.cu src/neighbors/detail/cagra/compute_distance_standard_BitwiseHamming_u8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim512_t32.cu src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_float_uint32_dim128_t8.cu src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_float_uint32_dim256_t16.cu src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_float_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_half_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_half_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_half_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_int8_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_int8_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_int8_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_uint8_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_uint8_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_uint8_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim512_t32.cu src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim128_t8.cu src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim256_t16.cu src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim512_t32.cu @@ -238,45 +247,108 @@ if(NOT BUILD_CPU_ONLY) src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim128_t8.cu src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim256_t16.cu src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_half_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_half_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_half_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_int8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_int8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_int8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_uint8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_uint8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_CosineExpanded_uint8_uint32_dim512_t32.cu src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim128_t8.cu src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim256_t16.cu src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim512_t32.cu src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim128_t8.cu src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim256_t16.cu src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t4_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t4_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t8_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t8_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t16_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t16_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t4_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t4_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t8_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t8_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t16_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t16_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t4_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t4_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t8_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t8_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t16_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t16_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t4_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t4_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t8_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t8_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t16_8pq_8subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t16_8pq_8subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_2subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_2subd_half_fp8true.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_4subd_half_fp8false.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_4subd_half_fp8true.cu src/neighbors/detail/cagra/search_multi_cta_float_uint32.cu src/neighbors/detail/cagra/search_multi_cta_half_uint32.cu src/neighbors/detail/cagra/search_multi_cta_int8_uint32.cu diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index fb1b1549af..c1ca7a35de 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -204,6 +204,8 @@ enum class search_algo { enum class hash_mode { HASH = 0, SMALL = 1, AUTO = 100 }; +enum class internal_dtype { F16 = 0, E5M2 = 1, AUTO = 100 }; + struct search_params : cuvs::neighbors::search_params { /** Maximum number of queries to search at the same time (batch size). Auto select when 0.*/ size_t max_queries = 0; @@ -277,6 +279,10 @@ struct search_params : cuvs::neighbors::search_params { * negative, in which case the filtering rate is automatically calculated. */ float filtering_rate = -1.0; + + /** Data type of the query vector and codebook table on shared memory. Currently, only VPQ + * supports FP8. **/ + internal_dtype smem_dtype = internal_dtype::AUTO; }; /** diff --git a/cpp/src/neighbors/detail/cagra/cagra_search.cuh b/cpp/src/neighbors/detail/cagra/cagra_search.cuh index 45328377be..a6bd624ba2 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_search.cuh @@ -153,6 +153,10 @@ void search_main(raft::resources const& res, // Dispatch search parameters based on the dataset kind. if (auto* strided_dset = dynamic_cast*>(&index.data()); strided_dset != nullptr) { + if (params.smem_dtype != cuvs::neighbors::cagra::internal_dtype::AUTO || + params.smem_dtype != cuvs::neighbors::cagra::internal_dtype::F16) { + RAFT_LOG_WARN("In this search mode, only AUTO or F16 are supported as the smem_dtype."); + } // Search using a plain (strided) row-major dataset RAFT_EXPECTS(index.metric() != cuvs::distance::DistanceType::CosineExpanded || index.dataset_norms().has_value(), diff --git a/cpp/src/neighbors/detail/cagra/compute_distance-ext.cuh b/cpp/src/neighbors/detail/cagra/compute_distance-ext.cuh index f0eababe36..c0fef4f0cf 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance-ext.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance-ext.cuh @@ -39,24 +39,6 @@ extern template struct standard_descriptor_spec; -extern template struct vpq_descriptor_spec; -extern template struct vpq_descriptor_spec; extern template struct standard_descriptor_spec; -extern template struct vpq_descriptor_spec; -extern template struct vpq_descriptor_spec; extern template struct standard_descriptor_spec; extern template struct vpq_descriptor_spec; + float, + true>; extern template struct vpq_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; + float, + false>; extern template struct vpq_descriptor_spec; + float, + true>; extern template struct vpq_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; + float, + false>; extern template struct vpq_descriptor_spec; + float, + true>; extern template struct vpq_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; + float, + false>; extern template struct vpq_descriptor_spec; + float, + true>; extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; + float, + false>; extern template struct vpq_descriptor_spec; + float, + true>; extern template struct vpq_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; + float, + false>; extern template struct vpq_descriptor_spec; + float, + true>; extern template struct vpq_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; + float, + false>; extern template struct vpq_descriptor_spec; + float, + true>; extern template struct vpq_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; + float, + false>; extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; + float, + false>; extern template struct vpq_descriptor_spec; -extern template struct standard_descriptor_spec; -extern template struct standard_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; +extern template struct standard_descriptor_spec; -extern template struct vpq_descriptor_spec; -extern template struct vpq_descriptor_spec; extern template struct standard_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; extern template struct vpq_descriptor_spec; + float, + false>; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; extern template struct vpq_descriptor_spec; + float, + false>; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; +extern template struct vpq_descriptor_spec; extern template struct standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec>; @@ -541,63 +1573,351 @@ using descriptor_instances = instance_selector< standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec>; diff --git a/cpp/src/neighbors/detail/cagra/compute_distance.cu b/cpp/src/neighbors/detail/cagra/compute_distance.cu index a0b7209814..ec4eec28ac 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance.cu @@ -22,63 +22,351 @@ template struct instance_selector< standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, - vpq_descriptor_spec, - vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, + vpq_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec, standard_descriptor_spec>; diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_00_generate.py b/cpp/src/neighbors/detail/cagra/compute_distance_00_generate.py index fde2081c12..9a3261f77e 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/compute_distance_00_generate.py @@ -30,11 +30,14 @@ """ mxdim_team = [(128, 8), (256, 16), (512, 32)] +vpq_2_4_mxdim_team = [(64, 4), (128, 8), (256, 16), (512, 32)] +vpq_8_mxdim_team = [(128, 4), (256, 8), (512, 16), (1024, 32)] +vrq_mxdim_team = [(64, 4), (128, 8), (256, 16), (512, 32)] # mxdim_team = [(64, 8), (128, 16), (256, 32)] # mxdim_team = [(32, 8), (64, 16), (128, 32)] pq_bits = [8] -pq_lens = [2, 4] +pq_lens = [2, 4, 8] # rblock = [(256, 4), (512, 2), (1024, 1)] # rcandidates = [32] @@ -76,26 +79,33 @@ f.write(template.format(includes=includes, content=content)) cmake_list.append(f" src/neighbors/detail/cagra/{path}") - # CAGRA-Q - for code_book_t in code_book_types: - for pq_len in pq_lens: + for pq_len in pq_lens: + vpq_mxdim_team = ( + vpq_8_mxdim_team if pq_len == 8 else vpq_2_4_mxdim_team + ) + for mxdim, team in vpq_mxdim_team: + # CAGRA-Q + for code_book_t in code_book_types: for pq_bit in pq_bits: for metric in ["L2Expanded"]: - path = f"compute_distance_vpq_{metric}_{type_path}_dim{mxdim}_t{team}_{pq_bit}pq_{pq_len}subd_{code_book_t}.cu" - includes = '#include "compute_distance_vpq-impl.cuh"' - params = f"{metric_prefix}{metric}, {team}, {mxdim}, {pq_bit}, {pq_len}, {code_book_t}, {data_t}, {idx_t}, {distance_t}" - spec = f"vpq_descriptor_spec<{params}>" - content = f"""template struct {spec};""" - specs.append(spec) - with open(path, "w") as f: - f.write( - template.format( - includes=includes, content=content - ) - ) - cmake_list.append( - f" src/neighbors/detail/cagra/{path}" + for enable_fp8 in ["true", "false"]: + path = f"compute_distance_vpq_{metric}_{type_path}_dim{mxdim}_t{team}_{pq_bit}pq_{pq_len}subd_{code_book_t}_fp8{enable_fp8}.cu" + includes = ( + '#include "compute_distance_vpq-impl.cuh"' ) + params = f"{metric_prefix}{metric}, {team}, {mxdim}, {pq_bit}, {pq_len}, {code_book_t}, {data_t}, {idx_t}, {distance_t}, {enable_fp8}" + spec = f"vpq_descriptor_spec<{params}>" + content = f"""template struct {spec};""" + specs.append(spec) + with open(path, "w") as f: + f.write( + template.format( + includes=includes, content=content + ) + ) + cmake_list.append( + f" src/neighbors/detail/cagra/{path}" + ) # CAGRA (Binary Hamming distance) for mxdim, team in mxdim_team: diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh b/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh index cdafb173ed..5de2478702 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq-impl.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -15,6 +15,27 @@ namespace cuvs::neighbors::cagra::detail { +template +struct smem_val_type_t; + +template +struct smem_val_type_t> { + using smem_val_pack_t = half2; + using smem_val_t = half; + using smem_val_pack_uint_t = uint32_t; + static constexpr uint32_t num_packed_elements = 2; +}; + +template +struct smem_val_type_t> { + using smem_val_pack_t = device::fp8xN; + using smem_val_t = typename smem_val_pack_t::unit_t; + using smem_val_pack_uint_t = typename smem_val_pack_t::uint_t; + static constexpr uint32_t num_packed_elements = smem_val_pack_t::num_elements; +}; + template + typename DistanceT, + bool EnableFP8> struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t { using base_type = dataset_descriptor_base_t; using CODE_BOOK_T = CodebookT; @@ -42,6 +64,7 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t, "Only CODE_BOOK_T = `half` is supported now"); @@ -85,7 +108,9 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t(); + (1 << PQ_BITS) * PQ_LEN * + utils::size_of::smem_val_pack_uint_t>() / + smem_val_type_t::num_packed_elements; _RAFT_HOST_DEVICE cagra_q_dataset_descriptor_t(setup_workspace_type* setup_workspace_impl, compute_distance_type* compute_distance_impl, @@ -119,7 +144,9 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t(dim, DatasetBlockDim) * sizeof(QUERY_T); + raft::round_up_safe(dim, DatasetBlockDim) * + utils::size_of::smem_val_pack_uint_t>() / + smem_val_type_t::num_packed_elements; } }; @@ -139,12 +166,18 @@ _RAFT_DEVICE __noinline__ auto setup_workspace_vpq(const DescriptorT* that, const typename DescriptorT::DATA_T* queries_ptr, uint32_t query_id) -> const DescriptorT* { - using QUERY_T = typename DescriptorT::QUERY_T; - using CODE_BOOK_T = typename DescriptorT::CODE_BOOK_T; - using word_type = uint32_t; - constexpr auto kDatasetBlockDim = DescriptorT::kDatasetBlockDim; - constexpr auto PQ_BITS = DescriptorT::kPqBits; - constexpr auto PQ_LEN = DescriptorT::kPqLen; + using QUERY_T = typename DescriptorT::QUERY_T; + using CODE_BOOK_T = typename DescriptorT::CODE_BOOK_T; + using word_type = uint32_t; + constexpr auto kDatasetBlockDim = DescriptorT::kDatasetBlockDim; + constexpr auto PQ_BITS = DescriptorT::kPqBits; + constexpr auto PQ_LEN = DescriptorT::kPqLen; + constexpr auto EnableFP8 = DescriptorT::kEnableFP8; + using smem_val_config = smem_val_type_t; + using smem_val_t = typename smem_val_config::smem_val_t; + using smem_val_pack_uint_t = typename smem_val_config::smem_val_pack_uint_t; + using smem_val_pack_t = typename smem_val_config::smem_val_pack_t; + constexpr auto num_packed_elements = smem_val_config::num_packed_elements; auto* r = reinterpret_cast(smem_ptr); @@ -166,20 +199,36 @@ _RAFT_DEVICE __noinline__ auto setup_workspace_vpq(const DescriptorT* that, __syncthreads(); // Copy PQ table - for (unsigned i = threadIdx.x * 2; i < (1 << PQ_BITS) * PQ_LEN; i += blockDim.x * 2) { - half2 buf2; - buf2.x = r->pq_code_book_ptr()[i]; - buf2.y = r->pq_code_book_ptr()[i + 1]; - + for (unsigned i = threadIdx.x * smem_val_config::num_packed_elements; + i < (1 << PQ_BITS) * PQ_LEN; + i += blockDim.x * smem_val_config::num_packed_elements) { // Change the order of PQ code book array to reduce the // frequency of bank conflicts. - constexpr auto num_elements_per_bank = 4 / utils::size_of(); - constexpr auto num_banks_per_subspace = PQ_LEN / num_elements_per_bank; - const auto j = i / num_elements_per_bank; - const auto smem_index = - (j / num_banks_per_subspace) + (j % num_banks_per_subspace) * (1 << PQ_BITS); - - device::sts(codebook_buf + smem_index * sizeof(half2), buf2); + constexpr auto num_elements_per_bank = + smem_val_config::num_packed_elements / + (utils::size_of() / utils::size_of()); + + if constexpr (PQ_LEN >= num_elements_per_bank) { // safety + constexpr auto num_banks_per_subspace = PQ_LEN / num_elements_per_bank; + const auto j = i / num_elements_per_bank; + const auto smem_index = + (j / num_banks_per_subspace) + (j % num_banks_per_subspace) * (1 << PQ_BITS); + + if constexpr (num_packed_elements == 2) { + half2 buf2; + buf2.x = r->pq_code_book_ptr()[i]; + buf2.y = r->pq_code_book_ptr()[i + 1]; + device::sts(codebook_buf + smem_index * sizeof(smem_val_pack_t), buf2); + } else if constexpr (num_packed_elements == 4 || num_packed_elements == 8) { + smem_val_pack_t buf; +#pragma unroll + for (uint32_t k = 0; k < num_packed_elements; k++) { + buf.data.x1[k] = + static_cast(static_cast(r->pq_code_book_ptr()[i + k])); + } + device::sts(codebook_buf + smem_index * sizeof(smem_val_pack_uint_t), buf.as_uint()); + } + } } } @@ -188,20 +237,34 @@ _RAFT_DEVICE __noinline__ auto setup_workspace_vpq(const DescriptorT* that, constexpr cuvs::spatial::knn::detail::utils::mapping mapping{}; auto smem_query_ptr = - reinterpret_cast(reinterpret_cast(smem_ptr) + sizeof(DescriptorT) + - DescriptorT::kSMemCodeBookSizeInBytes); - for (unsigned i = threadIdx.x * 2; i < dim; i += blockDim.x * 2) { - half2 buf2{0, 0}; - if (i < dim) { buf2.x = mapping(queries_ptr[i]); } - if (i + 1 < dim) { buf2.y = mapping(queries_ptr[i + 1]); } - if constexpr ((PQ_BITS == 8) && (PQ_LEN % 2 == 0)) { + reinterpret_cast(reinterpret_cast(smem_ptr) + sizeof(DescriptorT) + + DescriptorT::kSMemCodeBookSizeInBytes); + for (unsigned i = threadIdx.x * num_packed_elements; i < dim; + i += blockDim.x * num_packed_elements) { + smem_val_pack_t buf; + if constexpr (num_packed_elements == 2) { + if (i < dim) { static_cast(static_cast(buf.x = mapping(queries_ptr[i]))); } + if (i + 1 < dim) { + static_cast(static_cast(buf.y = mapping(queries_ptr[i + 1]))); + } + } else if constexpr (num_packed_elements == 4 || num_packed_elements == 8) { +#pragma unroll + for (uint32_t k = 0; k < num_packed_elements; k++) { + if (i + k < dim) { + buf.data.x1[k] = static_cast(static_cast(mapping(queries_ptr[i + k]))); + } + } + } + + if constexpr ((PQ_BITS == 8) && (PQ_LEN % num_packed_elements == 0)) { // Transpose the queries buffer to avoid bank conflicts in compute_distance. constexpr uint32_t vlen = 4; // **** DO NOT CHANGE **** - constexpr auto kStride = vlen * PQ_LEN / 2; - reinterpret_cast(smem_query_ptr)[transpose(i / 2)] = - buf2; + constexpr auto kStride = vlen * PQ_LEN / num_packed_elements; + reinterpret_cast( + smem_query_ptr)[transpose( + i / num_packed_elements)] = buf; } else { - (reinterpret_cast(smem_query_ptr + i))[0] = buf2; + (reinterpret_cast(smem_query_ptr + i))[0] = buf; } } @@ -223,12 +286,21 @@ _RAFT_DEVICE RAFT_DEVICE_INLINE_FUNCTION auto compute_distance_vpq_worker( constexpr auto DatasetBlockDim = DescriptorT::kDatasetBlockDim; constexpr auto PQ_BITS = DescriptorT::kPqBits; constexpr auto PQ_LEN = DescriptorT::kPqLen; + constexpr auto EnableFP8 = DescriptorT::kEnableFP8; + using PQ_CODEBOOK_LOAD_T = uint32_t; + + using smem_val_config = smem_val_type_t; + using smem_val_t = typename smem_val_config::smem_val_t; + using smem_val_pack_uint_t = typename smem_val_config::smem_val_pack_uint_t; + using smem_val_pack_t = typename smem_val_config::smem_val_pack_t; + constexpr uint32_t num_packed_elements = smem_val_config::num_packed_elements; const uint32_t query_ptr = pq_codebook_ptr + DescriptorT::kSMemCodeBookSizeInBytes; static_assert(PQ_BITS == 8, "Only pq_bits == 8 is supported at the moment."); - constexpr uint32_t vlen = 4; // **** DO NOT CHANGE **** + constexpr uint32_t vlen = utils::size_of() / utils::size_of(); constexpr uint32_t nelem = raft::div_rounding_up_unsafe(DatasetBlockDim / PQ_LEN, TeamSize * vlen); + static_assert(DatasetBlockDim / PQ_LEN >= TeamSize * vlen, "DatasetBlockDim is too small"); constexpr auto kTeamMask = DescriptorT::kTeamSize - 1; constexpr auto kTeamVLen = TeamSize * vlen; @@ -239,53 +311,107 @@ _RAFT_DEVICE RAFT_DEVICE_INLINE_FUNCTION auto compute_distance_vpq_worker( for (uint32_t elem_offset = 0; elem_offset * PQ_LEN < dim; elem_offset += DatasetBlockDim / PQ_LEN) { // Loading PQ codes - uint32_t pq_codes[nelem]; + PQ_CODEBOOK_LOAD_T pq_codes[nelem]; #pragma unroll for (std::uint32_t e = 0; e < nelem; e++) { const std::uint32_t k = e * kTeamVLen + elem_offset + laneId * vlen; if (k >= n_subspace) break; - // Loading 4 x 8-bit PQ-codes using 32-bit load ops (from device memory) - device::ldg_cg(pq_codes[e], reinterpret_cast(dataset_ptr + 4 + k)); + + if constexpr (std::is_same_v) { + device::ldg_cg(pq_codes[e], + reinterpret_cast(dataset_ptr + 4 + k)); + } else { + pq_codes[e] = *reinterpret_cast(dataset_ptr + 4 + k); + } } // if constexpr (PQ_LEN % 2 == 0) { - // **** Use half2 for distance computation **** + if constexpr (PQ_LEN >= num_packed_elements) { // safety + // **** Use half2 for distance computation **** #pragma unroll - for (std::uint32_t e = 0; e < nelem; e++) { - const std::uint32_t k = e * kTeamVLen + elem_offset + laneId * vlen; - if (k >= n_subspace) break; - // Loading VQ code-book - half2 vq_vals[PQ_LEN][vlen / 2]; + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = e * kTeamVLen + elem_offset + laneId * vlen; + if (k >= n_subspace) break; + // Loading VQ code-book + half2 vq_vals[PQ_LEN][vlen / 2]; #pragma unroll - for (std::uint32_t m = 0; m < PQ_LEN; m++) { - const uint32_t d = (vlen * m) + (PQ_LEN * k); - if (d >= dim) break; - device::ldg_ca(vq_vals[m], vq_code_book_ptr + d); - } - // Compute distance - std::uint32_t pq_code = pq_codes[e]; + for (std::uint32_t m = 0; m < PQ_LEN; m++) { + const uint32_t d = (vlen * m) + (PQ_LEN * k); + if (d >= dim) break; + device::ldg_ca(vq_vals[m], vq_code_book_ptr + d); + } + // Compute distance + PQ_CODEBOOK_LOAD_T pq_code = pq_codes[e]; #pragma unroll - for (std::uint32_t v = 0; v < vlen; v++) { - if (PQ_LEN * (v + k) >= dim) break; + for (std::uint32_t v = 0; v < vlen; v++) { + if (PQ_LEN * (v + k) >= dim) break; +#pragma unroll + for (std::uint32_t m = 0; m < PQ_LEN / num_packed_elements; m++) { + constexpr uint32_t vq_val_pack_num_elements = 2; + constexpr auto kQueryBlock = DatasetBlockDim / (vlen * PQ_LEN); + std::uint32_t vq_half2_index = m * (num_packed_elements / vq_val_pack_num_elements) + + (PQ_LEN / vq_val_pack_num_elements) * v; + + uint32_t query_val_index; + if constexpr (num_packed_elements == 2) { + query_val_index = + vq_half2_index * kQueryBlock + elem_offset * (PQ_LEN / 2) + e * TeamSize + laneId; + } else if constexpr (PQ_LEN == num_packed_elements) { + query_val_index = elem_offset + + v * (DatasetBlockDim / (num_packed_elements * vlen)) + + e * TeamSize + laneId; + } else { + const uint32_t query_vec_element_id = + (elem_offset + e * vlen * TeamSize + v + laneId * vlen) * PQ_LEN / + num_packed_elements; + constexpr auto kStride = vlen * PQ_LEN / num_packed_elements; + query_val_index = + transpose(query_vec_element_id); + } + + if constexpr (num_packed_elements == 2) { + smem_val_pack_t c2, q2; + // Loading PQ code book from smem + device::lds(c2, + pq_codebook_ptr + sizeof(smem_val_pack_uint_t) * + ((1 << PQ_BITS) * m + ((pq_code & 0xff)))); + + // Loading query vector from smem + device::lds(q2, query_ptr + sizeof(smem_val_pack_t) * query_val_index); + // L2 distance + auto dist = + q2 - c2 - reinterpret_cast(vq_vals)[vq_half2_index]; + dist = dist * dist; + norm += static_cast(dist.x + dist.y); + } else if constexpr (num_packed_elements == 4 || num_packed_elements == 8) { + smem_val_pack_t c_vec, q_vec; + // Loading PQ code book from smem + device::lds(c_vec.as_uint(), + pq_codebook_ptr + sizeof(smem_val_pack_uint_t) * + ((1 << PQ_BITS) * m + ((pq_code & 0xff)))); + device::lds(q_vec.as_uint(), + query_ptr + sizeof(smem_val_pack_uint_t) * query_val_index); + + half2 c2_, q2_; + #pragma unroll - for (std::uint32_t m = 0; m < PQ_LEN / 2; m++) { - constexpr auto kQueryBlock = DatasetBlockDim / (vlen * PQ_LEN); - const std::uint32_t d1 = m + (PQ_LEN / 2) * v; - const std::uint32_t d = - d1 * kQueryBlock + elem_offset * (PQ_LEN / 2) + e * TeamSize + laneId; - half2 q2, c2; - // Loading query vector from smem - device::lds(q2, query_ptr + sizeof(half2) * d); - // Loading PQ code book from smem - device::lds(c2, - pq_codebook_ptr + - sizeof(CODE_BOOK_T) * ((1 << PQ_BITS) * 2 * m + (2 * (pq_code & 0xff)))); - // L2 distance - auto dist = q2 - c2 - reinterpret_cast(vq_vals)[d1]; - dist = dist * dist; - norm += static_cast(dist.x + dist.y); + for (uint32_t bi = 0; bi < num_packed_elements / 2; bi++) { + // Loading query vector from smem + c2_ = c_vec.as_half2(bi); + q2_ = q_vec.as_half2(bi); + // L2 distance + auto dist = + q2_ - c2_ - + reinterpret_cast(vq_vals)[vq_half2_index]; + dist = dist * dist; + norm += static_cast(dist.x + dist.y); + + vq_half2_index += 1; + } + } + } + pq_code >>= 8; } - pq_code >>= 8; } } } else { @@ -308,8 +434,8 @@ _RAFT_DEVICE RAFT_DEVICE_INLINE_FUNCTION auto compute_distance_vpq_worker( #pragma unroll for (std::uint32_t v = 0; v < vlen; v++) { if (PQ_LEN * (v + k) >= dim) break; - CODE_BOOK_T pq_vals[PQ_LEN]; - device::lds(pq_vals, pq_codebook_ptr + sizeof(CODE_BOOK_T) * PQ_LEN * (pq_code & 0xff)); + CODE_BOOK_T smem_vals[PQ_LEN]; + device::lds(smem_vals, pq_codebook_ptr + sizeof(CODE_BOOK_T) * PQ_LEN * (pq_code & 0xff)); #pragma unroll for (std::uint32_t m = 0; m < PQ_LEN; m++) { const std::uint32_t d1 = m + (PQ_LEN * v); @@ -317,7 +443,7 @@ _RAFT_DEVICE RAFT_DEVICE_INLINE_FUNCTION auto compute_distance_vpq_worker( // if (d >= dataset_dim) break; DISTANCE_T diff; device::lds(diff, query_ptr + sizeof(QUERY_T) * d); - diff -= static_cast(pq_vals[m]); + diff -= static_cast(smem_vals[m]); diff -= static_cast(reinterpret_cast(vq_vals)[d1]); norm += diff * diff; @@ -355,7 +481,8 @@ template + typename DistanceT, + bool EnableFP8> RAFT_KERNEL __launch_bounds__(1, 1) vpq_dataset_descriptor_init_kernel(dataset_descriptor_base_t* out, const std::uint8_t* encoded_dataset_ptr, @@ -373,7 +500,8 @@ RAFT_KERNEL __launch_bounds__(1, 1) CodebookT, DataT, IndexT, - DistanceT>; + DistanceT, + EnableFP8>; using base_type = typename desc_type::base_type; new (out) desc_type( reinterpret_cast(&setup_workspace_vpq), @@ -394,7 +522,8 @@ template + typename DistanceT, + bool EnableFP8> dataset_descriptor_host vpq_descriptor_spec::init_(const cagra::search_params& params, + DistanceT, + EnableFP8>::init_(const cagra::search_params& params, const std::uint8_t* encoded_dataset_ptr, uint32_t encoded_dataset_dim, const CodebookT* vq_code_book_ptr, @@ -420,7 +550,8 @@ vpq_descriptor_spec; + DistanceT, + EnableFP8>; using base_type = typename desc_type::base_type; desc_type dd_host{nullptr, @@ -442,7 +573,8 @@ vpq_descriptor_spec + DistanceT, + EnableFP8> <<<1, 1, 0, stream>>>(dev_ptr, encoded_dataset_ptr, encoded_dataset_dim, diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq.hpp b/cpp/src/neighbors/detail/cagra/compute_distance_vpq.hpp index 2b69a1cef4..0f55b3efb2 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq.hpp +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq.hpp @@ -21,7 +21,8 @@ template + typename DistanceT, + bool EnableFP8> struct vpq_descriptor_spec : public instance_spec { using base_type = instance_spec; using typename base_type::data_type; @@ -63,12 +64,18 @@ struct vpq_descriptor_spec : public instance_spec { const DatasetT& dataset, cuvs::distance::DistanceType metric) -> double { + const auto fp8_natively_supported = raft::getComputeCapability().first >= 9; + const auto use_fp8 = + params.smem_dtype == cuvs::neighbors::cagra::internal_dtype::E5M2 || + (params.smem_dtype == cuvs::neighbors::cagra::internal_dtype::AUTO && fp8_natively_supported); + // If explicit team_size is specified and doesn't match the instance, discard it if (params.team_size != 0 && TeamSize != params.team_size) { return -1.0; } if (cuvs::distance::DistanceType::L2Expanded != metric) { return -1.0; } // Match codebook params if (dataset.pq_bits() != PqBits) { return -1.0; } if (dataset.pq_len() != PqLen) { return -1.0; } + if (use_fp8 != EnableFP8) { return -1.0; } // Otherwise, favor the closest dataset dimensionality. constexpr std::uint32_t preferred_load_elmes_per_thread = 16; /*magic number that is good based on experiments.*/ diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..0eeba4602c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..4a059f133e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t4_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t4_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..314f233573 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t4_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t4_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t4_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..369b44f743 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t4_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..87927cd478 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half_fp8true.cu new file mode 100644 index 0000000000..33232e7a64 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..f7290b2b5e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half_fp8true.cu new file mode 100644 index 0000000000..d4b0360c01 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..07b9021ad7 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half_fp8true.cu new file mode 100644 index 0000000000..92aafecd4f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..75f433ed4a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half_fp8true.cu new file mode 100644 index 0000000000..25cdfcf44b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t8_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t8_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..12c1166902 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t8_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t8_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t8_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..4fd44ce5a8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t8_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t16_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t16_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..c2a3b9f565 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t16_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t16_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t16_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..29a694b72d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t16_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..48782764f2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half_fp8true.cu new file mode 100644 index 0000000000..da99ab9173 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..0164636430 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half_fp8true.cu new file mode 100644 index 0000000000..d6918aab34 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..9ba5ae5005 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_2subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_2subd_half_fp8true.cu new file mode 100644 index 0000000000..b9a4f4ebdf --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_2subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..65a1455dca --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_4subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_4subd_half_fp8true.cu new file mode 100644 index 0000000000..fc41ff9109 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim64_t4_8pq_4subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..ed1f9afc26 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..37fd1ad8c5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t4_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t4_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..0a50234576 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t4_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t4_8pq_8subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t4_8pq_8subd_half_fp8true.cu index 680f594261..56b4a2f6fd 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t4_8pq_8subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -18,13 +18,14 @@ namespace cuvs::neighbors::cagra::detail { using namespace cuvs::distance; template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half_fp8false.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half_fp8false.cu index bbbc147de7..02c0559dd9 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half_fp8false.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + false>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half_fp8true.cu index 9b29bb8ffc..fa6c5305d2 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -23,8 +23,9 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..4680d19ab9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half_fp8true.cu index b0883184c1..11e75f61b6 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half_fp8false.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half_fp8false.cu index 6d7850cf8c..42bb7660d9 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half_fp8false.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + false>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half_fp8true.cu index 08cd7590bc..520e36c602 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -23,8 +23,9 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half_fp8false.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half_fp8false.cu index 040fa4456d..023bed430d 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half_fp8false.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + false>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half_fp8true.cu index 6610d1d87b..c40e843fa5 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -23,8 +23,9 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t8_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t8_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..f2e07f0c5e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t8_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t8_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t8_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..123c117c32 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t8_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t16_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t16_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..4d94ea3c71 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t16_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t16_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t16_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..9a55456931 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t16_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half_fp8false.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half_fp8false.cu index 70ae484456..8fac7c2659 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half_fp8false.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + false>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half_fp8true.cu index e251d13331..c83b911d36 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -23,8 +23,9 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half_fp8false.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half_fp8false.cu index c0f889af28..2b801907b9 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half_fp8false.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + false>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half_fp8true.cu index 0b37928ab7..c07ede51e9 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -23,8 +23,9 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..2652edfc8c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_2subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_2subd_half_fp8true.cu new file mode 100644 index 0000000000..95aadfc5d9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_2subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..85f46ec0f5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_4subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_4subd_half_fp8true.cu new file mode 100644 index 0000000000..db6c599e14 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim64_t4_8pq_4subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..b9b38960af --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..1bc6a46138 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t4_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t4_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..4b856ff203 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t4_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t4_8pq_8subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t4_8pq_8subd_half_fp8true.cu index d59e7c9078..2e84b879e8 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t4_8pq_8subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -18,13 +18,14 @@ namespace cuvs::neighbors::cagra::detail { using namespace cuvs::distance; template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..1a03321b2f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half_fp8true.cu index f4bb7d1e31..b46995999d 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..05d9febaeb --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half_fp8true.cu new file mode 100644 index 0000000000..4e3a5322d3 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..99a955fcba --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half_fp8true.cu index ff0672de06..b0eb39d62b 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..e8fe498589 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half_fp8true.cu index bc160382be..e24dc1ef20 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t8_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t8_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..8c40f8482e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t8_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t8_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t8_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..b857508f52 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t8_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t16_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t16_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..cc3e33adc1 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t16_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t16_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t16_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..de0860b278 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t16_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..c07ce1ff7a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half_fp8true.cu index 894a1eae7b..ff7158f5d2 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..ea1a6e975b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half_fp8true.cu index 4aa48daee0..c21c9c5c10 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..b707ad056b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_2subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_2subd_half_fp8true.cu new file mode 100644 index 0000000000..9c273805d9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_2subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..3fa2ef8170 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_4subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_4subd_half_fp8true.cu new file mode 100644 index 0000000000..c1d4456e2d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim64_t4_8pq_4subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..1e09109eb8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim1024_t32_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..9ea862c9bc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim1024_t32_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t4_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t4_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..bfcc48f462 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t4_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t4_8pq_8subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t4_8pq_8subd_half_fp8true.cu index b0e824d788..238572cf5f 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t4_8pq_8subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -18,13 +18,14 @@ namespace cuvs::neighbors::cagra::detail { using namespace cuvs::distance; template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..58698a9760 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half_fp8true.cu index fc9f5043ac..8388bae580 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..584a58fcf1 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half_fp8true.cu new file mode 100644 index 0000000000..2f8b58b9e1 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..b735134e70 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half_fp8true.cu index b7755c2d17..71d93ebe04 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..ba28f84414 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half_fp8true.cu index 5457ea76e7..70653e69e2 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t8_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t8_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..81a29015de --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t8_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t8_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t8_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..6254aae41a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t8_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t16_8pq_8subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t16_8pq_8subd_half_fp8false.cu new file mode 100644 index 0000000000..2223290eff --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t16_8pq_8subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t16_8pq_8subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t16_8pq_8subd_half_fp8true.cu new file mode 100644 index 0000000000..f3f7c0ae07 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t16_8pq_8subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..cc487728cd --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half_fp8true.cu index 4225ea81a3..0da175b065 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..690b8a90f7 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half_fp8true.cu similarity index 82% rename from cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half.cu rename to cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half_fp8true.cu index dfcecd31b3..d3c5e032f8 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half_fp8true.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,7 @@ template struct vpq_descriptor_spec; + float, + true>; } // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_2subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_2subd_half_fp8false.cu new file mode 100644 index 0000000000..b5ae8f18d8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_2subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_2subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_2subd_half_fp8true.cu new file mode 100644 index 0000000000..97f100c53f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_2subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_4subd_half_fp8false.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_4subd_half_fp8false.cu new file mode 100644 index 0000000000..f17eae07db --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_4subd_half_fp8false.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_4subd_half_fp8true.cu b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_4subd_half_fp8true.cu new file mode 100644 index 0000000000..b94a11d287 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim64_t4_8pq_4subd_half_fp8true.cu @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * NOTE: this file is generated by compute_distance_00_generate.py + * + * Make changes there and run in this directory: + * + * > python compute_distance_00_generate.py + * + */ + +#include "compute_distance_vpq-impl.cuh" + +namespace cuvs::neighbors::cagra::detail { + +using namespace cuvs::distance; +template struct vpq_descriptor_spec; + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/device_common.hpp b/cpp/src/neighbors/detail/cagra/device_common.hpp index 8a5bb6ba1f..aa0e3847b5 100644 --- a/cpp/src/neighbors/detail/cagra/device_common.hpp +++ b/cpp/src/neighbors/detail/cagra/device_common.hpp @@ -1,9 +1,11 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once +// #define _CLK_BREAKDOWN + #include "hashmap.hpp" #include "utils.hpp" @@ -175,6 +177,9 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes( const IndexT* __restrict__ parent_indices, const IndexT* __restrict__ internal_topk_list, const uint32_t search_width, +#ifdef _CLK_BREAKDOWN + std::uint64_t& clk_compute_actual_distance, +#endif int* __restrict__ result_position = nullptr, const int max_result_position = 0) { @@ -227,17 +232,60 @@ RAFT_DEVICE_INLINE_FUNCTION void compute_distance_to_child_nodes( // > const auto child_dist = dataset_desc.compute_distance(child_id, child_id != invalid_index); // Instead, we manually inline this function for performance reasons. // This allows us to move the fetching of the arguments from shared memory out of the loop. +#ifdef _CLK_BREAKDOWN + const auto start_clock = clock64(); +#endif const DistanceT child_dist = device::team_sum( (child_id != invalid_index) ? compute_distance(args, child_id) : (lead_lane ? raft::upper_bound() : 0), team_size_bits); __syncwarp(); +#ifdef _CLK_BREAKDOWN + clk_compute_actual_distance += clock64() - start_clock; +#endif // Store the distance if (valid_i && lead_lane) { result_child_distances_ptr[j] = child_dist; } } } +template +struct uintN_t {}; +template <> +struct uintN_t<32> { + using type = uint32_t; +}; +template <> +struct uintN_t<64> { + using type = uint64_t; +}; + +template +struct fp8xN {}; + +template +struct fp8xN { + using uint_t = typename uintN_t<8 * NumPacked>::type; + using unit_t = __nv_fp8_e5m2; + using x2_t = __nv_fp8x2_storage_t; + static constexpr uint32_t num_elements = NumPacked; + + union { + unit_t x1[num_elements]; + x2_t x2[num_elements / 2]; + uint_t u; + } data; + + HDI fp8xN() { data.u = 0; } + + HDI uint_t& as_uint() { return data.u; } + HDI uint_t as_uint() const { return data.u; } + HDI half2 as_half2(const uint32_t i) const + { + return __nv_cvt_fp8x2_to_halfraw2(data.x2[i], __NV_E5M2); + } +}; + RAFT_DEVICE_INLINE_FUNCTION void lds(float& x, uint32_t addr) { asm volatile("ld.shared.f32 {%0}, [%1];" : "=f"(x) : "r"(addr)); @@ -282,6 +330,11 @@ RAFT_DEVICE_INLINE_FUNCTION void lds(uint32_t& x, uint32_t addr) asm volatile("ld.shared.u32 {%0}, [%1];" : "=r"(x) : "r"(addr)); } +RAFT_DEVICE_INLINE_FUNCTION void lds(uint64_t& x, uint32_t addr) +{ + asm volatile("ld.shared.u64 {%0}, [%1];" : "=l"(x) : "r"(addr)); +} + RAFT_DEVICE_INLINE_FUNCTION void lds(uint32_t& x, const uint32_t* addr) { lds(x, uint32_t(__cvta_generic_to_shared(addr))); @@ -299,6 +352,16 @@ RAFT_DEVICE_INLINE_FUNCTION void lds(uint4& x, const uint4* addr) lds(x, uint32_t(__cvta_generic_to_shared(addr))); } +RAFT_DEVICE_INLINE_FUNCTION void sts(uint32_t addr, const uint32_t& x) +{ + asm volatile("st.shared.u32 [%0], %1;" : : "r"(addr), "r"(reinterpret_cast(x))); +} + +RAFT_DEVICE_INLINE_FUNCTION void sts(uint32_t addr, const uint64_t& x) +{ + asm volatile("st.shared.u64 [%0], %1;" : : "r"(addr), "l"(reinterpret_cast(x))); +} + RAFT_DEVICE_INLINE_FUNCTION void sts(uint32_t addr, const half2& x) { asm volatile("st.shared.v2.u16 [%0], {%1, %2};" diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 1720841c91..9374d4fd45 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -187,11 +187,12 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel( const auto cta_id = blockIdx.x; // local CTA ID #ifdef _CLK_BREAKDOWN - uint64_t clk_init = 0; - uint64_t clk_compute_1st_distance = 0; - uint64_t clk_topk = 0; - uint64_t clk_pickup_parents = 0; - uint64_t clk_compute_distance = 0; + uint64_t clk_init = 0; + uint64_t clk_compute_1st_distance = 0; + uint64_t clk_topk = 0; + uint64_t clk_pickup_parents = 0; + uint64_t clk_compute_distance = 0; + uint64_t clk_compute_actual_distance = 0; uint64_t clk_start; #define _CLK_START() clk_start = clock64() #define _CLK_REC(V) V += clock64() - clk_start; @@ -327,6 +328,9 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel( parent_indices_buffer, result_indices_buffer, 1, +#ifdef _CLK_BREAKDOWN + clk_compute_actual_distance, +#endif result_position, result_buffer_size_32); // __syncthreads(); @@ -440,6 +444,7 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel( ", topk, %lu" ", pickup_parents, %lu" ", distance, %lu" + ", hash, %lu" "\n", __FILE__, __LINE__, @@ -449,7 +454,7 @@ RAFT_KERNEL __launch_bounds__(1024, 1) search_kernel( clk_compute_1st_distance, clk_topk, clk_pickup_parents, - clk_compute_distance); + clk_compute_distance - clk_compute_actual_distance); } #endif } diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 96e0c419f2..116e68b268 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -55,8 +55,6 @@ namespace cuvs::neighbors::cagra::detail { namespace single_cta_search { -// #define _CLK_BREAKDOWN - template RAFT_DEVICE_INLINE_FUNCTION void pickup_next_parents(std::uint32_t* const terminate_flag, INDEX_T* const next_parent_indices, @@ -576,13 +574,14 @@ __device__ void search_core( }; #ifdef _CLK_BREAKDOWN - std::uint64_t clk_init = 0; - std::uint64_t clk_compute_1st_distance = 0; - std::uint64_t clk_topk = 0; - std::uint64_t clk_reset_hash = 0; - std::uint64_t clk_pickup_parents = 0; - std::uint64_t clk_restore_hash = 0; - std::uint64_t clk_compute_distance = 0; + std::uint64_t clk_init = 0; + std::uint64_t clk_compute_1st_distance = 0; + std::uint64_t clk_topk = 0; + std::uint64_t clk_reset_hash = 0; + std::uint64_t clk_pickup_parents = 0; + std::uint64_t clk_restore_hash = 0; + std::uint64_t clk_compute_distance = 0; + std::uint64_t clk_compute_actual_distance = 0; std::uint64_t clk_start; #define _CLK_START() clk_start = clock64() #define _CLK_REC(V) V += clock64() - clk_start; @@ -783,7 +782,12 @@ __device__ void search_core( 0, parent_list_buffer, result_indices_buffer, - search_width); + search_width +#ifdef _CLK_BREAKDOWN + , + clk_compute_actual_distance +#endif + ); __syncthreads(); _CLK_REC(clk_compute_distance); @@ -942,6 +946,7 @@ __device__ void search_core( ", pickup_parents, %lu" ", restore_hash, %lu" ", distance, %lu" + ", hash, %lu" "\n", __FILE__, __LINE__, @@ -953,7 +958,8 @@ __device__ void search_core( clk_reset_hash, clk_pickup_parents, clk_restore_hash, - clk_compute_distance); + clk_compute_actual_distance, + clk_compute_distance - clk_compute_actual_distance); } #endif } diff --git a/cpp/tests/neighbors/ann_cagra.cuh b/cpp/tests/neighbors/ann_cagra.cuh index ee16467e13..dfa365f240 100644 --- a/cpp/tests/neighbors/ann_cagra.cuh +++ b/cpp/tests/neighbors/ann_cagra.cuh @@ -6,12 +6,14 @@ #include "../test_utils.cuh" #include "ann_utils.cuh" +#include "vpq_utils.cuh" #include #include "naive_knn.cuh" #include #include +#include #include #include #include @@ -458,6 +460,46 @@ class AnnCagraTest : public ::testing::TestWithParam { raft::update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_); raft::resource::sync_stream(handle_); + + reference_recall = 1; + if (ps.compression.has_value()) { + auto decoded_dataset = + raft::make_device_matrix(handle_, ps.n_rows, ps.dim); + cuvs::neighbors::decode_vpq_dataset( + decoded_dataset.view(), + dynamic_cast&>(index.data()), + raft::resource::get_cuda_stream(handle_)); + auto indices_out_view = raft::make_device_matrix_view( + indices_dev.data(), ps.n_queries, ps.k); + auto dists_out_view = raft::make_device_matrix_view( + distances_dev.data(), ps.n_queries, ps.k); + + cuvs::neighbors::naive_knn(handle_, + dists_out_view.data_handle(), + indices_out_view.data_handle(), + search_queries.data(), + decoded_dataset.data_handle(), + ps.n_queries, + ps.n_rows, + ps.dim, + ps.k, + ps.metric); + std::vector indices_vpq_dataset(queries_size); + std::vector distances_vpq_dataset(queries_size); + raft::update_host( + distances_vpq_dataset.data(), dists_out_view.data_handle(), queries_size, stream_); + raft::update_host( + indices_vpq_dataset.data(), indices_out_view.data_handle(), queries_size, stream_); + + reference_recall = std::get<1>(calc_recall(indices_naive, + indices_vpq_dataset, + distances_naive, + distances_vpq_dataset, + ps.n_queries, + ps.k, + 0)); + printf("reference_recall = %e\n", reference_recall); + } } // for (int i = 0; i < min(ps.n_queries, 10); i++) { @@ -467,7 +509,7 @@ class AnnCagraTest : public ::testing::TestWithParam { // print_vector("T", distances_naive.data() + i * ps.k, ps.k, std::cout); // print_vector("C", distances_Cagra.data() + i * ps.k, ps.k, std::cout); // } - double min_recall = ps.min_recall; + double min_recall = ps.min_recall * reference_recall; EXPECT_TRUE(eval_neighbours(indices_naive, indices_Cagra, distances_naive, @@ -516,6 +558,7 @@ class AnnCagraTest : public ::testing::TestWithParam { AnnCagraInputs ps; rmm::device_uvector database; rmm::device_uvector search_queries; + double reference_recall; }; template @@ -1393,7 +1436,8 @@ inline std::vector generate_inputs() {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL, cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_LOGICAL}); // don't demand high recall // without refinement - for (uint32_t pq_len : {2}) { // for now, only pq_len = 2 is supported, more options coming soon + for (uint32_t pq_len : + {2, 4, 8}) { // for now, only pq_len = 2 is supported, more options coming soon for (uint32_t vq_n_centers : {100, 1000}) { for (auto input : inputs2) { vpq_params ps{}; diff --git a/cpp/tests/neighbors/ann_utils.cuh b/cpp/tests/neighbors/ann_utils.cuh index 8a908c0187..a5bb7c5268 100644 --- a/cpp/tests/neighbors/ann_utils.cuh +++ b/cpp/tests/neighbors/ann_utils.cuh @@ -196,7 +196,7 @@ auto eval_recall(const std::vector& expected_idx, double min_recall, bool test_unique = true) -> testing::AssertionResult { - auto [actual_recall, match_count, total_count] = + auto [actual_recall, index_based_actual_recall, match_count, total_count] = calc_recall(expected_idx, actual_idx, rows, cols); double error_margin = (actual_recall - min_recall) / std::max(1.0 - min_recall, eps); RAFT_LOG_INFO("Recall = %f (%zu/%zu), the error is %2.1f%% %s the threshold (eps = %f).", @@ -228,8 +228,9 @@ auto calc_recall(const std::vector& expected_idx, size_t cols, double eps) { - size_t match_count = 0; - size_t total_count = static_cast(rows) * static_cast(cols); + size_t match_count = 0; + size_t index_match_count = 0; + size_t total_count = static_cast(rows) * static_cast(cols); for (size_t i = 0; i < rows; ++i) { for (size_t k = 0; k < cols; ++k) { size_t idx_k = i * cols + k; // row major assumption! @@ -248,8 +249,28 @@ auto calc_recall(const std::vector& expected_idx, } } } - return std::make_tuple( - static_cast(match_count) / static_cast(total_count), match_count, total_count); + + // Index based recall + for (size_t i = 0; i < rows; ++i) { + for (size_t k = 0; k < cols; ++k) { + size_t idx_k = i * cols + k; // row major assumption! + auto act_idx = actual_idx[idx_k]; + for (size_t j = 0; j < cols; ++j) { + size_t idx = i * cols + j; // row major assumption! + auto exp_idx = expected_idx[idx]; + + if (act_idx == exp_idx) { + index_match_count++; + break; + } + } + } + } + + return std::make_tuple(static_cast(match_count) / static_cast(total_count), + static_cast(index_match_count) / static_cast(total_count), + match_count, + total_count); } /** same as eval_recall, but in case indices do not match, @@ -266,7 +287,7 @@ auto eval_neighbours(const std::vector& expected_idx, bool test_unique = true, size_t max_duplicates = 0) -> testing::AssertionResult { - auto [actual_recall, match_count, total_count] = + auto [actual_recall, index_based_actual_recall, match_count, total_count] = calc_recall(expected_idx, actual_idx, expected_dist, actual_dist, rows, cols, eps); double error_margin = (actual_recall - min_recall) / std::max(1.0 - min_recall, eps); diff --git a/cpp/tests/neighbors/vpq_utils.cuh b/cpp/tests/neighbors/vpq_utils.cuh new file mode 100644 index 0000000000..8ceb371413 --- /dev/null +++ b/cpp/tests/neighbors/vpq_utils.cuh @@ -0,0 +1,66 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#include + +namespace cuvs::neighbors { +template +__global__ void decode_vpq_dataset_kernel(data_t* const decoded_dataset_ptr, + const uint32_t ldd, + const math_t* const vq_codebook_ptr, + const uint32_t ldv, + const math_t* const pq_codebook_ptr, + const uint32_t pq_subspace_dim, + const uint32_t pq_table_size, + const uint32_t dataset_dim, + const size_t dataset_size, + const uint8_t* const data_ptr, + const uint32_t ldi) +{ + constexpr uint32_t warp_size = 32; + const size_t batch_id = (blockIdx.x * blockDim.x + threadIdx.x) / warp_size; + if (batch_id >= dataset_size) { return; } + + const auto local_data_ptr = data_ptr + ldi * batch_id; + const auto vq_code = *reinterpret_cast(local_data_ptr); + const auto pq_code_ptr = local_data_ptr + sizeof(uint32_t); + const auto vq_vec_ptr = vq_codebook_ptr + vq_code * ldv; + auto local_dst_ptr = decoded_dataset_ptr + batch_id * ldd; + + const auto lane_id = threadIdx.x % warp_size; + for (uint32_t i = lane_id; i < dataset_dim; i += warp_size) { + const auto pq_code = pq_code_ptr[i / pq_subspace_dim]; + const auto pq_v = pq_codebook_ptr[pq_code * pq_subspace_dim + (i % pq_subspace_dim)]; + + local_dst_ptr[i] = static_cast(vq_vec_ptr[i]) + static_cast(pq_v); + } +} + +template +void decode_vpq_dataset(raft::device_matrix_view decoded_dataset, + const cuvs::neighbors::vpq_dataset& vpq_dataset, + cudaStream_t cuda_stream) +{ + const auto dataset_size = decoded_dataset.extent(0); + RAFT_EXPECTS(vpq_dataset.data.extent(0) == dataset_size, "Dataset sizes mismatch"); + + constexpr uint32_t block_size = 256; + constexpr uint32_t warp_size = 32; + constexpr int64_t vecs_per_cta = block_size / warp_size; + const auto grid_size = raft::div_rounding_up_safe(decoded_dataset.extent(0), vecs_per_cta); + + decode_vpq_dataset_kernel + <<>>(decoded_dataset.data_handle(), + decoded_dataset.stride(0), + vpq_dataset.vq_code_book.data_handle(), + vpq_dataset.vq_code_book.stride(0), + vpq_dataset.pq_code_book.data_handle(), + vpq_dataset.pq_len(), + 1u << vpq_dataset.pq_bits(), + vpq_dataset.dim(), + dataset_size, + vpq_dataset.data.data_handle(), + vpq_dataset.data.stride(0)); +} +} // namespace cuvs::neighbors