Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions c/include/cuvs/neighbors/ivf_pq.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,16 @@ cuvsError_t cuvsIvfPqIndexGetDim(cuvsIvfPqIndex_t index, int64_t* dim);
/** Get the size of the index */
cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t* size);

/** Get the dimensionality of an encoded vector after compression by PQ. */
cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index, int64_t* pq_dim);

/** Get the bit length of an encoded vector element after compression by PQ.*/
cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index, int64_t* pq_bits);

/** Get the dimensionality of a subspace, i.e. the number of vector
* components mapped to a subspace */
cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index, int64_t* pq_len);

/**
* @brief Get the cluster centers corresponding to the lists in the original space
*
Expand All @@ -279,6 +289,51 @@ cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index, DLManagedTensor* ce
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index, DLManagedTensor* pq_centers);

/**
* @brief Get the sizes of each list
*
* @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
* @param[out] list_sizes Output tensor that will be populated with a non-owning view of the data
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index, DLManagedTensor* list_sizes);

/**
* @brief Unpack `n_rows` consecutive PQ encoded vectors of a single list (cluster) in the
* compressed index starting at given `offset`, not expanded to one code per byte. Each code in the
* output buffer occupies ceildiv(index.pq_dim() * index.pq_bits(), 8) bytes.
*
* The values needed to size the output buffer can be queried with
* cuvsIvfPqIndexGetPqDim and cuvsIvfPqIndexGetPqBits.
*
* @param[in] res raft resource
* @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
* @param[out] out_codes
* the destination buffer [n_rows, ceildiv(index.pq_dim() * index.pq_bits(), 8)].
* The length `n_rows` defines how many records to unpack,
* offset + n_rows must be smaller than or equal to the list size.
* This DLManagedTensor must already point to allocated device memory
* @param[in] label
* The id of the list (cluster) to decode.
* @param[in] offset
* How many records in the list to skip.
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
cuvsIvfPqIndex_t index,
DLManagedTensor* out_codes,
uint32_t label,
uint32_t offset);
/**
* @brief Get the indices of each vector in an ivf-pq list
*
* @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
* @param[in] label
* The id of the list (cluster) to decode.
* @param[out] out_labels
* output tensor that will be populated with a non-owning view of the data
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
uint32_t label,
DLManagedTensor* out_labels);
/**
* @}
*/
Expand Down
65 changes: 42 additions & 23 deletions c/src/core/c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,18 +263,33 @@ void _copy_matrix(cuvsResources_t res, DLManagedTensor* src_managed, DLManagedTe
{
DLTensor& src = src_managed->dl_tensor;
DLTensor& dst = dst_managed->dl_tensor;

int64_t src_row_stride = src.strides == nullptr ? src.shape[1] : src.strides[0];
int64_t dst_row_stride = dst.strides == nullptr ? dst.shape[1] : dst.strides[0];
auto res_ptr = reinterpret_cast<raft::resources*>(res);

raft::copy_matrix<T>(static_cast<T*>(dst.data),
dst_row_stride,
static_cast<const T*>(src.data),
src_row_stride,
src.shape[1],
src.shape[0],
raft::resource::get_cuda_stream(*res_ptr));
auto res_ptr = reinterpret_cast<raft::resources*>(res);
auto stream = raft::resource::get_cuda_stream(*res_ptr);

if (src.ndim == 2) {
// use raft::copy_matrix for 2D tensors - this will handle copying from strided to non-strided
// views well
int64_t src_row_stride = src.strides == nullptr ? src.shape[1] : src.strides[0];
int64_t dst_row_stride = dst.strides == nullptr ? dst.shape[1] : dst.strides[0];

raft::copy_matrix<T>(static_cast<T*>(dst.data),
dst_row_stride,
static_cast<const T*>(src.data),
src_row_stride,
src.shape[1],
src.shape[0],
stream);
} else {
// Otherwise use cudaMemcpyAsync - and assert that we don't have strided data
RAFT_EXPECTS(src.strides == nullptr, "cuvsCopyMatrix only supports strides with 2D inputs");
RAFT_EXPECTS(dst.strides == nullptr, "cuvsCopyMatrix only supports strides with 2D inputs");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this raft::copy be used to support strides in non-2D output?


size_t elements = 1;
for (int64_t i = 0; i < src.ndim; ++i) {
elements *= src.shape[i];
}
raft::copy<T>(static_cast<T*>(dst.data), static_cast<const T*>(src.data), elements, stream);
}
}
} // namespace

Expand All @@ -286,8 +301,7 @@ extern "C" cuvsError_t cuvsMatrixCopy(cuvsResources_t res,
DLTensor& src = src_managed->dl_tensor;
DLTensor& dst = dst_managed->dl_tensor;

RAFT_EXPECTS(src.ndim == 2, "src should be a 2 dimensional tensor");
RAFT_EXPECTS(dst.ndim == 2, "dst should be a 2 dimensional tensor");
RAFT_EXPECTS(src.ndim == dst.ndim, "src and dst tensors should have the same dimensions");

for (int64_t i = 0; i < src.ndim; ++i) {
RAFT_EXPECTS(src.shape[i] == dst.shape[i], "shape mismatch between src and dst tensors");
Expand Down Expand Up @@ -350,21 +364,26 @@ extern "C" cuvsError_t cuvsMatrixSliceRows(cuvsResources_t res,

DLTensor& src = src_managed->dl_tensor;
DLTensor& dst = dst_managed->dl_tensor;
RAFT_EXPECTS(src.ndim == 2, "src should be a 2 dimensional tensor");
RAFT_EXPECTS(src.ndim <= 2, "src should be a 1 or 2 dimensional tensor");
RAFT_EXPECTS(src.shape != nullptr, "shape should be initialized in the src tensor");

dst.dtype = src.dtype;
dst.device = src.device;
dst.ndim = 2;
dst.shape = new int64_t[2];
dst.ndim = src.ndim;
dst.shape = new int64_t[dst.ndim];
dst.shape[0] = end - start;
dst.shape[1] = src.shape[1];

int64_t row_strides = dst.shape[1];
if (src.strides) {
dst.strides = new int64_t[2];
row_strides = dst.strides[0] = src.strides[0];
dst.strides[1] = src.strides[1];
int64_t row_strides = 1;

if (dst.ndim == 2) {
dst.shape[1] = src.shape[1];
row_strides = dst.shape[1];

if (src.strides) {
dst.strides = new int64_t[2];
row_strides = dst.strides[0] = src.strides[0];
dst.strides[1] = src.strides[1];
}
}

dst.data = static_cast<char*>(src.data) + start * row_strides * (dst.dtype.bits / 8);
Expand Down
80 changes: 80 additions & 0 deletions c/src/neighbors/ivf_pq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,38 @@ void _get_pq_centers(cuvsIvfPqIndex index, DLManagedTensor* centers)
auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
cuvs::core::to_dlpack(index_ptr->pq_centers(), centers);
}

/**
 * Export the per-list sizes of an IVF-PQ index as a DLPack tensor.
 *
 * @tparam IdxT index type used to reinterpret the opaque handle address
 * @param[in]  index      C handle whose `addr` field holds the address of the C++ index
 * @param[out] list_sizes tensor populated by `cuvs::core::to_dlpack` from `list_sizes()`
 */
template <typename IdxT>
void _get_list_sizes(cuvsIvfPqIndex index, DLManagedTensor* list_sizes)
{
  using index_type = cuvs::neighbors::ivf_pq::index<IdxT>;

  auto* typed_index = reinterpret_cast<index_type*>(index.addr);
  cuvs::core::to_dlpack(typed_index->list_sizes(), list_sizes);
}

/**
 * Unpack the PQ codes of one list into a caller-provided buffer.
 *
 * The number of rows to unpack is taken from the first extent of `out_codes`.
 *
 * @tparam IdxT index type used to reinterpret the opaque handle address
 * @param[in]  res       opaque resources handle, reinterpreted as `raft::resources*`
 * @param[in]  index     C handle whose `addr` field holds the address of the C++ index
 * @param[out] out_codes destination tensor, viewed as a row-major uint8_t device matrix
 * @param[in]  label     id of the list (cluster) to unpack
 * @param[in]  offset    number of records in the list to skip
 */
template <typename IdxT>
void _unpack_contiguous_list_data(cuvsResources_t res,
                                  cuvsIvfPqIndex index,
                                  DLManagedTensor* out_codes,
                                  uint32_t label,
                                  uint32_t offset)
{
  namespace codepacker  = cuvs::neighbors::ivf_pq::helpers::codepacker;
  using index_type      = cuvs::neighbors::ivf_pq::index<IdxT>;
  using codes_view_type = raft::device_matrix_view<uint8_t, uint32_t, raft::row_major>;

  auto* typed_index = reinterpret_cast<index_type*>(index.addr);
  auto codes_view   = cuvs::core::from_dlpack<codes_view_type>(out_codes);
  auto* resources   = reinterpret_cast<raft::resources*>(res);

  // Only the raw pointer and the row count are forwarded; the column extent of
  // the view is not passed on.
  auto n_rows = codes_view.extent(0);
  codepacker::unpack_contiguous_list_data(
    *resources, *typed_index, codes_view.data_handle(), n_rows, label, offset);
}

/**
 * Export the source indices stored in one IVF-PQ list as a DLPack tensor.
 *
 * @tparam IdxT index type used to reinterpret the opaque handle address
 * @param[in]  index      C handle whose `addr` field holds the address of the C++ index
 * @param[in]  label      id of the list (cluster) whose indices are requested
 * @param[out] out_labels tensor populated by `cuvs::core::to_dlpack` from the list's
 *                        `indices.view()`
 *
 * `label` comes straight from the C caller, so the list is looked up with a
 * bounds-checked `at()` rather than `operator[]`: an out-of-range id raises an
 * exception instead of reading past the end of the container.
 * NOTE(review): assumes `lists()` returns a std::vector-like container that
 * provides `at()`, and that the C entry point's `translate_exceptions` wrapper
 * turns the exception into an error code - confirm both.
 */
template <typename IdxT>
void _get_list_indices(cuvsIvfPqIndex index,
                       uint32_t label,
                       DLManagedTensor* out_labels)
{
  auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
  auto& list     = index_ptr->lists().at(label);
  cuvs::core::to_dlpack(list->indices.view(), out_labels);
}
} // namespace

extern "C" cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t* index)
Expand Down Expand Up @@ -361,6 +393,30 @@ extern "C" cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t* si
});
}

/**
 * C entry point: write the index's `pq_dim()` value into `*pq_dim`.
 *
 * The handle address is always reinterpreted as an `index<int64_t>`; the body
 * runs inside `cuvs::core::translate_exceptions`, whose result is returned.
 */
extern "C" cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index, int64_t* pq_dim)
{
  using index_type = cuvs::neighbors::ivf_pq::index<int64_t>;

  return cuvs::core::translate_exceptions([index, pq_dim] {
    auto* typed_index = reinterpret_cast<index_type*>(index->addr);
    *pq_dim           = static_cast<int64_t>(typed_index->pq_dim());
  });
}

/**
 * C entry point: write the index's `pq_bits()` value into `*pq_bits`.
 *
 * The handle address is always reinterpreted as an `index<int64_t>`; the body
 * runs inside `cuvs::core::translate_exceptions`, whose result is returned.
 */
extern "C" cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index, int64_t* pq_bits)
{
  using index_type = cuvs::neighbors::ivf_pq::index<int64_t>;

  return cuvs::core::translate_exceptions([index, pq_bits] {
    auto* typed_index = reinterpret_cast<index_type*>(index->addr);
    *pq_bits          = static_cast<int64_t>(typed_index->pq_bits());
  });
}

/**
 * C entry point: write the index's `pq_len()` value into `*pq_len`.
 *
 * The handle address is always reinterpreted as an `index<int64_t>`; the body
 * runs inside `cuvs::core::translate_exceptions`, whose result is returned.
 */
extern "C" cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index, int64_t* pq_len)
{
  using index_type = cuvs::neighbors::ivf_pq::index<int64_t>;

  return cuvs::core::translate_exceptions([index, pq_len] {
    auto* typed_index = reinterpret_cast<index_type*>(index->addr);
    *pq_len           = static_cast<int64_t>(typed_index->pq_len());
  });
}

extern "C" cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index, DLManagedTensor* centers)
{
return cuvs::core::translate_exceptions([=] { _get_centers<int64_t>(*index, centers); });
Expand All @@ -371,3 +427,27 @@ extern "C" cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index,
{
return cuvs::core::translate_exceptions([=] { _get_pq_centers<int64_t>(*index, pq_centers); });
}

/**
 * C entry point: populate `list_sizes` with the sizes of the index's lists.
 *
 * Delegates to `_get_list_sizes<int64_t>` inside
 * `cuvs::core::translate_exceptions` and returns that wrapper's result.
 */
extern "C" cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index,
                                                  DLManagedTensor* list_sizes)
{
  return cuvs::core::translate_exceptions([index, list_sizes] {
    _get_list_sizes<int64_t>(*index, list_sizes);
  });
}

/**
 * C entry point: unpack the PQ codes of list `label`, skipping `offset`
 * records, into the caller-provided `out_codes` tensor.
 *
 * Delegates to `_unpack_contiguous_list_data<int64_t>` inside
 * `cuvs::core::translate_exceptions` and returns that wrapper's result.
 */
extern "C" cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
                                                              cuvsIvfPqIndex_t index,
                                                              DLManagedTensor* out_codes,
                                                              uint32_t label,
                                                              uint32_t offset)
{
  return cuvs::core::translate_exceptions([res, index, out_codes, label, offset] {
    _unpack_contiguous_list_data<int64_t>(res, *index, out_codes, label, offset);
  });
}

/**
 * C entry point: populate `out_labels` with the indices stored in list `label`.
 *
 * Delegates to `_get_list_indices<int64_t>` inside
 * `cuvs::core::translate_exceptions` and returns that wrapper's result.
 */
extern "C" cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
                                                    uint32_t label,
                                                    DLManagedTensor* out_labels)
{
  return cuvs::core::translate_exceptions([index, label, out_labels] {
    _get_list_indices<int64_t>(*index, label, out_labels);
  });
}
12 changes: 6 additions & 6 deletions cpp/include/cuvs/neighbors/ivf_pq.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ struct index : cuvs::neighbors::index {
/** The dimensionality of an encoded vector after compression by PQ. */
uint32_t pq_dim() const noexcept;

/** Dimensionality of a subspaces, i.e. the number of vector components mapped to a subspace */
/** Dimensionality of a subspace, i.e. the number of vector components mapped to a subspace */
uint32_t pq_len() const noexcept;

/** The number of vectors in a PQ codebook (`1 << pq_bits`). */
Expand Down Expand Up @@ -2489,7 +2489,7 @@ void pack_contiguous_list_data(raft::resources const& res,
* raft::copy(&list_size, index.list_sizes().data_handle() + label, 1,
* resource::get_cuda_stream(res)); resource::sync_stream(res);
* // allocate the buffer for the output
* auto codes = raft::make_device_matrix<float>(res, list_size, index.pq_dim());
* auto codes = raft::make_device_matrix<uint8_t>(res, list_size, index.pq_dim());
* // unpack the whole list
* ivf_pq::helpers::codepacker::unpack_list_data(res, index, codes.view(), label, 0);
* @endcode
Expand Down Expand Up @@ -2563,11 +2563,11 @@ void unpack_list_data(raft::resources const& res,
* raft::resource::get_cuda_stream(res));
* raft::resource::sync_stream(res);
* // allocate the buffer for the output
* auto codes = raft::make_device_matrix<float>(res, list_size, raft::ceildiv(index.pq_dim() *
* index.pq_bits(), 8));
* auto codes = raft::make_device_matrix<uint8_t>(res, list_size, raft::ceildiv(index.pq_dim() *
* index.pq_bits(), 8));
* // unpack the whole list
* ivf_pq::helpers::codepacker::unpack_list_data(res, index, codes.data_handle(), list_size,
* label, 0);
* ivf_pq::helpers::codepacker::unpack_contiguous_list_data(res, index, codes.data_handle(),
* list_size, label, 0);
* @endcode
*
* @param[in] res raft resource
Expand Down
22 changes: 22 additions & 0 deletions python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,34 @@ cdef extern from "cuvs/neighbors/ivf_pq.h" nogil:

cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t * size)

cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index,
int64_t * pq_dim)

cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index,
int64_t * pq_bits)

cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index,
int64_t * pq_len)

cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index,
DLManagedTensor * centers)

cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index,
DLManagedTensor * list_sizes)

cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index,
DLManagedTensor * centers)

cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
cuvsIvfPqIndex_t index,
DLManagedTensor* out,
uint32_t label,
uint32_t offset)

cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
uint32_t label,
DLManagedTensor* out)

cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
cuvsIvfPqIndexParams* params,
DLManagedTensor* dataset,
Expand Down
Loading
Loading