rapidsai · benfred · Oct 14, 2025 · Oct 14, 2025 · Oct 22, 2025 · Oct 25, 2025
@@ -259,6 +259,16 @@ cuvsError_t cuvsIvfPqIndexGetDim(cuvsIvfPqIndex_t index, int64_t* dim);
 /** Get the size of the index */
 cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t* size);
 
+/**
+ * @brief Get the dimensionality of an encoded vector after compression by PQ.
+ *
+ * @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
+ * @param[out] pq_dim receives the value reported by the index's `pq_dim()`
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index, int64_t* pq_dim);
+
+/**
+ * @brief Get the bit length of an encoded vector element after compression by PQ.
+ *
+ * @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
+ * @param[out] pq_bits receives the value reported by the index's `pq_bits()`
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index, int64_t* pq_bits);
+
+/**
+ * @brief Get the dimensionality of a subspace, i.e. the number of vector
+ * components mapped to a subspace.
+ *
+ * @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
+ * @param[out] pq_len receives the value reported by the index's `pq_len()`
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index, int64_t* pq_len);
+
 /**
  * @brief Get the cluster centers corresponding to the lists in the original space
  *
@@ -279,6 +289,51 @@ cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index, DLManagedTensor* ce
  * @return cuvsError_t
  */
 cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index, DLManagedTensor* pq_centers);
+
+/**
+ * @brief Get the size of each list (cluster) in the index
+ *
+ * @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
+ * @param[out] list_sizes Output tensor that will be populated with a non-owning view of the
+ *   index's `list_sizes()` data.
+ *   NOTE(review): as the view is non-owning it presumably must not be used after the index is
+ *   destroyed or modified - confirm and document the lifetime requirement.
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index, DLManagedTensor* list_sizes);
+
+/**
+ * @brief Unpack `n_rows` consecutive PQ encoded vectors of a single list (cluster) in the
+ * compressed index, starting at the given `offset`. The codes are kept bit-packed, i.e. they are
+ * not expanded to one code per byte: each encoded vector in the output buffer occupies
+ * ceildiv(index.pq_dim() * index.pq_bits(), 8) bytes.
+ *
+ * @param[in] res cuvsResources_t resources handle
+ * @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
+ * @param[out] out_codes
+ *   the destination buffer [n_rows, ceildiv(index.pq_dim() * index.pq_bits(), 8)],
+ *   interpreted as a row-major matrix of uint8 values.
+ *   The length `n_rows` defines how many records to unpack,
+ *   offset + n_rows must be smaller than or equal to the list size.
+ *   This DLManagedTensor must already point to allocated device memory.
+ * @param[in] label
+ *   The id of the list (cluster) to decode.
+ * @param[in] offset
+ *   How many records in the list to skip.
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
+                                                   cuvsIvfPqIndex_t index,
+                                                   DLManagedTensor* out_codes,
+                                                   uint32_t label,
+                                                   uint32_t offset);
+/**
+ * @brief Get the indices of the vectors stored in a single IVF-PQ list (cluster)
+ *
+ * @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
+ * @param[in] label
+ *   The id of the list (cluster) whose indices are requested.
+ * @param[out] out_labels
+ *   output tensor that will be populated with a non-owning view of the list's indices.
+ *   NOTE(review): the view covers the whole `indices` buffer of the list; confirm whether its
+ *   length can exceed the list size reported by cuvsIvfPqIndexGetListSizes.
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
+                                         uint32_t label,
+                                         DLManagedTensor* out_labels);
 /**
  * @}
  */

@@ -263,18 +263,33 @@ void _copy_matrix(cuvsResources_t res, DLManagedTensor* src_managed, DLManagedTe
 {
   DLTensor& src = src_managed->dl_tensor;
   DLTensor& dst = dst_managed->dl_tensor;
-
-  int64_t src_row_stride = src.strides == nullptr ? src.shape[1] : src.strides[0];
-  int64_t dst_row_stride = dst.strides == nullptr ? dst.shape[1] : dst.strides[0];
-  auto res_ptr           = reinterpret_cast<raft::resources*>(res);
-
-  raft::copy_matrix<T>(static_cast<T*>(dst.data),
-                       dst_row_stride,
-                       static_cast<const T*>(src.data),
-                       src_row_stride,
-                       src.shape[1],
-                       src.shape[0],
-                       raft::resource::get_cuda_stream(*res_ptr));
+  auto res_ptr  = reinterpret_cast<raft::resources*>(res);
+  auto stream   = raft::resource::get_cuda_stream(*res_ptr);
+
+  if (src.ndim == 2) {
+    // use raft::copy_matrix for 2D tensors - this will handle copying from strided to non-strided
+    // views well
+    int64_t src_row_stride = src.strides == nullptr ? src.shape[1] : src.strides[0];
+    int64_t dst_row_stride = dst.strides == nullptr ? dst.shape[1] : dst.strides[0];
+
+    raft::copy_matrix<T>(static_cast<T*>(dst.data),
+                         dst_row_stride,
+                         static_cast<const T*>(src.data),
+                         src_row_stride,
+                         src.shape[1],
+                         src.shape[0],
+                         stream);
+  } else {
+    // Otherwise use cudaMemcpyAsync - and assert that we don't have strided data
+    RAFT_EXPECTS(src.strides == nullptr, "cuvsCopyMatrix only supports strides with 2D inputs");
+    RAFT_EXPECTS(dst.strides == nullptr, "cuvsCopyMatrix only supports strides with 2D inputs");
+
+    size_t elements = 1;
+    for (int64_t i = 0; i < src.ndim; ++i) {
+      elements *= src.shape[i];
+    }
+    raft::copy<T>(static_cast<T*>(dst.data), static_cast<const T*>(src.data), elements, stream);
+  }
 }
 }  // namespace
 
@@ -286,8 +301,7 @@ extern "C" cuvsError_t cuvsMatrixCopy(cuvsResources_t res,
     DLTensor& src = src_managed->dl_tensor;
     DLTensor& dst = dst_managed->dl_tensor;
 
-    RAFT_EXPECTS(src.ndim == 2, "src should be a 2 dimensional tensor");
-    RAFT_EXPECTS(dst.ndim == 2, "dst should be a 2 dimensional tensor");
+    RAFT_EXPECTS(src.ndim == dst.ndim, "src and dst tensors should have the same dimensions");
 
     for (int64_t i = 0; i < src.ndim; ++i) {
       RAFT_EXPECTS(src.shape[i] == dst.shape[i], "shape mismatch between src and dst tensors");
@@ -350,21 +364,26 @@ extern "C" cuvsError_t cuvsMatrixSliceRows(cuvsResources_t res,
 
     DLTensor& src = src_managed->dl_tensor;
     DLTensor& dst = dst_managed->dl_tensor;
-    RAFT_EXPECTS(src.ndim == 2, "src should be a 2 dimensional tensor");
+    RAFT_EXPECTS(src.ndim <= 2, "src should be a 1 or 2 dimensional tensor");
     RAFT_EXPECTS(src.shape != nullptr, "shape should be initialized in the src tensor");
 
     dst.dtype    = src.dtype;
     dst.device   = src.device;
-    dst.ndim     = 2;
-    dst.shape    = new int64_t[2];
+    dst.ndim     = src.ndim;
+    dst.shape    = new int64_t[dst.ndim];
     dst.shape[0] = end - start;
-    dst.shape[1] = src.shape[1];
 
-    int64_t row_strides = dst.shape[1];
-    if (src.strides) {
-      dst.strides = new int64_t[2];
-      row_strides = dst.strides[0] = src.strides[0];
-      dst.strides[1]               = src.strides[1];
+    int64_t row_strides = 1;
+
+    if (dst.ndim == 2) {
+      dst.shape[1] = src.shape[1];
+      row_strides = dst.shape[1];
+
+      if (src.strides) {
+        dst.strides = new int64_t[2];
+        row_strides = dst.strides[0] = src.strides[0];
+        dst.strides[1]               = src.strides[1];
+      }
     }
 
     dst.data = static_cast<char*>(src.data) + start * row_strides * (dst.dtype.bits / 8);

@@ -167,6 +167,38 @@ void _get_pq_centers(cuvsIvfPqIndex index, DLManagedTensor* centers)
   auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
   cuvs::core::to_dlpack(index_ptr->pq_centers(), centers);
 }
+
+/** Export the index's `list_sizes()` view through DLPack into `list_sizes`. */
+template <typename IdxT>
+void _get_list_sizes(cuvsIvfPqIndex index, DLManagedTensor* list_sizes)
+{
+  using index_type = cuvs::neighbors::ivf_pq::index<IdxT>;
+
+  // `addr` carries the address of the C++ index object behind the C handle
+  auto* ivf_pq_idx = reinterpret_cast<index_type*>(index.addr);
+  cuvs::core::to_dlpack(ivf_pq_idx->list_sizes(), list_sizes);
+}
+
+/**
+ * Unpack bit-packed PQ codes of one list into a caller-provided DLPack tensor.
+ *
+ * @param res       resources handle, reinterpreted as `raft::resources`
+ * @param index     C index handle whose `addr` points at an `ivf_pq::index<IdxT>`
+ * @param out_codes destination tensor, read as a row-major uint8 device matrix; its number of
+ *                  rows is the number of records to unpack and its number of columns must be
+ *                  ceildiv(pq_dim * pq_bits, 8)
+ * @param label     id of the list (cluster) to unpack
+ * @param offset    number of records in the list to skip
+ *
+ * Fails through RAFT_EXPECTS when `label` is out of range or the tensor has the wrong width.
+ */
+template <typename IdxT>
+void _unpack_contiguous_list_data(cuvsResources_t res,
+                                  cuvsIvfPqIndex index,
+                                  DLManagedTensor* out_codes,
+                                  uint32_t label,
+                                  uint32_t offset)
+{
+  auto index_ptr    = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
+  using mdspan_type = raft::device_matrix_view<uint8_t, uint32_t, raft::row_major>;
+  auto mds          = cuvs::core::from_dlpack<mdspan_type>(out_codes);
+  auto res_ptr      = reinterpret_cast<raft::resources*>(res);
+
+  // The callee only receives a raw pointer and a row count, so it cannot notice a tensor whose
+  // rows are narrower than one packed code; reject such buffers here instead of overrunning them.
+  const uint64_t code_bits  = static_cast<uint64_t>(index_ptr->pq_dim()) * index_ptr->pq_bits();
+  const uint64_t code_bytes = (code_bits + 7) / 8;
+  RAFT_EXPECTS(static_cast<uint64_t>(mds.extent(1)) == code_bytes,
+               "out_codes should have ceildiv(pq_dim * pq_bits, 8) columns");
+  RAFT_EXPECTS(label < index_ptr->lists().size(),
+               "label should be smaller than the number of lists in the index");
+
+  cuvs::neighbors::ivf_pq::helpers::codepacker::unpack_contiguous_list_data(
+    *res_ptr, *index_ptr, mds.data_handle(), mds.extent(0), label, offset);
+}
+
+/**
+ * Export a non-owning DLPack view of the `indices` buffer of one list.
+ *
+ * @param index      C index handle whose `addr` points at an `ivf_pq::index<IdxT>`
+ * @param label      id of the list (cluster) to export
+ * @param out_labels tensor that is populated with the view
+ *
+ * Fails through RAFT_EXPECTS when `label` is out of range or the list is not allocated.
+ */
+template <typename IdxT>
+void _get_list_indices(cuvsIvfPqIndex index,
+                       uint32_t label,
+                       DLManagedTensor* out_labels)
+{
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
+  auto&& lists   = index_ptr->lists();
+
+  // `label` comes straight from the C caller: validate it before indexing and dereferencing
+  RAFT_EXPECTS(label < lists.size(),
+               "label should be smaller than the number of lists in the index");
+  RAFT_EXPECTS(lists[label] != nullptr, "the requested list has not been allocated");
+
+  cuvs::core::to_dlpack(lists[label]->indices.view(), out_labels);
+}
 }  // namespace
 
 extern "C" cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t* index)
@@ -361,6 +393,30 @@ extern "C" cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t* si
   });
 }
 
+/** C entry point: store the index's `pq_dim()` in `*pq_dim`. */
+extern "C" cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index, int64_t* pq_dim)
+{
+  using index_type = cuvs::neighbors::ivf_pq::index<int64_t>;
+
+  return cuvs::core::translate_exceptions([=] {
+    auto* ivf_pq_idx = reinterpret_cast<index_type*>(index->addr);
+    *pq_dim          = static_cast<int64_t>(ivf_pq_idx->pq_dim());
+  });
+}
+
+/** C entry point: store the index's `pq_bits()` in `*pq_bits`. */
+extern "C" cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index, int64_t* pq_bits)
+{
+  using index_type = cuvs::neighbors::ivf_pq::index<int64_t>;
+
+  return cuvs::core::translate_exceptions([=] {
+    auto* ivf_pq_idx = reinterpret_cast<index_type*>(index->addr);
+    *pq_bits         = static_cast<int64_t>(ivf_pq_idx->pq_bits());
+  });
+}
+
+/** C entry point: store the index's `pq_len()` in `*pq_len`. */
+extern "C" cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index, int64_t* pq_len)
+{
+  using index_type = cuvs::neighbors::ivf_pq::index<int64_t>;
+
+  return cuvs::core::translate_exceptions([=] {
+    auto* ivf_pq_idx = reinterpret_cast<index_type*>(index->addr);
+    *pq_len          = static_cast<int64_t>(ivf_pq_idx->pq_len());
+  });
+}
+
 extern "C" cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index, DLManagedTensor* centers)
 {
   return cuvs::core::translate_exceptions([=] { _get_centers<int64_t>(*index, centers); });
@@ -371,3 +427,27 @@ extern "C" cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index,
 {
   return cuvs::core::translate_exceptions([=] { _get_pq_centers<int64_t>(*index, pq_centers); });
 }
+
+/** C entry point: forwards to `_get_list_sizes<int64_t>` and maps exceptions to cuvsError_t. */
+extern "C" cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index,
+                                                  DLManagedTensor* list_sizes)
+{
+  auto export_sizes = [=] { _get_list_sizes<int64_t>(*index, list_sizes); };
+  return cuvs::core::translate_exceptions(export_sizes);
+}
+
+/**
+ * C entry point: forwards to `_unpack_contiguous_list_data<int64_t>` and maps exceptions to
+ * cuvsError_t.
+ */
+extern "C" cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
+                                                              cuvsIvfPqIndex_t index,
+                                                              DLManagedTensor* out_codes,
+                                                              uint32_t label,
+                                                              uint32_t offset)
+{
+  auto unpack_codes = [=] {
+    _unpack_contiguous_list_data<int64_t>(res, *index, out_codes, label, offset);
+  };
+  return cuvs::core::translate_exceptions(unpack_codes);
+}
+
+/** C entry point: forwards to `_get_list_indices<int64_t>` and maps exceptions to cuvsError_t. */
+extern "C" cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
+                                                    uint32_t label,
+                                                    DLManagedTensor* out_labels)
+{
+  auto export_indices = [=] { _get_list_indices<int64_t>(*index, label, out_labels); };
+  return cuvs::core::translate_exceptions(export_indices);
+}
@@ -390,7 +390,7 @@ struct index : cuvs::neighbors::index {
   /** The dimensionality of an encoded vector after compression by PQ. */
   uint32_t pq_dim() const noexcept;
 
-  /** Dimensionality of a subspaces, i.e. the number of vector components mapped to a subspace */
+  /** Dimensionality of a subspace, i.e. the number of vector components mapped to a subspace */
   uint32_t pq_len() const noexcept;
 
   /** The number of vectors in a PQ codebook (`1 << pq_bits`). */
@@ -2489,7 +2489,7 @@ void pack_contiguous_list_data(raft::resources const& res,
  *   raft::copy(&list_size, index.list_sizes().data_handle() + label, 1,
  * resource::get_cuda_stream(res)); resource::sync_stream(res);
  *   // allocate the buffer for the output
- *   auto codes = raft::make_device_matrix<float>(res, list_size, index.pq_dim());
+ *   auto codes = raft::make_device_matrix<uint8_t>(res, list_size, index.pq_dim());
  *   // unpack the whole list
  *   ivf_pq::helpers::codepacker::unpack_list_data(res, index, codes.view(), label, 0);
  * @endcode
@@ -2563,11 +2563,11 @@ void unpack_list_data(raft::resources const& res,
  *     raft::resource::get_cuda_stream(res));
  *   raft::resource::sync_stream(res);
  *   // allocate the buffer for the output
- *   auto codes = raft::make_device_matrix<float>(res, list_size, raft::ceildiv(index.pq_dim() *
- *     index.pq_bits(), 8));
+ *   auto codes = raft::make_device_matrix<uint8_t>(res, list_size, raft::ceildiv(index.pq_dim() *
+ *      index.pq_bits(), 8));
  *   // unpack the whole list
- *   ivf_pq::helpers::codepacker::unpack_list_data(res, index, codes.data_handle(), list_size,
- * label, 0);
+ *   ivf_pq::helpers::codepacker::unpack_contiguous_list_data(res, index, codes.data_handle(),
+ *      list_size, label, 0);
  * @endcode
  *
  * @param[in] res raft resource

@@ -80,12 +80,34 @@ cdef extern from "cuvs/neighbors/ivf_pq.h" nogil:
 
     cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t * size)
 
+    cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index,
+                                       int64_t * pq_dim)
+
+    cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index,
+                                        int64_t * pq_bits)
+
+    cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index,
+                                       int64_t * pq_len)
+
     cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index,
                                          DLManagedTensor * centers)
 
+    cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index,
+                                           DLManagedTensor * list_sizes)
+
     cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index,
                                            DLManagedTensor * centers)
 
+    cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
+                                                       cuvsIvfPqIndex_t index,
+                                                       DLManagedTensor* out,
+                                                       uint32_t label,
+                                                       uint32_t offset)
+
+    cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
+                                             uint32_t label,
+                                             DLManagedTensor* out)
+
     cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
                                cuvsIvfPqIndexParams* params,
                                DLManagedTensor* dataset,