From 339d316c6a680694e0af364f3096f8921200eb85 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Tue, 14 Oct 2025 10:38:58 -0700
Subject: [PATCH 1/3] Expose pq list data to python

---
 cpp/include/cuvs/neighbors/ivf_pq.h          | 42 +++++++++
 cpp/include/cuvs/neighbors/ivf_pq.hpp        | 12 +--
 cpp/src/core/c_api.cpp                       | 42 ++++++---
 cpp/src/neighbors/ivf_pq_c.cpp               | 63 +++++++++++++
 python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd | 18 ++++
 python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx | 93 ++++++++++++++++++++
 6 files changed, 250 insertions(+), 20 deletions(-)

diff --git a/cpp/include/cuvs/neighbors/ivf_pq.h b/cpp/include/cuvs/neighbors/ivf_pq.h
index 3ef7b8b7af..c52557747d 100644
--- a/cpp/include/cuvs/neighbors/ivf_pq.h
+++ b/cpp/include/cuvs/neighbors/ivf_pq.h
@@ -270,6 +270,16 @@ cuvsError_t cuvsIvfPqIndexGetDim(cuvsIvfPqIndex_t index, int64_t* dim);
 /** Get the size of the index */
 cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t* size);
 
+/** Get the dimensionality of an encoded vector after compression by PQ. */
+cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index, int64_t* pq_dim);
+
+/** Get the bit length of an encoded vector element after compression by PQ.*/
+cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index, int64_t* pq_bits);
+
+/** Get the Dimensionality of a subspace, i.e. the number of vector
+ * components mapped to a subspace */
+cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index, int64_t* pq_len);
+
 /**
  * @brief Get the cluster centers corresponding to the lists in the original space
  *
@@ -290,6 +300,38 @@ cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index, DLManagedTensor* ce
  * @return cuvsError_t
  */
 cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index, DLManagedTensor* pq_centers);
+
+/**
+ * @brief Get the sizes of each list
+ *
+ * @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
+ * @param[out] list_sizes Output tensor that will be populated with a non-owning view of the data
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index, DLManagedTensor* list_sizes);
+
+/**
+ * @brief Unpack `n_rows` consecutive PQ encoded vectors of a single list (cluster) in the
+ * compressed index starting at given `offset`, not expanded to one code per byte. Each code in the
+ * output buffer occupies ceildiv(index.pq_dim() * index.pq_bits(), 8) bytes.
+ *
+ * @param[in] res raft resource
+ * @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
+ * @param[out] out_codes
+ *   the destination buffer [n_rows, ceildiv(index.pq_dim() * index.pq_bits(), 8)].
+ *   The length `n_rows` defines how many records to unpack,
+ *   offset + n_rows must be smaller than or equal to the list size.
+ *   This DLManagedTensor must already point to allocated device memory
+ * @param[in] label
+ *   The id of the list (cluster) to decode.
+ * @param[in] offset
+ *   How many records in the list to skip.
+ */
+cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
+                                                   cuvsIvfPqIndex_t index,
+                                                   DLManagedTensor* out_codes,
+                                                   uint32_t label,
+                                                   uint32_t offset);
 /**
  * @}
  */
diff --git a/cpp/include/cuvs/neighbors/ivf_pq.hpp b/cpp/include/cuvs/neighbors/ivf_pq.hpp
index 593403f2aa..bac66f6b3e 100644
--- a/cpp/include/cuvs/neighbors/ivf_pq.hpp
+++ b/cpp/include/cuvs/neighbors/ivf_pq.hpp
@@ -401,7 +401,7 @@ struct index : cuvs::neighbors::index {
   /** The dimensionality of an encoded vector after compression by PQ. */
   uint32_t pq_dim() const noexcept;
 
-  /** Dimensionality of a subspaces, i.e. the number of vector components mapped to a subspace */
+  /** Dimensionality of a subspace, i.e. the number of vector components mapped to a subspace */
   uint32_t pq_len() const noexcept;
 
   /** The number of vectors in a PQ codebook (`1 << pq_bits`). */
@@ -2500,7 +2500,7 @@ void pack_contiguous_list_data(raft::resources const& res,
  *   raft::copy(&list_size, index.list_sizes().data_handle() + label, 1,
  * resource::get_cuda_stream(res)); resource::sync_stream(res);
  *   // allocate the buffer for the output
- *   auto codes = raft::make_device_matrix<float>(res, list_size, index.pq_dim());
+ *   auto codes = raft::make_device_matrix<uint8_t>(res, list_size, index.pq_dim());
  *   // unpack the whole list
  *   ivf_pq::helpers::codepacker::unpack_list_data(res, index, codes.view(), label, 0);
  * @endcode
@@ -2574,11 +2574,11 @@ void unpack_list_data(raft::resources const& res,
  *     raft::resource::get_cuda_stream(res));
  *   raft::resource::sync_stream(res);
  *   // allocate the buffer for the output
- *   auto codes = raft::make_device_matrix<float>(res, list_size, raft::ceildiv(index.pq_dim() *
- *     index.pq_bits(), 8));
+ *   auto codes = raft::make_device_matrix<uint8_t>(res, list_size, raft::ceildiv(index.pq_dim() *
+ *      index.pq_bits(), 8));
  *   // unpack the whole list
- *   ivf_pq::helpers::codepacker::unpack_list_data(res, index, codes.data_handle(), list_size,
- * label, 0);
+ *   ivf_pq::helpers::codepacker::unpack_contiguous_list_data(res, index, codes.data_handle(),
+ *      list_size, label, 0);
  * @endcode
  *
  * @param[in] res raft resource
diff --git a/cpp/src/core/c_api.cpp b/cpp/src/core/c_api.cpp
index 3cf6db457b..5ff29b1c40 100644
--- a/cpp/src/core/c_api.cpp
+++ b/cpp/src/core/c_api.cpp
@@ -228,18 +228,33 @@ void _copy_matrix(cuvsResources_t res, DLManagedTensor* src_managed, DLManagedTe
 {
   DLTensor& src = src_managed->dl_tensor;
   DLTensor& dst = dst_managed->dl_tensor;
-
-  int64_t src_row_stride = src.strides == nullptr ? src.shape[1] : src.strides[0];
-  int64_t dst_row_stride = dst.strides == nullptr ? dst.shape[1] : dst.strides[0];
-  auto res_ptr           = reinterpret_cast<raft::resources*>(res);
-
-  raft::copy_matrix<T>(static_cast<T*>(dst.data),
-                       dst_row_stride,
-                       static_cast<const T*>(src.data),
-                       src_row_stride,
-                       src.shape[1],
-                       src.shape[0],
-                       raft::resource::get_cuda_stream(*res_ptr));
+  auto res_ptr  = reinterpret_cast<raft::resources*>(res);
+  auto stream   = raft::resource::get_cuda_stream(*res_ptr);
+
+  if (src.ndim == 2) {
+    // use raft::copy_matrix for 2D tensors - this will handle copying from strided to non-strided
+    // views well
+    int64_t src_row_stride = src.strides == nullptr ? src.shape[1] : src.strides[0];
+    int64_t dst_row_stride = dst.strides == nullptr ? dst.shape[1] : dst.strides[0];
+
+    raft::copy_matrix<T>(static_cast<T*>(dst.data),
+                         dst_row_stride,
+                         static_cast<const T*>(src.data),
+                         src_row_stride,
+                         src.shape[1],
+                         src.shape[0],
+                         stream);
+  } else {
+    // Otherwise use cudaMemcpyAsync - and assert that we don't have strided data
+    RAFT_EXPECTS(src.strides == nullptr, "cuvsCopyMatrix only supports strides with 2D inputs");
+    RAFT_EXPECTS(dst.strides == nullptr, "cuvsCopyMatrix only supports strides with 2D inputs");
+
+    size_t elements = 1;
+    for (int64_t i = 0; i < src.ndim; ++i) {
+      elements *= src.shape[i];
+    }
+    cudaMemcpyAsync(dst.data, src.data, elements * sizeof(T), cudaMemcpyDefault, stream);
+  }
 }
 }  // namespace
 
@@ -251,8 +266,7 @@ extern "C" cuvsError_t cuvsMatrixCopy(cuvsResources_t res,
     DLTensor& src = src_managed->dl_tensor;
     DLTensor& dst = dst_managed->dl_tensor;
 
-    RAFT_EXPECTS(src.ndim == 2, "src should be a 2 dimensional tensor");
-    RAFT_EXPECTS(dst.ndim == 2, "dst should be a 2 dimensional tensor");
+    RAFT_EXPECTS(src.ndim == dst.ndim, "src and dst tensors should have the same dimensions");
 
     for (int64_t i = 0; i < src.ndim; ++i) {
       RAFT_EXPECTS(src.shape[i] == dst.shape[i], "shape mismatch between src and dst tensors");
diff --git a/cpp/src/neighbors/ivf_pq_c.cpp b/cpp/src/neighbors/ivf_pq_c.cpp
index 5c10d4111b..d9a47cfd57 100644
--- a/cpp/src/neighbors/ivf_pq_c.cpp
+++ b/cpp/src/neighbors/ivf_pq_c.cpp
@@ -177,6 +177,29 @@ void _get_pq_centers(cuvsIvfPqIndex index, DLManagedTensor* centers)
   auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
   cuvs::core::to_dlpack(index_ptr->pq_centers(), centers);
 }
+
+template <typename IdxT>
+void _get_list_sizes(cuvsIvfPqIndex index, DLManagedTensor* list_sizes)
+{
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
+  cuvs::core::to_dlpack(index_ptr->list_sizes(), list_sizes);
+}
+
+template <typename IdxT>
+void _unpack_contiguous_list_data(cuvsResources_t res,
+                                  cuvsIvfPqIndex index,
+                                  DLManagedTensor* out_codes,
+                                  uint32_t label,
+                                  uint32_t offset)
+{
+  auto index_ptr    = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
+  using mdspan_type = raft::device_matrix_view<uint8_t, uint32_t, raft::row_major>;
+  auto mds          = cuvs::core::from_dlpack<mdspan_type>(out_codes);
+  auto res_ptr      = reinterpret_cast<raft::resources*>(res);
+
+  cuvs::neighbors::ivf_pq::helpers::codepacker::unpack_contiguous_list_data(
+    *res_ptr, *index_ptr, mds.data_handle(), mds.extent(0), label, offset);
+}
 }  // namespace
 
 extern "C" cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t* index)
@@ -371,6 +394,30 @@ extern "C" cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t* si
   });
 }
 
+extern "C" cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index, int64_t* pq_dim)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<int64_t>*>(index->addr);
+    *pq_dim        = index_ptr->pq_dim();
+  });
+}
+
+extern "C" cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index, int64_t* pq_bits)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<int64_t>*>(index->addr);
+    *pq_bits       = index_ptr->pq_bits();
+  });
+}
+
+extern "C" cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index, int64_t* pq_len)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<int64_t>*>(index->addr);
+    *pq_len        = index_ptr->pq_len();
+  });
+}
+
 extern "C" cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index, DLManagedTensor* centers)
 {
   return cuvs::core::translate_exceptions([=] { _get_centers<int64_t>(*index, centers); });
@@ -381,3 +428,19 @@ extern "C" cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index,
 {
   return cuvs::core::translate_exceptions([=] { _get_pq_centers<int64_t>(*index, pq_centers); });
 }
+
+extern "C" cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index,
+                                                  DLManagedTensor* list_sizes)
+{
+  return cuvs::core::translate_exceptions([=] { _get_list_sizes<int64_t>(*index, list_sizes); });
+}
+
+extern "C" cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
+                                                              cuvsIvfPqIndex_t index,
+                                                              DLManagedTensor* out_codes,
+                                                              uint32_t label,
+                                                              uint32_t offset)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { _unpack_contiguous_list_data<int64_t>(res, *index, out_codes, label, offset); });
+}
diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd
index 928a0cba1b..e67ba3e398 100644
--- a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd
+++ b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd
@@ -91,12 +91,30 @@ cdef extern from "cuvs/neighbors/ivf_pq.h" nogil:
 
     cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t * size)
 
+    cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index,
+                                       int64_t * pq_dim)
+
+    cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index,
+                                        int64_t * pq_bits)
+
+    cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index,
+                                       int64_t * pq_len)
+
     cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index,
                                          DLManagedTensor * centers)
 
+    cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index,
+                                           DLManagedTensor * list_sizes)
+
     cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index,
                                            DLManagedTensor * centers)
 
+    cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
+                                                       cuvsIvfPqIndex_t index,
+                                                       DLManagedTensor* out,
+                                                       uint32_t label,
+                                                       uint32_t offset)
+
     cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
                                cuvsIvfPqIndexParams* params,
                                DLManagedTensor* dataset,
diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx
index 5e13b3bf3b..ccb6f76422 100644
--- a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx
+++ b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx
@@ -261,6 +261,29 @@ cdef class Index:
         check_cuvs(cuvsIvfPqIndexGetDim(self.index, &dim))
         return dim
 
+    @property
+    def pq_dim(self):
+        """ The dimensionality of an encoded vector after compression by PQ """
+        cdef int64_t pq_dim
+        check_cuvs(cuvsIvfPqIndexGetPqDim(self.index, &pq_dim))
+        return pq_dim
+
+    @property
+    def pq_len(self):
+        """ The dimensionality of a subspace, i.e. the number of vector
+        components mapped to a subspace """
+        cdef int64_t pq_len
+        check_cuvs(cuvsIvfPqIndexGetPqLen(self.index, &pq_len))
+        return pq_len
+
+    @property
+    def pq_bits(self):
+        """ The bit length of an encoded vector element after
+        compression by PQ. """
+        cdef int64_t pq_bits
+        check_cuvs(cuvsIvfPqIndexGetPqBits(self.index, &pq_bits))
+        return pq_bits
+
     def __len__(self):
         cdef int64_t size
         check_cuvs(cuvsIvfPqIndexGetSize(self.index, &size))
@@ -294,6 +317,76 @@ cdef class Index:
         output.parent = self
         return output
 
+    @property
+    def list_sizes(self):
+        """ Get the sizes of each list """
+        if not self.trained:
+            raise ValueError("Index needs to be built before getting"
+                             " list sizes")
+        output = DeviceTensorView()
+        cdef cydlpack.DLManagedTensor * tensor = \
+            <cydlpack.DLManagedTensor*><size_t>output.get_handle()
+        check_cuvs(cuvsIvfPqIndexGetListSizes(self.index, tensor))
+        output.parent = self
+        return output
+
+    @auto_sync_resources
+    def lists(self, resources=None):
+        """ Iterates through the pq-encoded list data
+
+        This function returns an iterator over each list,
+        with each value being the pq-encoded data for the
+        entire list
+
+        Parameters
+        ----------
+        {resources_docstring}
+        """
+        list_sizes = self.list_sizes.copy_to_host()
+        for i, list_size in enumerate(list_sizes):
+            yield self.list_data(i, n_rows=list_size, resources=resources)
+
+    @auto_sync_resources
+    def list_data(self, label, n_rows=0, offset=0, out_codes=None,
+                  resources=None):
+        """ Gets unpacked list data for a single list (cluster)
+
+        Parameters
+        ----------
+        label, int:
+            The cluster to get data for
+        n_rows, int:
+            The number of rows to return for the cluster (0 is all rows)
+        offset, int:
+            The row to start getting data at
+        out_codes, CAI
+            Optional buffer to hold memory. Will be created if None
+        {resources_docstring}
+        """
+        if n_rows == 0:
+            n_rows = self.list_sizes.copy_to_host()[label]
+
+        n_cols = int(np.ceil(self.pq_dim * self.pq_bits / 8))
+
+        if out_codes is None:
+            out_codes = device_ndarray.empty((n_rows, n_cols), dtype="ubyte")
+
+        out_codes_cai= wrap_array(out_codes)
+        _check_input_array(out_codes_cai, [np.dtype("ubyte")],
+                           exp_rows=n_rows, exp_cols=n_cols)
+
+        cdef cydlpack.DLManagedTensor* out_codes_dlpack = \
+            cydlpack.dlpack_c(out_codes_cai)
+
+        cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+
+        check_cuvs(cuvsIvfPqIndexUnpackContiguousListData(res,
+                                                          self.index,
+                                                          out_codes_dlpack,
+                                                          label,
+                                                          offset))
+        return out_codes
+
 
 @auto_sync_resources
 def build(IndexParams index_params, dataset, resources=None):

From 55101a8257dee7f4a10760f4064a191e7bb19a84 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Fri, 24 Oct 2025 17:08:43 -0700
Subject: [PATCH 2/3] Add python bindings for ivf-pq list ids

And add a basic unittest for this functionality
---
 c/include/cuvs/neighbors/ivf_pq.h            | 13 +++++++++
 c/src/core/c_api.cpp                         | 23 ++++++++++------
 c/src/neighbors/ivf_pq.cpp                   | 17 ++++++++++++
 python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd |  4 +++
 python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx | 29 +++++++++++++++++++-
 python/cuvs/cuvs/tests/test_ivf_pq.py        |  5 ++++
 6 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/c/include/cuvs/neighbors/ivf_pq.h b/c/include/cuvs/neighbors/ivf_pq.h
index c52557747d..69c4f2ec1a 100644
--- a/c/include/cuvs/neighbors/ivf_pq.h
+++ b/c/include/cuvs/neighbors/ivf_pq.h
@@ -332,6 +332,19 @@ cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
                                                    DLManagedTensor* out_codes,
                                                    uint32_t label,
                                                    uint32_t offset);
+/**
+ * @brief Get the indices of each vector in a ivf-pq list
+ *
+ * @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
+ * @param[in] label
+ *   The id of the list (cluster) to decode.
+ * @param[out] out_labels
+ *   output tensor that will be populated with a non-owning view of the data
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
+                                         uint32_t label,
+                                         DLManagedTensor* out_labels);
 /**
  * @}
  */
diff --git a/c/src/core/c_api.cpp b/c/src/core/c_api.cpp
index e5d595a66f..c791b837eb 100644
--- a/c/src/core/c_api.cpp
+++ b/c/src/core/c_api.cpp
@@ -330,21 +330,26 @@ extern "C" cuvsError_t cuvsMatrixSliceRows(cuvsResources_t res,
 
     DLTensor& src = src_managed->dl_tensor;
     DLTensor& dst = dst_managed->dl_tensor;
-    RAFT_EXPECTS(src.ndim == 2, "src should be a 2 dimensional tensor");
+    RAFT_EXPECTS(src.ndim <= 2, "src should be a 1 or 2 dimensional tensor");
     RAFT_EXPECTS(src.shape != nullptr, "shape should be initialized in the src tensor");
 
     dst.dtype    = src.dtype;
     dst.device   = src.device;
-    dst.ndim     = 2;
-    dst.shape    = new int64_t[2];
+    dst.ndim     = src.ndim;
+    dst.shape    = new int64_t[dst.ndim];
     dst.shape[0] = end - start;
-    dst.shape[1] = src.shape[1];
 
-    int64_t row_strides = dst.shape[1];
-    if (src.strides) {
-      dst.strides = new int64_t[2];
-      row_strides = dst.strides[0] = src.strides[0];
-      dst.strides[1]               = src.strides[1];
+    int64_t row_strides = 1;
+
+    if (dst.ndim == 2) {
+      dst.shape[1] = src.shape[1];
+      row_strides = dst.shape[1];
+
+      if (src.strides) {
+        dst.strides = new int64_t[2];
+        row_strides = dst.strides[0] = src.strides[0];
+        dst.strides[1]               = src.strides[1];
+      }
     }
 
     dst.data = static_cast<char*>(src.data) + start * row_strides * (dst.dtype.bits / 8);
diff --git a/c/src/neighbors/ivf_pq.cpp b/c/src/neighbors/ivf_pq.cpp
index 635ef1ad42..cde2017b61 100644
--- a/c/src/neighbors/ivf_pq.cpp
+++ b/c/src/neighbors/ivf_pq.cpp
@@ -201,6 +201,15 @@ void _unpack_contiguous_list_data(cuvsResources_t res,
   cuvs::neighbors::ivf_pq::helpers::codepacker::unpack_contiguous_list_data(
     *res_ptr, *index_ptr, mds.data_handle(), mds.extent(0), label, offset);
 }
+
+template <typename IdxT>
+void _get_list_indices(cuvsIvfPqIndex index,
+                       uint32_t label,
+                       DLManagedTensor* out_labels)
+{
+  auto index_ptr    = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
+  cuvs::core::to_dlpack(index_ptr->lists()[label]->indices.view(), out_labels);
+}
 }  // namespace
 
 extern "C" cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t* index)
@@ -445,3 +454,11 @@ extern "C" cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t re
   return cuvs::core::translate_exceptions(
     [=] { _unpack_contiguous_list_data<int64_t>(res, *index, out_codes, label, offset); });
 }
+
+extern "C" cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
+                                                    uint32_t label,
+                                                    DLManagedTensor* out_labels)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { _get_list_indices<int64_t>(*index, label, out_labels); });
+}
diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd
index e67ba3e398..3774983102 100644
--- a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd
+++ b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pxd
@@ -115,6 +115,10 @@ cdef extern from "cuvs/neighbors/ivf_pq.h" nogil:
                                                        uint32_t label,
                                                        uint32_t offset)
 
+    cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
+                                             uint32_t label,
+                                             DLManagedTensor* out)
+
     cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
                                cuvsIvfPqIndexParams* params,
                                DLManagedTensor* dataset,
diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx
index ccb6f76422..c3395d1a7f 100644
--- a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx
+++ b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx
@@ -344,7 +344,10 @@ cdef class Index:
         """
         list_sizes = self.list_sizes.copy_to_host()
         for i, list_size in enumerate(list_sizes):
-            yield self.list_data(i, n_rows=list_size, resources=resources)
+            indices = self.list_indices(i, n_rows=list_size)
+            list_data = self.list_data(i, n_rows=list_size,
+                                       resources=resources)
+            yield indices, list_data
 
     @auto_sync_resources
     def list_data(self, label, n_rows=0, offset=0, out_codes=None,
@@ -387,6 +390,30 @@ cdef class Index:
                                                           offset))
         return out_codes
 
+    def list_indices(self, label, n_rows=0):
+        """ Gets indices for a single cluster (list)
+
+        Parameters
+        ----------
+        label, int:
+            The cluster to get data for
+        n_rows, int, optional
+            Number of rows in the list
+        """
+        output = DeviceTensorView()
+        cdef cydlpack.DLManagedTensor * tensor = \
+            <cydlpack.DLManagedTensor*><size_t>output.get_handle()
+        check_cuvs(cuvsIvfPqIndexGetListIndices(self.index, label, tensor))
+        output.parent = self
+
+        # the indices tensor being returned here is larger than the number of
+        # rows in the actual list, and the remaining values are padded out
+        # with -1.
+        # fix this by slicing down to the number of rows in the actual list
+        if n_rows == 0:
+            n_rows = self.list_sizes.copy_to_host()[label]
+        return output.slice_rows(0, n_rows)
+
 
 @auto_sync_resources
 def build(IndexParams index_params, dataset, resources=None):
diff --git a/python/cuvs/cuvs/tests/test_ivf_pq.py b/python/cuvs/cuvs/tests/test_ivf_pq.py
index a3eb8a9df4..f45379a002 100644
--- a/python/cuvs/cuvs/tests/test_ivf_pq.py
+++ b/python/cuvs/cuvs/tests/test_ivf_pq.py
@@ -127,6 +127,11 @@ def run_ivf_pq_build_search_test(
     assert len(pq_centers.shape) == 3
     assert pq_centers.shape[2] == 1 << pq_bits
 
+    all_list_ids = set()
+    for list_ids, list_data in index.lists():
+        all_list_ids.update(list_ids.copy_to_host())
+    assert all_list_ids == set(np.arange(n_rows))
+
     if not compare:
         return
 

From 4662187228e50ffb6093cf55427dd1829072c61f Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Mon, 17 Nov 2025 10:57:56 -0800
Subject: [PATCH 3/3] code review feedback

---
 c/src/core/c_api.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/c/src/core/c_api.cpp b/c/src/core/c_api.cpp
index acb1e15749..2d83f31b9b 100644
--- a/c/src/core/c_api.cpp
+++ b/c/src/core/c_api.cpp
@@ -263,7 +263,7 @@ void _copy_matrix(cuvsResources_t res, DLManagedTensor* src_managed, DLManagedTe
     for (int64_t i = 0; i < src.ndim; ++i) {
       elements *= src.shape[i];
     }
-    cudaMemcpyAsync(dst.data, src.data, elements * sizeof(T), cudaMemcpyDefault, stream);
+    raft::copy<T>(static_cast<T*>(dst.data), static_cast<const T*>(src.data), elements, stream);
   }
 }
 }  // namespace