Commit d2e2be9

Merge branch 'branch-25.06' into account-for-raft-update

2 parents da48376 + 23ba17b

18 files changed: +582 −306 lines

.github/CODEOWNERS
Lines changed: 3 additions & 5 deletions

@@ -15,11 +15,6 @@ CMakeLists.txt @rapidsai/cuvs-cmake-codeowners
 **/cmake/ @rapidsai/cuvs-cmake-codeowners
 *.cmake @rapidsai/cuvs-cmake-codeowners
 
-#build code owners
-python/setup.py @rapidsai/cuvs-build-codeowners
-build.sh @rapidsai/cuvs-build-codeowners
-**/build.sh @rapidsai/cuvs-build-codeowners
-
 #CI code owners
 /.github/ @rapidsai/ci-codeowners
 /ci/ @rapidsai/ci-codeowners
@@ -31,3 +26,6 @@ build.sh @rapidsai/cuvs-build-codeowners
 dependencies.yaml @rapidsai/packaging-codeowners
 /build.sh @rapidsai/packaging-codeowners
 pyproject.toml @rapidsai/packaging-codeowners
+python/setup.py @rapidsai/packaging-codeowners
+build.sh @rapidsai/packaging-codeowners
+**/build.sh @rapidsai/packaging-codeowners

ci/build_wheel.sh
Lines changed: 8 additions & 0 deletions

@@ -35,6 +35,14 @@ if [[ "${package_dir}" != "python/libcuvs" ]]; then
   )
 fi
 
+RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
+if [[ ${RAPIDS_CUDA_MAJOR} != "11" ]]; then
+  EXCLUDE_ARGS+=(
+    --exclude "libnccl.so.*"
+  )
+  export SKBUILD_CMAKE_ARGS="-DUSE_NCCL_RUNTIME_WHEEL=ON"
+fi
+
 rapids-logger "Building '${package_name}' wheel"
 
 sccache --zero-stats

ci/test_wheel_cuvs.sh
Lines changed: 6 additions & 0 deletions

@@ -3,6 +3,12 @@
 
 set -euo pipefail
 
+# Delete system libnccl.so to ensure the wheel is used
+RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
+if [[ ${RAPIDS_CUDA_MAJOR} != "11" ]]; then
+  rm -rf /usr/lib64/libnccl*
+fi
+
 mkdir -p ./dist
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
 RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-libcuvs-dep

cpp/CMakeLists.txt
Lines changed: 9 additions & 2 deletions

@@ -578,15 +578,22 @@ if(BUILD_SHARED_LIBS)
   )
 
   if(BUILD_MG_ALGOS)
-    set(CUVS_COMMS_DEPENDENCY nccl)
+    rapids_find_generate_module(
+      NCCL
+      HEADER_NAMES nccl.h
+      LIBRARY_NAMES nccl
+    )
+    find_package(NCCL REQUIRED)
+    target_link_libraries(cuvs_objs PRIVATE NCCL::NCCL)
+    target_link_libraries(cuvs PRIVATE NCCL::NCCL)
   endif()
 
   # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target.
   target_link_libraries(
     cuvs
     PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES}
     PRIVATE nvidia::cutlass::cutlass $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
-            cuvs-cagra-search ${CUVS_COMMS_DEPENDENCY}
+            cuvs-cagra-search
   )
 
   if(NOT CUVS_COMPILE_DYNAMIC_ONLY)

cpp/include/cuvs/core/detail/interop.hpp
Lines changed: 26 additions & 0 deletions

@@ -141,4 +141,30 @@ inline bool is_c_contiguous(DLManagedTensor* managed_tensor)
   return true;
 }
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+static void free_dlmanaged_tensor_shape(DLManagedTensor* tensor)
+{
+  delete[] tensor->dl_tensor.shape;
+}
+#pragma GCC diagnostic pop
+
+template <typename MdspanType, typename = raft::is_mdspan_t<MdspanType>>
+static void to_dlpack(MdspanType src, DLManagedTensor* dst)
+{
+  auto tensor = &dst->dl_tensor;
+
+  tensor->dtype = data_type_to_DLDataType<typename MdspanType::value_type>();
+  tensor->device = accessor_type_to_DLDevice<typename MdspanType::accessor_type>();
+  tensor->ndim = MdspanType::extents_type::rank();
+  tensor->data = src.data_handle();
+
+  tensor->shape = new int64_t[tensor->ndim];
+  dst->deleter = free_dlmanaged_tensor_shape;
+
+  for (int64_t i = 0; i < tensor->ndim; ++i) {
+    tensor->shape[i] = src.extent(i);
+  }
+}
+
 } // namespace cuvs::core::detail
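
The deleter registered above frees only the heap-allocated shape array, never the tensor data itself, so the struct stays a non-owning view over the mdspan's buffer. A minimal sketch of the consumer side of that contract (the use_tensor name is hypothetical, not part of this change):

#include <dlpack/dlpack.h>

// Hypothetical consumer of a tensor produced by detail::to_dlpack.
void use_tensor(DLManagedTensor* t)
{
  // ... read t->dl_tensor.data and t->dl_tensor.shape here ...

  // Release only what to_dlpack allocated (the shape array);
  // the data buffer is owned by whoever created the original mdspan.
  if (t->deleter != nullptr) { t->deleter(t); }
}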

cpp/include/cuvs/core/interop.hpp
Lines changed: 14 additions & 1 deletion

@@ -68,7 +68,7 @@ inline bool is_c_contiguous(DLManagedTensor* tensor) { return detail::is_c_conti
 inline bool is_f_contiguous(DLManagedTensor* tensor) { return detail::is_f_contiguous(tensor); }
 
 /**
- * @brief Convert a DLManagedTensor to an mdspan
+ * @brief Convert a DLManagedTensor to a mdspan
  * NOTE: This function only supports compact row-major and col-major layouts.
  *
  * @code {.cpp}
@@ -93,6 +93,19 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor)
   return detail::from_dlpack<MdspanType>(managed_tensor);
 }
 
+/**
+ * @brief Convert a mdspan to a DLManagedTensor
+ *
+ * Converts a mdspan to a DLManagedTensor object. This lets us pass non-owning
+ * views from C++ to C code without copying. Note that returned DLManagedTensor
+ * is a non-owning view, and doesn't ensure that the underlying memory stays valid.
+ */
+template <typename MdspanType, typename = raft::is_mdspan_t<MdspanType>>
+void to_dlpack(MdspanType src, DLManagedTensor* dst)
+{
+  return detail::to_dlpack(src, dst);
+}
+
 /**
 * @}
 */
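
A hedged usage sketch of the new public entry point, mirroring the existing @code example for from_dlpack (the buffer, extents, and the example function are made up for illustration; raft::make_host_matrix_view is assumed from raft):

#include <cuvs/core/interop.hpp>
#include <raft/core/host_mdspan.hpp>

#include <dlpack/dlpack.h>

// `data` is assumed to point at 10 * 4 valid floats.
void example(float* data)
{
  auto view = raft::make_host_matrix_view<float, int64_t>(data, 10, 4);

  DLManagedTensor tensor;
  cuvs::core::to_dlpack(view, &tensor);  // non-owning view over `data`

  // ... pass &tensor across the C boundary; `data` must stay alive meanwhile ...

  if (tensor.deleter != nullptr) { tensor.deleter(&tensor); }  // frees only the shape array
}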

cpp/include/cuvs/neighbors/nn_descent.h
Lines changed: 20 additions & 2 deletions

@@ -171,11 +171,29 @@ cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
 /**
  * @brief Get the KNN graph from a built NN-Descent index
  *
+ * @param[in] res cuvsResources_t opaque C handle
  * @param[in] index cuvsNNDescentIndex_t Built NN-Descent index
- * @param[inout] graph Optional preallocated graph on host memory to store output
+ * @param[out] graph Preallocated graph on host memory to store output
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsNNDescentIndexGetGraph(cuvsResources_t res,
+                                       cuvsNNDescentIndex_t index,
+                                       DLManagedTensor* graph);
+
+/**
+ * @brief Get the distances from a built NN-Descent index
+ *
+ * This requires that the `return_distances` parameter was set when building the
+ * graph
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] index cuvsNNDescentIndex_t Built NN-Descent index
+ * @param[out] distances Preallocated memory to store the output distances tensor
  * @return cuvsError_t
  */
-cuvsError_t cuvsNNDescentIndexGetGraph(cuvsNNDescentIndex_t index, DLManagedTensor* graph);
+cuvsError_t cuvsNNDescentIndexGetDistances(cuvsResources_t res,
+                                           cuvsNNDescentIndex_t index,
+                                           DLManagedTensor* distances);
 #ifdef __cplusplus
 }
 #endif
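
A call-side sketch of the two accessors with their new res-first signatures (the fetch_graph helper is hypothetical; the index is assumed already built with return_distances enabled, and both output tensors preallocated on host memory by the caller):

#include <cuvs/neighbors/nn_descent.h>

// Hypothetical helper: fetch both outputs from a built index.
cuvsError_t fetch_graph(cuvsResources_t res,
                        cuvsNNDescentIndex_t index,
                        DLManagedTensor* graph,
                        DLManagedTensor* distances)
{
  cuvsError_t err = cuvsNNDescentIndexGetGraph(res, index, graph);
  if (err != CUVS_SUCCESS) { return err; }
  return cuvsNNDescentIndexGetDistances(res, index, distances);
}

Note that CUVS_SUCCESS is assumed here from cuVS's C error enum.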
