Commit d2e2be9

Merge branch 'branch-25.06' into account-for-raft-update

2 parents da48376 + 23ba17b

18 files changed: +582 −306 lines

.github/CODEOWNERS
Lines changed: 3 additions & 5 deletions

@@ -15,11 +15,6 @@ CMakeLists.txt @rapidsai/cuvs-cmake-codeowners
 **/cmake/ @rapidsai/cuvs-cmake-codeowners
 *.cmake @rapidsai/cuvs-cmake-codeowners
 
-#build code owners
-python/setup.py @rapidsai/cuvs-build-codeowners
-build.sh @rapidsai/cuvs-build-codeowners
-**/build.sh @rapidsai/cuvs-build-codeowners
-
 #CI code owners
 /.github/ @rapidsai/ci-codeowners
 /ci/ @rapidsai/ci-codeowners
@@ -31,3 +26,6 @@ build.sh @rapidsai/cuvs-build-codeowners
 dependencies.yaml @rapidsai/packaging-codeowners
 /build.sh @rapidsai/packaging-codeowners
 pyproject.toml @rapidsai/packaging-codeowners
+python/setup.py @rapidsai/packaging-codeowners
+build.sh @rapidsai/packaging-codeowners
+**/build.sh @rapidsai/packaging-codeowners

ci/build_wheel.sh
Lines changed: 8 additions & 0 deletions

@@ -35,6 +35,14 @@ if [[ "${package_dir}" != "python/libcuvs" ]]; then
   )
 fi
 
+RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
+if [[ ${RAPIDS_CUDA_MAJOR} != "11" ]]; then
+  EXCLUDE_ARGS+=(
+    --exclude "libnccl.so.*"
+  )
+  export SKBUILD_CMAKE_ARGS="-DUSE_NCCL_RUNTIME_WHEEL=ON"
+fi
+
 rapids-logger "Building '${package_name}' wheel"
 
 sccache --zero-stats

ci/test_wheel_cuvs.sh
Lines changed: 6 additions & 0 deletions

@@ -3,6 +3,12 @@
 
 set -euo pipefail
 
+# Delete system libnccl.so to ensure the wheel is used
+RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
+if [[ ${RAPIDS_CUDA_MAJOR} != "11" ]]; then
+  rm -rf /usr/lib64/libnccl*
+fi
+
 mkdir -p ./dist
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
 RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-libcuvs-dep

cpp/CMakeLists.txt
Lines changed: 9 additions & 2 deletions

@@ -578,15 +578,22 @@ if(BUILD_SHARED_LIBS)
   )
 
   if(BUILD_MG_ALGOS)
-    set(CUVS_COMMS_DEPENDENCY nccl)
+    rapids_find_generate_module(
+      NCCL
+      HEADER_NAMES nccl.h
+      LIBRARY_NAMES nccl
+    )
+    find_package(NCCL REQUIRED)
+    target_link_libraries(cuvs_objs PRIVATE NCCL::NCCL)
+    target_link_libraries(cuvs PRIVATE NCCL::NCCL)
   endif()
 
   # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target.
   target_link_libraries(
     cuvs
     PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES}
     PRIVATE nvidia::cutlass::cutlass $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
-            cuvs-cagra-search ${CUVS_COMMS_DEPENDENCY}
+            cuvs-cagra-search
   )
 
   if(NOT CUVS_COMPILE_DYNAMIC_ONLY)

cpp/include/cuvs/core/detail/interop.hpp
Lines changed: 26 additions & 0 deletions

@@ -141,4 +141,30 @@ inline bool is_c_contiguous(DLManagedTensor* managed_tensor)
   return true;
 }
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+static void free_dlmanaged_tensor_shape(DLManagedTensor* tensor)
+{
+  delete[] tensor->dl_tensor.shape;
+}
+#pragma GCC diagnostic pop
+
+template <typename MdspanType, typename = raft::is_mdspan_t<MdspanType>>
+static void to_dlpack(MdspanType src, DLManagedTensor* dst)
+{
+  auto tensor = &dst->dl_tensor;
+
+  tensor->dtype = data_type_to_DLDataType<typename MdspanType::value_type>();
+  tensor->device = accessor_type_to_DLDevice<typename MdspanType::accessor_type>();
+  tensor->ndim = MdspanType::extents_type::rank();
+  tensor->data = src.data_handle();
+
+  tensor->shape = new int64_t[tensor->ndim];
+  dst->deleter = free_dlmanaged_tensor_shape;
+
+  for (int64_t i = 0; i < tensor->ndim; ++i) {
+    tensor->shape[i] = src.extent(i);
+  }
+}
+
 } // namespace cuvs::core::detail
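
The deleter registered above frees only the heap-allocated shape array, never the tensor data itself, so the struct stays a non-owning view over the mdspan's buffer. A minimal sketch of the consumer side of that contract (the use_tensor name is hypothetical, not part of this change):

#include <dlpack/dlpack.h>

// Hypothetical consumer of a tensor produced by detail::to_dlpack.
void use_tensor(DLManagedTensor* t)
{
  // ... read t->dl_tensor.data and t->dl_tensor.shape here ...

  // Release only what to_dlpack allocated (the shape array);
  // the data buffer is owned by whoever created the original mdspan.
  if (t->deleter != nullptr) { t->deleter(t); }
}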

cpp/include/cuvs/core/interop.hpp
Lines changed: 14 additions & 1 deletion

@@ -68,7 +68,7 @@ inline bool is_c_contiguous(DLManagedTensor* tensor) { return detail::is_c_conti
 inline bool is_f_contiguous(DLManagedTensor* tensor) { return detail::is_f_contiguous(tensor); }
 
 /**
- * @brief Convert a DLManagedTensor to an mdspan
+ * @brief Convert a DLManagedTensor to a mdspan
  * NOTE: This function only supports compact row-major and col-major layouts.
  *
  * @code {.cpp}
@@ -93,6 +93,19 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor)
   return detail::from_dlpack<MdspanType>(managed_tensor);
 }
 
+/**
+ * @brief Convert a mdspan to a DLManagedTensor
+ *
+ * Converts a mdspan to a DLManagedTensor object. This lets us pass non-owning
+ * views from C++ to C code without copying. Note that returned DLManagedTensor
+ * is a non-owning view, and doesn't ensure that the underlying memory stays valid.
+ */
+template <typename MdspanType, typename = raft::is_mdspan_t<MdspanType>>
+void to_dlpack(MdspanType src, DLManagedTensor* dst)
+{
+  return detail::to_dlpack(src, dst);
+}
+
 /**
 * @}
 */
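
A hedged usage sketch of the new public entry point, mirroring the existing @code example for from_dlpack (the buffer, extents, and the example function are made up for illustration; raft::make_host_matrix_view is assumed from raft):

#include <cuvs/core/interop.hpp>
#include <raft/core/host_mdspan.hpp>

#include <dlpack/dlpack.h>

// `data` is assumed to point at 10 * 4 valid floats.
void example(float* data)
{
  auto view = raft::make_host_matrix_view<float, int64_t>(data, 10, 4);

  DLManagedTensor tensor;
  cuvs::core::to_dlpack(view, &tensor);  // non-owning view over `data`

  // ... pass &tensor across the C boundary; `data` must stay alive meanwhile ...

  if (tensor.deleter != nullptr) { tensor.deleter(&tensor); }  // frees only the shape array
}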

cpp/include/cuvs/neighbors/nn_descent.h
Lines changed: 20 additions & 2 deletions

@@ -171,11 +171,29 @@ cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
 /**
  * @brief Get the KNN graph from a built NN-Descent index
  *
+ * @param[in] res cuvsResources_t opaque C handle
  * @param[in] index cuvsNNDescentIndex_t Built NN-Descent index
- * @param[inout] graph Optional preallocated graph on host memory to store output
+ * @param[out] graph Preallocated graph on host memory to store output
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsNNDescentIndexGetGraph(cuvsResources_t res,
+                                       cuvsNNDescentIndex_t index,
+                                       DLManagedTensor* graph);
+
+/**
+ * @brief Get the distances from a built NN-Descent index
+ *
+ * This requires that the `return_distances` parameter was set when building the
+ * graph
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] index cuvsNNDescentIndex_t Built NN-Descent index
+ * @param[out] distances Preallocated memory to store the output distances tensor
  * @return cuvsError_t
  */
-cuvsError_t cuvsNNDescentIndexGetGraph(cuvsNNDescentIndex_t index, DLManagedTensor* graph);
+cuvsError_t cuvsNNDescentIndexGetDistances(cuvsResources_t res,
+                                           cuvsNNDescentIndex_t index,
+                                           DLManagedTensor* distances);
 #ifdef __cplusplus
 }
 #endif
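
A call-side sketch of the two accessors with their new res-first signatures (the fetch_graph helper is hypothetical; the index is assumed already built with return_distances enabled, and both output tensors preallocated on host memory by the caller):

#include <cuvs/neighbors/nn_descent.h>

// Hypothetical helper: fetch both outputs from a built index.
cuvsError_t fetch_graph(cuvsResources_t res,
                        cuvsNNDescentIndex_t index,
                        DLManagedTensor* graph,
                        DLManagedTensor* distances)
{
  cuvsError_t err = cuvsNNDescentIndexGetGraph(res, index, graph);
  if (err != CUVS_SUCCESS) { return err; }
  return cuvsNNDescentIndexGetDistances(res, index, distances);
}

Note that CUVS_SUCCESS is assumed here from cuVS's C error enum.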
