Merged
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -108,7 +108,7 @@ repos:
- id: verify-copyright
files: |
(?x)
[.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx|rs)$|
[.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx|rs|java)$|
Contributor: 👍

CMakeLists[.]txt$|
CMakeLists_standalone[.]txt$|
meta[.]yaml$
4 changes: 2 additions & 2 deletions cpp/include/cuvs/cluster/kmeans.h
@@ -30,7 +30,7 @@ extern "C" {
* @{
*/

enum cuvsKMeansInitMethod {
typedef enum {
Contributor: 👍

/**
* Sample the centroids using the kmeans++ strategy
*/
@@ -45,7 +45,7 @@ enum cuvsKMeansInitMethod {
* User provides the array of initial centroids
*/
Array
};
} cuvsKMeansInitMethod;

/**
* @brief Hyper-parameters for the kmeans algorithm
29 changes: 29 additions & 0 deletions cpp/include/cuvs/core/c_api.h
@@ -17,6 +17,7 @@
#pragma once

#include <cuda_runtime.h>
#include <dlpack/dlpack.h>
#include <stdbool.h>
#include <stdint.h>

@@ -176,6 +177,34 @@ cuvsError_t cuvsRMMHostFree(void* ptr, size_t bytes);
*/
cuvsError_t cuvsVersionGet(uint16_t* major, uint16_t* minor, uint16_t* patch);

/**
* @brief Copy a matrix
*
 * This function copies a matrix from src to dst. This lets you copy a matrix
* from device memory to host memory (or vice versa), while accounting for
* differences in strides.
*
* Both src and dst must have the same shape and dtype, but can have different
* strides and device type. The memory for the output dst tensor must already be
* allocated and the tensor initialized.
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] src Pointer to DLManagedTensor to copy
* @param[out] dst Pointer to DLManagedTensor to receive copy of data
*/
cuvsError_t cuvsMatrixCopy(cuvsResources_t res, DLManagedTensor* src, DLManagedTensor* dst);
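
A minimal usage sketch of the copy call (editor's illustration, not part of this diff): copying a small row-major float matrix from host memory into a device buffer. The tensor setup, the buffer sizes, and the helper function name are assumptions made only for this example.

/* Illustrative sketch only: host -> device copy with cuvsMatrixCopy.
 * Error handling is omitted; the helper name is made up for this example. */
#include <cuda_runtime.h>
#include <cuvs/core/c_api.h>
#include <dlpack/dlpack.h>
#include <string.h>

void matrix_copy_example(cuvsResources_t res)
{
  float host_data[6] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f}; /* 2 x 3, row-major */
  int64_t shape[2]   = {2, 3};

  DLManagedTensor src;
  memset(&src, 0, sizeof(src));
  src.dl_tensor.data               = host_data;
  src.dl_tensor.device.device_type = kDLCPU;
  src.dl_tensor.ndim               = 2;
  src.dl_tensor.dtype.code         = kDLFloat;
  src.dl_tensor.dtype.bits         = 32;
  src.dl_tensor.dtype.lanes        = 1;
  src.dl_tensor.shape              = shape;
  src.dl_tensor.strides            = NULL;             /* contiguous rows */

  float* device_data;
  cudaMalloc((void**)&device_data, sizeof(host_data));

  DLManagedTensor dst = src;                            /* same shape and dtype */
  dst.dl_tensor.data               = device_data;
  dst.dl_tensor.device.device_type = kDLCUDA;

  cuvsMatrixCopy(res, &src, &dst);                      /* copy on the resource's stream */
  cudaDeviceSynchronize();

  cudaFree(device_data);
}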

/**
* @brief Slices rows from a matrix
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] src Pointer to DLManagedTensor to copy
* @param[in] start First row index to include in the output
 * @param[in] end End row index of the slice (exclusive)
* @param[out] dst Pointer to DLManagedTensor to receive slice from matrix
*/
cuvsError_t cuvsMatrixSliceRows(
cuvsResources_t res, DLManagedTensor* src, int64_t start, int64_t end, DLManagedTensor* dst);
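
A sketch of the slicing call (illustrative only; `src` is assumed to already describe a 2-D device matrix, and the function name is invented for this example). Judging from the implementation later in this diff, `end` is exclusive and `dst` becomes a non-owning view into `src`'s memory whose shape/stride metadata is released via the deleter the call installs.

/* Illustrative sketch only: take a non-owning view of rows [1, 3) of an
 * existing device matrix. `src` is assumed to be a populated 2-D tensor. */
#include <cuvs/core/c_api.h>
#include <dlpack/dlpack.h>
#include <string.h>

void slice_rows_example(cuvsResources_t res, DLManagedTensor* src)
{
  DLManagedTensor slice;
  memset(&slice, 0, sizeof(slice));

  /* rows 1 and 2 of src; no data is copied */
  cuvsMatrixSliceRows(res, src, 1, 3, &slice);

  /* ... use slice.dl_tensor while src stays alive ... */

  /* release the shape/stride metadata allocated for the view */
  if (slice.deleter) { slice.deleter(&slice); }
}
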
/** @} */

#ifdef __cplusplus
24 changes: 16 additions & 8 deletions cpp/include/cuvs/core/detail/interop.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -143,9 +143,10 @@ inline bool is_c_contiguous(DLManagedTensor* managed_tensor)

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
static void free_dlmanaged_tensor_shape(DLManagedTensor* tensor)
static void free_dlmanaged_tensor_metadata(DLManagedTensor* tensor)
{
delete[] tensor->dl_tensor.shape;
delete[] tensor->dl_tensor.strides;
}
#pragma GCC diagnostic pop

@@ -157,14 +158,21 @@ static void to_dlpack(MdspanType src, DLManagedTensor* dst)
tensor->dtype = data_type_to_DLDataType<typename MdspanType::value_type>();
tensor->device = accessor_type_to_DLDevice<typename MdspanType::accessor_type>();
tensor->ndim = MdspanType::extents_type::rank();
tensor->data = src.data_handle();

tensor->shape = new int64_t[tensor->ndim];
dst->deleter = free_dlmanaged_tensor_shape;

tensor->data = const_cast<typename MdspanType::value_type*>(src.data_handle());
tensor->shape = new int64_t[tensor->ndim];
for (int64_t i = 0; i < tensor->ndim; ++i) {
tensor->shape[i] = src.extent(i);
}
}

if constexpr (std::is_same_v<typename MdspanType::layout_type, raft::row_major>) {
tensor->strides = nullptr;
} else {
tensor->strides = new int64_t[tensor->ndim];
for (int64_t i = 0; i < tensor->ndim; ++i) {
tensor->strides[i] = src.stride(i);
}
}

dst->deleter = free_dlmanaged_tensor_metadata;
}
} // namespace cuvs::core::detail
108 changes: 105 additions & 3 deletions cpp/include/cuvs/neighbors/cagra.h
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -358,6 +358,66 @@ cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index);
*/
cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int* dim);

/**
* @brief Get size of the CAGRA index
*
* @param[in] index CAGRA index
* @param[out] size return number of vectors in the index
* @return cuvsError_t
*/
cuvsError_t cuvsCagraIndexGetSize(cuvsCagraIndex_t index, uint32_t* size);

/**
* @brief Get graph degree of the CAGRA index
*
* @param[in] index CAGRA index
* @param[out] graph_degree return graph degree
* @return cuvsError_t
*/
cuvsError_t cuvsCagraIndexGetGraphDegree(cuvsCagraIndex_t index, uint32_t* graph_degree);

/**
* @brief Returns a view of the CAGRA dataset
*
* This function returns a non-owning view of the CAGRA dataset.
* The output will be referencing device memory that is directly used
* in CAGRA, without copying the dataset at all. This means that the
* output is only valid as long as the CAGRA index is alive, and once
* cuvsCagraIndexDestroy is called on the cagra index - the returned
* dataset view will be invalid.
*
* Note that the DLManagedTensor dataset returned will have an associated
* 'deleter' function that must be called when the dataset is no longer
* needed. This will free up host memory that stores the shape of the
* dataset view.
*
* @param[in] index CAGRA index
* @param[out] dataset the dataset used in cagra
* @return cuvsError_t
*/
cuvsError_t cuvsCagraIndexGetDataset(cuvsCagraIndex_t index, DLManagedTensor* dataset);
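
A short sketch of how these getters might be used together (editor's illustration, not from the PR; error handling is omitted and the function name is invented):

/* Illustrative sketch only: query index metadata and borrow a view of the
 * dataset. `index` is assumed to have been built or deserialized already. */
#include <cuvs/core/c_api.h>
#include <cuvs/neighbors/cagra.h>
#include <dlpack/dlpack.h>
#include <stdint.h>
#include <string.h>

void inspect_cagra_index_example(cuvsCagraIndex_t index)
{
  uint32_t size, graph_degree;
  int dim;
  cuvsCagraIndexGetSize(index, &size);
  cuvsCagraIndexGetGraphDegree(index, &graph_degree);
  cuvsCagraIndexGetDims(index, &dim);

  DLManagedTensor dataset;
  memset(&dataset, 0, sizeof(dataset));
  cuvsCagraIndexGetDataset(index, &dataset);   /* non-owning device view */

  /* dataset.dl_tensor is valid only while `index` is alive */

  /* free the host-side shape/stride metadata of the view */
  if (dataset.deleter) { dataset.deleter(&dataset); }
}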

/**
* @brief Returns a view of the CAGRA graph
*
* This function returns a non-owning view of the CAGRA graph.
* The output will be referencing device memory that is directly used
* in CAGRA, without copying the graph at all. This means that the
* output is only valid as long as the CAGRA index is alive, and once
* cuvsCagraIndexDestroy is called on the cagra index - the returned
* graph view will be invalid.
*
* Note that the DLManagedTensor graph returned will have an associated
* 'deleter' function that must be called when the graph is no longer
* needed. This will free up host memory that stores the metadata for the
* graph view.
*
* @param[in] index CAGRA index
* @param[out] graph the output knn graph.
* @return cuvsError_t
*/
cuvsError_t cuvsCagraIndexGetGraph(cuvsCagraIndex_t index, DLManagedTensor* graph);
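
And a sketch combining the graph view with the cuvsMatrixCopy call from this PR to pull the knn graph into host memory (illustrative only; the uint32 element type, the buffer handling, and the function name are assumptions for this example):

/* Illustrative sketch only: copy the device-resident knn graph to a host
 * buffer. Assumes the graph elements are 32-bit unsigned integers. */
#include <cuda_runtime.h>
#include <cuvs/core/c_api.h>
#include <cuvs/neighbors/cagra.h>
#include <dlpack/dlpack.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

void copy_graph_to_host_example(cuvsResources_t res, cuvsCagraIndex_t index)
{
  DLManagedTensor graph;
  memset(&graph, 0, sizeof(graph));
  cuvsCagraIndexGetGraph(index, &graph);              /* non-owning device view */

  int64_t shape[2] = {graph.dl_tensor.shape[0], graph.dl_tensor.shape[1]};
  uint32_t* host_data = (uint32_t*)malloc((size_t)(shape[0] * shape[1]) * sizeof(uint32_t));

  DLManagedTensor host_graph;
  memset(&host_graph, 0, sizeof(host_graph));
  host_graph.dl_tensor.data               = host_data;
  host_graph.dl_tensor.device.device_type = kDLCPU;
  host_graph.dl_tensor.ndim               = 2;
  host_graph.dl_tensor.dtype              = graph.dl_tensor.dtype;
  host_graph.dl_tensor.shape              = shape;
  host_graph.dl_tensor.strides            = NULL;     /* contiguous row-major */

  cuvsMatrixCopy(res, &graph, &host_graph);           /* device -> host */
  cudaDeviceSynchronize();

  free(host_data);
  if (graph.deleter) { graph.deleter(&graph); }       /* free view metadata */
}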

/**
* @}
*/
@@ -434,7 +494,8 @@ cuvsError_t cuvsCagraMergeParamsDestroy(cuvsCagraMergeParams_t params);
* @param[in] res cuvsResources_t opaque C handle
* @param[in] params cuvsCagraIndexParams_t used to build CAGRA index
* @param[in] dataset DLManagedTensor* training dataset
* @param[out] index cuvsCagraIndex_t Newly built CAGRA index
* @param[inout] index cuvsCagraIndex_t Newly built CAGRA index. This index needs to be already
* created with cuvsCagraIndexCreate.
* @return cuvsError_t
*/
cuvsError_t cuvsCagraBuild(cuvsResources_t res,
@@ -609,10 +670,51 @@ cuvsError_t cuvsCagraSerializeToHnswlib(cuvsResources_t res,
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] filename the name of the file that stores the index
* @param[out] index CAGRA index loaded disk
* @param[inout] index cuvsCagraIndex_t CAGRA index loaded from disk. This index needs to be already
* created with cuvsCagraIndexCreate.
*/
cuvsError_t cuvsCagraDeserialize(cuvsResources_t res, const char* filename, cuvsCagraIndex_t index);
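
A tiny sketch of the create-then-deserialize pattern this doc change describes (editor's illustration; the filename is hypothetical and error handling is omitted):

/* Illustrative sketch only: create the handle first, then deserialize into
 * it. The filename is hypothetical. */
#include <cuvs/core/c_api.h>
#include <cuvs/neighbors/cagra.h>

void load_index_example(cuvsResources_t res)
{
  cuvsCagraIndex_t index;
  cuvsCagraIndexCreate(&index);
  cuvsCagraDeserialize(res, "cagra_index.bin", index);
  /* ... search with the index ... */
  cuvsCagraIndexDestroy(index);
}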

/**
* Load index from a dataset and graph
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] metric cuvsDistanceType to use in the index
* @param[in] graph the knn graph to use, shape (size, graph_degree)
* @param[in] dataset the dataset to use, shape (size, dim)
* @param[inout] index cuvsCagraIndex_t CAGRA index populated with the graph and dataset.
* This index needs to be already created with
* cuvsCagraIndexCreate.
*
* @code {.c}
* #include <cuvs/core/c_api.h>
* #include <cuvs/neighbors/cagra.h>
*
* // Create cuvsResources_t
* cuvsResources_t res;
* cuvsError_t res_create_status = cuvsResourcesCreate(&res);
*
* // Create CAGRA index
* cuvsCagraIndex_t index;
* cuvsError_t index_create_status = cuvsCagraIndexCreate(&index);
*
* // Assume a populated `DLManagedTensor` type here for the graph and dataset
* DLManagedTensor dataset;
* DLManagedTensor graph;
*
* cuvsDistanceType metric = L2Expanded;
*
* // Build the CAGRA Index from the graph/dataset
* cuvsError_t status = cuvsCagraIndexFromGraph(res, metric, &graph, &dataset, index);
*
* @endcode
*/
cuvsError_t cuvsCagraIndexFromGraph(cuvsResources_t res,
Member: Nitpick - can we please rename this to cuvsCagraIndexFromParams or cuvsCagraIndexFromArgs? I'd like to keep the API design consistent, and having to specify specific args in the name will get unwieldy quickly.

Member Author: I renamed to cuvsCagraIndexFromArgs in the last commit (went with FromArgs instead of FromParams, since FromParams could be confused with the index params we use to build the index).

cuvsDistanceType metric,
DLManagedTensor* graph,
DLManagedTensor* dataset,
cuvsCagraIndex_t index);

/**
* @brief Merge multiple CAGRA indices into a single CAGRA index.
*
115 changes: 115 additions & 0 deletions cpp/src/core/c_api.cpp
@@ -20,6 +20,7 @@

#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resources.hpp>
#include <raft/util/cudart_utils.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/managed_memory_resource.hpp>
@@ -165,3 +166,117 @@ extern "C" cuvsError_t cuvsVersionGet(uint16_t* major, uint16_t* minor, uint16_t
*patch = CUVS_VERSION_PATCH;
return CUVS_SUCCESS;
}

namespace {
template <typename T>
void _copy_matrix(cuvsResources_t res, DLManagedTensor* src_managed, DLManagedTensor* dst_managed)
{
DLTensor& src = src_managed->dl_tensor;
DLTensor& dst = dst_managed->dl_tensor;

int64_t src_row_stride = src.strides == nullptr ? src.shape[1] : src.strides[0];
int64_t dst_row_stride = dst.strides == nullptr ? dst.shape[1] : dst.strides[0];
auto res_ptr = reinterpret_cast<raft::resources*>(res);

raft::copy_matrix<T>(static_cast<T*>(dst.data),
dst_row_stride,
static_cast<const T*>(src.data),
src_row_stride,
src.shape[1],
src.shape[0],
raft::resource::get_cuda_stream(*res_ptr));
}
} // namespace

extern "C" cuvsError_t cuvsMatrixCopy(cuvsResources_t res,
DLManagedTensor* src_managed,
DLManagedTensor* dst_managed)
{
return cuvs::core::translate_exceptions([=] {
DLTensor& src = src_managed->dl_tensor;
DLTensor& dst = dst_managed->dl_tensor;

RAFT_EXPECTS(src.ndim == 2, "src should be a 2 dimensional tensor");
RAFT_EXPECTS(dst.ndim == 2, "dst should be a 2 dimensional tensor");

for (int64_t i = 0; i < src.ndim; ++i) {
RAFT_EXPECTS(src.shape[i] == dst.shape[i], "shape mismatch between src and dst tensors");
}
RAFT_EXPECTS(src.dtype.code == dst.dtype.code, "dtype mismatch between src and dst tensors");

// at some point we could probably copy from a float32 to a float16 here, but for the
// moment this isn't supported
RAFT_EXPECTS(src.dtype.bits == dst.dtype.bits,
"dtype bits width mismatch between src and dst tensors");

if (src.dtype.code == kDLFloat && src.dtype.bits == 32) {
_copy_matrix<float>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLFloat && src.dtype.bits == 16) {
_copy_matrix<half>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLFloat && src.dtype.bits == 64) {
_copy_matrix<double>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLInt && src.dtype.bits == 8) {
_copy_matrix<int8_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLInt && src.dtype.bits == 16) {
_copy_matrix<int16_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLInt && src.dtype.bits == 32) {
_copy_matrix<int32_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLInt && src.dtype.bits == 64) {
_copy_matrix<int64_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLUInt && src.dtype.bits == 8) {
_copy_matrix<uint8_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLUInt && src.dtype.bits == 16) {
_copy_matrix<uint16_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLUInt && src.dtype.bits == 32) {
_copy_matrix<uint32_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLUInt && src.dtype.bits == 64) {
_copy_matrix<uint64_t>(res, src_managed, dst_managed);
} else {
RAFT_FAIL("Unsupported dtype: %d and bits: %d", src.dtype.code, src.dtype.bits);
}
});
}

extern "C" void cuvsMatrixDestroy(DLManagedTensor* tensor)
{
if (tensor->dl_tensor.shape != nullptr) {
delete[] tensor->dl_tensor.shape;
tensor->dl_tensor.shape = nullptr;
}
if (tensor->dl_tensor.strides != nullptr) {
delete[] tensor->dl_tensor.strides;
tensor->dl_tensor.strides = nullptr;
}
}

extern "C" cuvsError_t cuvsMatrixSliceRows(cuvsResources_t res,
DLManagedTensor* src_managed,
int64_t start,
int64_t end,
DLManagedTensor* dst_managed)
{
return cuvs::core::translate_exceptions([=] {
RAFT_EXPECTS(end >= start, "end index must be greater than start index");

DLTensor& src = src_managed->dl_tensor;
DLTensor& dst = dst_managed->dl_tensor;
RAFT_EXPECTS(src.ndim == 2, "src should be a 2 dimensional tensor");

dst.dtype = src.dtype;
dst.device = src.device;
dst.ndim = 2;
dst.shape = new int64_t[2];
dst.shape[0] = end - start;
dst.shape[1] = src.shape[1];

int64_t row_strides = dst.shape[1];
if (src.strides) {
dst.strides = new int64_t[2];
row_strides = dst.strides[0] = src.strides[0];
dst.strides[1] = src.strides[1];
}

dst.data = static_cast<char*>(src.data) + start * row_strides * (dst.dtype.bits / 8);
dst_managed->deleter = cuvsMatrixDestroy;
});
}