Merged (21 commits)
Changes from all commits
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -108,7 +108,7 @@ repos:
- id: verify-copyright
files: |
(?x)
[.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx|rs)$|
[.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx|rs|java)$|
CMakeLists[.]txt$|
CMakeLists_standalone[.]txt$|
meta[.]yaml$
4 changes: 2 additions & 2 deletions cpp/include/cuvs/cluster/kmeans.h
@@ -30,7 +30,7 @@ extern "C" {
* @{
*/

enum cuvsKMeansInitMethod {
typedef enum {
/**
* Sample the centroids using the kmeans++ strategy
*/
@@ -45,7 +45,7 @@ enum cuvsKMeansInitMethod {
* User provides the array of initial centroids
*/
Array
};
} cuvsKMeansInitMethod;
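The switch from a plain enum to a typedef mainly helps C callers, which otherwise have to repeat the enum keyword at every use site. A minimal sketch (not part of the diff) using the Array member visible in this hunk, assuming cuvs/cluster/kmeans.h is included:

/* After this change, C code can name the type directly: */
cuvsKMeansInitMethod init = Array;
/* Before it, the spelling `enum cuvsKMeansInitMethod init = Array;` was required. */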

/**
* @brief Hyper-parameters for the kmeans algorithm
29 changes: 29 additions & 0 deletions cpp/include/cuvs/core/c_api.h
@@ -17,6 +17,7 @@
#pragma once

#include <cuda_runtime.h>
#include <dlpack/dlpack.h>
#include <stdbool.h>
#include <stdint.h>

@@ -176,6 +177,34 @@ cuvsError_t cuvsRMMHostFree(void* ptr, size_t bytes);
*/
cuvsError_t cuvsVersionGet(uint16_t* major, uint16_t* minor, uint16_t* patch);

/**
* @brief Copy a matrix
*
* This function copies a matrix from src to dst. This lets you copy a matrix
* from device memory to host memory (or vice versa), while accounting for
* differences in strides.
*
* Both src and dst must have the same shape and dtype, but can have different
* strides and device type. The memory for the output dst tensor must already be
* allocated and the tensor initialized.
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] src Pointer to DLManagedTensor to copy
* @param[out] dst Pointer to DLManagedTensor to receive copy of data
*/
cuvsError_t cuvsMatrixCopy(cuvsResources_t res, DLManagedTensor* src, DLManagedTensor* dst);
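As a usage sketch (not part of this diff): copy a small float32 matrix from device memory into a host buffer. Both descriptors are prepared by the caller with identical shape and dtype; the sync before reading the host buffer assumes the cuvsStreamSync helper from this same header.

#include <cuda_runtime.h>
#include <dlpack/dlpack.h>
#include <cuvs/core/c_api.h>

cuvsResources_t res;
cuvsResourcesCreate(&res);

float host_out[6] = {0};
float* device_in  = NULL;
cudaMalloc((void**)&device_in, 6 * sizeof(float));
/* ... fill device_in with 2 x 3 = 6 float values ... */

int64_t shape[2] = {2, 3};

DLManagedTensor src = {0};
src.dl_tensor.data               = device_in;
src.dl_tensor.device.device_type = kDLCUDA;
src.dl_tensor.ndim               = 2;
src.dl_tensor.dtype.code         = kDLFloat;
src.dl_tensor.dtype.bits         = 32;
src.dl_tensor.dtype.lanes        = 1;
src.dl_tensor.shape              = shape;
src.dl_tensor.strides            = NULL;    /* compact row-major             */

DLManagedTensor dst = src;                  /* same shape, dtype and strides */
dst.dl_tensor.data               = host_out;
dst.dl_tensor.device.device_type = kDLCPU;  /* ... but backed by host memory */

cuvsMatrixCopy(res, &src, &dst);            /* device -> host copy           */
cuvsStreamSync(res);                        /* wait before reading host_out  */

cudaFree(device_in);
cuvsResourcesDestroy(res);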

/**
* @brief Slices rows from a matrix
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] src Pointer to DLManagedTensor to slice rows from
* @param[in] start First row index to include in the output
* @param[in] end End row index of the slice (exclusive)
* @param[out] dst Pointer to DLManagedTensor to receive slice from matrix
*/
cuvsError_t cuvsMatrixSliceRows(
cuvsResources_t res, DLManagedTensor* src, int64_t start, int64_t end, DLManagedTensor* dst);
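A short sketch (not part of this diff), reusing res and the device-backed src descriptor from the copy example above. Per the implementation in this PR, the result is a non-owning view over rows [start, end) of src rather than a copy, so only the freshly allocated shape/strides metadata needs to be released through the deleter.

DLManagedTensor view = {0};                   /* zero-init so unset fields stay NULL */
cuvsMatrixSliceRows(res, &src, 0, 1, &view);  /* rows [0, 1) -> a 1 x 3 view         */

/* view.dl_tensor.data aliases src's device memory; release only the view's   */
/* metadata once it is no longer needed. The underlying matrix is untouched.  */
if (view.deleter != NULL) { view.deleter(&view); }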
/** @} */

#ifdef __cplusplus
24 changes: 16 additions & 8 deletions cpp/include/cuvs/core/detail/interop.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -143,9 +143,10 @@ inline bool is_c_contiguous(DLManagedTensor* managed_tensor)

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
static void free_dlmanaged_tensor_shape(DLManagedTensor* tensor)
static void free_dlmanaged_tensor_metadata(DLManagedTensor* tensor)
{
delete[] tensor->dl_tensor.shape;
delete[] tensor->dl_tensor.strides;
}
#pragma GCC diagnostic pop

@@ -157,14 +158,21 @@ static void to_dlpack(MdspanType src, DLManagedTensor* dst)
tensor->dtype = data_type_to_DLDataType<typename MdspanType::value_type>();
tensor->device = accessor_type_to_DLDevice<typename MdspanType::accessor_type>();
tensor->ndim = MdspanType::extents_type::rank();
tensor->data = src.data_handle();

tensor->shape = new int64_t[tensor->ndim];
dst->deleter = free_dlmanaged_tensor_shape;

tensor->data = const_cast<typename MdspanType::value_type*>(src.data_handle());
tensor->shape = new int64_t[tensor->ndim];
for (int64_t i = 0; i < tensor->ndim; ++i) {
tensor->shape[i] = src.extent(i);
}
}

if constexpr (std::is_same_v<typename MdspanType::layout_type, raft::row_major>) {
tensor->strides = nullptr;
} else {
tensor->strides = new int64_t[tensor->ndim];
for (int64_t i = 0; i < tensor->ndim; ++i) {
tensor->strides[i] = src.stride(i);
}
}

dst->deleter = free_dlmanaged_tensor_metadata;
}
} // namespace cuvs::core::detail
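For context on the branch above (an illustration, not code from this PR): DLPack uses strides == NULL to mean a compact row-major layout, and when strides are present they count elements, not bytes. A consumer of the resulting tensor would index it like this (byte_offset ignored for brevity):

/* Address of element (i, j) in a 2-D float DLTensor, covering both conventions. */
static const float* element_at(const DLTensor* t, int64_t i, int64_t j)
{
  const float* base = (const float*)t->data;
  if (t->strides == NULL) {
    return base + i * t->shape[1] + j;                   /* compact row-major   */
  }
  return base + i * t->strides[0] + j * t->strides[1];   /* strides in elements */
}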
108 changes: 105 additions & 3 deletions cpp/include/cuvs/neighbors/cagra.h
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -358,6 +358,66 @@ cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index);
*/
cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int* dim);

/**
* @brief Get size of the CAGRA index
*
* @param[in] index CAGRA index
* @param[out] size return number of vectors in the index
* @return cuvsError_t
*/
cuvsError_t cuvsCagraIndexGetSize(cuvsCagraIndex_t index, uint32_t* size);

/**
* @brief Get graph degree of the CAGRA index
*
* @param[in] index CAGRA index
* @param[out] graph_degree return graph degree
* @return cuvsError_t
*/
cuvsError_t cuvsCagraIndexGetGraphDegree(cuvsCagraIndex_t index, uint32_t* graph_degree);

/**
* @brief Returns a view of the CAGRA dataset
*
* This function returns a non-owning view of the CAGRA dataset.
* The output will be referencing device memory that is directly used
* in CAGRA, without copying the dataset at all. This means that the
* output is only valid as long as the CAGRA index is alive; once
* cuvsCagraIndexDestroy is called on the CAGRA index, the returned
* dataset view becomes invalid.
*
* Note that the DLManagedTensor dataset returned will have an associated
* 'deleter' function that must be called when the dataset is no longer
* needed. This will free up host memory that stores the shape of the
* dataset view.
*
* @param[in] index CAGRA index
* @param[out] dataset the dataset used in cagra
* @return cuvsError_t
*/
cuvsError_t cuvsCagraIndexGetDataset(cuvsCagraIndex_t index, DLManagedTensor* dataset);

/**
* @brief Returns a view of the CAGRA graph
*
* This function returns a non-owning view of the CAGRA graph.
* The output will be referencing device memory that is directly used
* in CAGRA, without copying the graph at all. This means that the
* output is only valid as long as the CAGRA index is alive; once
* cuvsCagraIndexDestroy is called on the CAGRA index, the returned
* graph view becomes invalid.
*
* Note that the DLManagedTensor graph returned will have an associated
* 'deleter' function that must be called when the graph is no longer
* needed. This will free up host memory that stores the metadata for the
* graph view.
*
* @param[in] index CAGRA index
* @param[out] graph the output knn graph.
* @return cuvsError_t
*/
cuvsError_t cuvsCagraIndexGetGraph(cuvsCagraIndex_t index, DLManagedTensor* graph);
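A combined usage sketch for the four new accessors (not part of this diff); it assumes index is a cuvsCagraIndex_t that has already been built or deserialized:

/* The views below alias device memory owned by the index itself. */
uint32_t size = 0, graph_degree = 0;
cuvsCagraIndexGetSize(index, &size);
cuvsCagraIndexGetGraphDegree(index, &graph_degree);

DLManagedTensor dataset = {0};
DLManagedTensor graph   = {0};
cuvsCagraIndexGetDataset(index, &dataset);
cuvsCagraIndexGetGraph(index, &graph);

/* ... read dataset.dl_tensor / graph.dl_tensor while the index is alive ... */

/* Free only the host-side metadata; the device memory still belongs to the  */
/* index and stays valid until cuvsCagraIndexDestroy(index).                 */
if (dataset.deleter) { dataset.deleter(&dataset); }
if (graph.deleter)   { graph.deleter(&graph); }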

/**
* @}
*/
@@ -434,7 +494,8 @@ cuvsError_t cuvsCagraMergeParamsDestroy(cuvsCagraMergeParams_t params);
* @param[in] res cuvsResources_t opaque C handle
* @param[in] params cuvsCagraIndexParams_t used to build CAGRA index
* @param[in] dataset DLManagedTensor* training dataset
* @param[out] index cuvsCagraIndex_t Newly built CAGRA index
* @param[inout] index cuvsCagraIndex_t Newly built CAGRA index. This index needs to be already
* created with cuvsCagraIndexCreate.
* @return cuvsError_t
*/
cuvsError_t cuvsCagraBuild(cuvsResources_t res,
@@ -609,10 +670,51 @@ cuvsError_t cuvsCagraSerializeToHnswlib(cuvsResources_t res,
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] filename the name of the file that stores the index
* @param[out] index CAGRA index loaded disk
* @param[inout] index cuvsCagraIndex_t CAGRA index loaded from disk. This index needs to be already
* created with cuvsCagraIndexCreate.
*/
cuvsError_t cuvsCagraDeserialize(cuvsResources_t res, const char* filename, cuvsCagraIndex_t index);
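Given the [inout] contract above, the handle must exist before the call. A minimal ordering sketch (the filename is a placeholder):

cuvsCagraIndex_t index;
cuvsCagraIndexCreate(&index);                        /* create the handle first    */
cuvsCagraDeserialize(res, "cagra_index.bin", index); /* then populate it from disk */
/* ... search with the index ... */
cuvsCagraIndexDestroy(index);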

/**
* Load index from a dataset and graph
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] metric cuvsDistanceType to use in the index
* @param[in] graph the knn graph to use, shape (size, graph_degree)
* @param[in] dataset the dataset to use, shape (size, dim)
* @param[inout] index cuvsCagraIndex_t CAGRA index populated with the graph and dataset.
* This index needs to be already created with
* cuvsCagraIndexCreate.
*
* @code {.c}
* #include <cuvs/core/c_api.h>
* #include <cuvs/neighbors/cagra.h>
*
* // Create cuvsResources_t
* cuvsResources_t res;
* cuvsError_t res_create_status = cuvsResourcesCreate(&res);
*
* // Create CAGRA index
* cuvsCagraIndex_t index;
* cuvsError_t index_create_status = cuvsCagraIndexCreate(&index);
*
* // Assume a populated `DLManagedTensor` type here for the graph and dataset
* DLManagedTensor dataset;
* DLManagedTensor graph;
*
* cuvsDistanceType metric = L2Expanded;
*
* // Build the CAGRA Index from the graph/dataset
* cuvsError_t status = cuvsCagraIndexFromArgs(res, metric, &graph, &dataset, index);
*
* @endcode
*/
cuvsError_t cuvsCagraIndexFromArgs(cuvsResources_t res,
cuvsDistanceType metric,
DLManagedTensor* graph,
DLManagedTensor* dataset,
cuvsCagraIndex_t index);

/**
* @brief Merge multiple CAGRA indices into a single CAGRA index.
*
115 changes: 115 additions & 0 deletions cpp/src/core/c_api.cpp
@@ -20,6 +20,7 @@

#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resources.hpp>
#include <raft/util/cudart_utils.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/managed_memory_resource.hpp>
@@ -165,3 +166,117 @@ extern "C" cuvsError_t cuvsVersionGet(uint16_t* major, uint16_t* minor, uint16_t
*patch = CUVS_VERSION_PATCH;
return CUVS_SUCCESS;
}

namespace {
template <typename T>
void _copy_matrix(cuvsResources_t res, DLManagedTensor* src_managed, DLManagedTensor* dst_managed)
{
DLTensor& src = src_managed->dl_tensor;
DLTensor& dst = dst_managed->dl_tensor;

int64_t src_row_stride = src.strides == nullptr ? src.shape[1] : src.strides[0];
int64_t dst_row_stride = dst.strides == nullptr ? dst.shape[1] : dst.strides[0];
auto res_ptr = reinterpret_cast<raft::resources*>(res);

raft::copy_matrix<T>(static_cast<T*>(dst.data),
dst_row_stride,
static_cast<const T*>(src.data),
src_row_stride,
src.shape[1],
src.shape[0],
raft::resource::get_cuda_stream(*res_ptr));
}
} // namespace

extern "C" cuvsError_t cuvsMatrixCopy(cuvsResources_t res,
DLManagedTensor* src_managed,
DLManagedTensor* dst_managed)
{
return cuvs::core::translate_exceptions([=] {
DLTensor& src = src_managed->dl_tensor;
DLTensor& dst = dst_managed->dl_tensor;

RAFT_EXPECTS(src.ndim == 2, "src should be a 2 dimensional tensor");
RAFT_EXPECTS(dst.ndim == 2, "dst should be a 2 dimensional tensor");

for (int64_t i = 0; i < src.ndim; ++i) {
RAFT_EXPECTS(src.shape[i] == dst.shape[i], "shape mismatch between src and dst tensors");
}
RAFT_EXPECTS(src.dtype.code == dst.dtype.code, "dtype mismatch between src and dst tensors");

// at some point we could probably copy from a float32 to a float16 here, but for the
// moment this isn't supported
RAFT_EXPECTS(src.dtype.bits == dst.dtype.bits,
"dtype bits width mismatch between src and dst tensors");

if (src.dtype.code == kDLFloat && src.dtype.bits == 32) {
_copy_matrix<float>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLFloat && src.dtype.bits == 16) {
_copy_matrix<half>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLFloat && src.dtype.bits == 64) {
_copy_matrix<double>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLInt && src.dtype.bits == 8) {
_copy_matrix<int8_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLInt && src.dtype.bits == 16) {
_copy_matrix<int16_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLInt && src.dtype.bits == 32) {
_copy_matrix<int32_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLInt && src.dtype.bits == 64) {
_copy_matrix<int64_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLUInt && src.dtype.bits == 8) {
_copy_matrix<uint8_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLUInt && src.dtype.bits == 16) {
_copy_matrix<uint16_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLUInt && src.dtype.bits == 32) {
_copy_matrix<uint32_t>(res, src_managed, dst_managed);
} else if (src.dtype.code == kDLUInt && src.dtype.bits == 64) {
_copy_matrix<uint64_t>(res, src_managed, dst_managed);
} else {
RAFT_FAIL("Unsupported dtype: %d and bits: %d", src.dtype.code, src.dtype.bits);
}
});
}

extern "C" void cuvsMatrixDestroy(DLManagedTensor* tensor)
{
if (tensor->dl_tensor.shape != nullptr) {
delete[] tensor->dl_tensor.shape;
tensor->dl_tensor.shape = nullptr;
}
if (tensor->dl_tensor.strides != nullptr) {
delete[] tensor->dl_tensor.strides;
tensor->dl_tensor.strides = nullptr;
}
}

extern "C" cuvsError_t cuvsMatrixSliceRows(cuvsResources_t res,
DLManagedTensor* src_managed,
int64_t start,
int64_t end,
DLManagedTensor* dst_managed)
{
return cuvs::core::translate_exceptions([=] {
RAFT_EXPECTS(end >= start, "end index must be greater than or equal to start index");

DLTensor& src = src_managed->dl_tensor;
DLTensor& dst = dst_managed->dl_tensor;
RAFT_EXPECTS(src.ndim == 2, "src should be a 2 dimensional tensor");

dst.dtype = src.dtype;
dst.device = src.device;
dst.ndim = 2;
dst.shape = new int64_t[2];
dst.shape[0] = end - start;
dst.shape[1] = src.shape[1];

int64_t row_strides = dst.shape[1];
if (src.strides) {
dst.strides = new int64_t[2];
row_strides = dst.strides[0] = src.strides[0];
dst.strides[1] = src.strides[1];
}

dst.data = static_cast<char*>(src.data) + start * row_strides * (dst.dtype.bits / 8);
dst_managed->deleter = cuvsMatrixDestroy;
});
}
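To make the offset arithmetic above concrete, a worked example (illustration only): for a contiguous 1000 x 128 float32 source, src.strides is NULL and row_strides falls back to dst.shape[1] == 128.

/* Slicing rows [10, 20) of a contiguous 1000 x 128 float32 matrix:          */
/*   dst.shape   = {10, 128}                                                 */
/*   byte offset = start * row_strides * (bits / 8) = 10 * 128 * 4 = 5120    */
/* dst.strides is not written in the contiguous case, so callers should pass */
/* a zero-initialized dst (strides == NULL already means row-major).         */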