Merged
26 commits
f4574ad
MG C API
viclafargue Jul 23, 2025
d00bea2
Documentation
viclafargue Jul 31, 2025
3896985
Merge branch 'branch-25.10' into mg-c-api
cjnolet Jul 31, 2025
f8d28d0
switch to MultiGpu
viclafargue Aug 4, 2025
d854345
Renaming and include removal
viclafargue Aug 5, 2025
822b966
answering review
viclafargue Aug 5, 2025
fb4e2b6
fix template instantiation
viclafargue Aug 5, 2025
b1b23ca
Updating the serialization process
viclafargue Aug 5, 2025
7f4c590
copyright edit
viclafargue Aug 11, 2025
147dc33
Merge branch 'branch-25.10' into mg-c-api
viclafargue Aug 11, 2025
5e16895
Merge branch 'branch-25.10' into mg-c-api
cjnolet Aug 14, 2025
8db9b86
Add IVF-Flat half type to MG API
viclafargue Aug 15, 2025
94b0cd5
Merge branch 'branch-25.10' into mg-c-api
viclafargue Aug 18, 2025
650b221
Merge branch 'branch-25.10' into mg-c-api
cjnolet Aug 27, 2025
a41f17c
MG Python API
viclafargue Sep 2, 2025
30363b7
Merge branch 'branch-25.10' into mg-python-api
viclafargue Sep 2, 2025
69ffaf8
Adding documentation
viclafargue Sep 3, 2025
fd1cb41
Merge branch 'branch-25.10' into mg-python-api
cjnolet Sep 3, 2025
bb44474
Answering review
viclafargue Sep 4, 2025
1dfed37
Fixing issues
viclafargue Sep 9, 2025
3ed0192
Merge branch 'branch-25.10' into mg-python-api
viclafargue Sep 9, 2025
15b8f5f
Merge branch 'branch-25.10' into mg-python-api
viclafargue Sep 10, 2025
32b4d1c
Merge branch 'branch-25.10' into mg-python-api
cjnolet Sep 16, 2025
984c6f6
Merge branch 'branch-25.10' into mg-python-api
cjnolet Sep 17, 2025
6400c0f
Merge branch 'branch-25.10' into mg-python-api
viclafargue Sep 19, 2025
9528af3
correct merge typo
viclafargue Sep 19, 2025
17 changes: 0 additions & 17 deletions cpp/include/cuvs/core/c_api.h
@@ -75,23 +75,6 @@ cuvsError_t cuvsResourcesCreate(cuvsResources_t* res);
*/
cuvsError_t cuvsResourcesDestroy(cuvsResources_t res);

/**
* @brief Create an Initialized opaque C handle for C++ type `raft::device_resources_snmg`
* for multi-GPU operations
*
* @param[in] res cuvsResources_t opaque C handle
* @return cuvsError_t
*/
cuvsError_t cuvsMultiGpuResourcesCreate(cuvsResources_t* res);

/**
* @brief Destroy and de-allocate opaque C handle for C++ type `raft::device_resources_snmg`
*
* @param[in] res cuvsResources_t opaque C handle
* @return cuvsError_t
*/
cuvsError_t cuvsMultiGpuResourcesDestroy(cuvsResources_t res);

/**
* @brief Set cudaStream_t on cuvsResources_t to queue CUDA kernels on APIs
* that accept a cuvsResources_t handle
4 changes: 2 additions & 2 deletions cpp/scripts/gitutils.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -155,7 +155,7 @@ def uncommittedFiles():
ret = []
for f in files.splitlines():
f = f.strip(" ")
f = re.sub("\s+", " ", f) # noqa: W605
f = re.sub(r"\s+", " ", f) # noqa: W605
tmp = f.split(" ", 1)
# only consider staged files or uncommitted files
# in other words, ignore untracked files
50 changes: 48 additions & 2 deletions cpp/src/neighbors/mg_cagra_c.cpp
@@ -267,7 +267,12 @@ extern "C" cuvsError_t cuvsMultiGpuCagraBuild(cuvsResources_t res,
cuvsMultiGpuCagraIndex_t index)
{
return cuvs::core::translate_exceptions([=] {
auto dataset = dataset_tensor->dl_tensor;
auto dataset = dataset_tensor->dl_tensor;

// Multi-GPU CAGRA requires dataset to be in host memory
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(dataset),
"Multi-GPU CAGRA build requires dataset to have host compatible memory");

index->dtype.code = dataset.dtype.code;
index->dtype.bits = dataset.dtype.bits;

@@ -295,7 +300,29 @@ extern "C" cuvsError_t cuvsMultiGpuCagraSearch(cuvsResources_t res,
DLManagedTensor* distances_tensor)
{
return cuvs::core::translate_exceptions([=] {
auto queries = queries_tensor->dl_tensor;
auto queries = queries_tensor->dl_tensor;
auto neighbors = neighbors_tensor->dl_tensor;
auto distances = distances_tensor->dl_tensor;

// Multi-GPU CAGRA requires all tensors to be in host memory
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(queries),
"Multi-GPU CAGRA search requires queries to have host compatible memory");
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(neighbors),
"Multi-GPU CAGRA search requires neighbors to have host compatible memory");
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(distances),
"Multi-GPU CAGRA search requires distances to have host compatible memory");

// Validate data types
RAFT_EXPECTS(neighbors.dtype.code == kDLInt && neighbors.dtype.bits == 64,
"neighbors should be of type int64_t");
RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32,
"distances should be of type float32");

// Check type compatibility between index and queries
RAFT_EXPECTS(queries.dtype.code == index->dtype.code,
"type mismatch between index and queries");
RAFT_EXPECTS(queries.dtype.bits == index->dtype.bits,
"type mismatch between index and queries");

if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
_mg_search<float>(res, *params, *index, queries_tensor, neighbors_tensor, distances_tensor);
@@ -321,6 +348,25 @@ extern "C" cuvsError_t cuvsMultiGpuCagraExtend(cuvsResources_t res,
return cuvs::core::translate_exceptions([=] {
auto vectors = new_vectors_tensor->dl_tensor;

// Multi-GPU CAGRA requires vectors to be in host memory
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(vectors),
"Multi-GPU CAGRA extend requires new_vectors to have host compatible memory");

// Check type compatibility between index and vectors
RAFT_EXPECTS(vectors.dtype.code == index->dtype.code,
"type mismatch between index and new_vectors");
RAFT_EXPECTS(vectors.dtype.bits == index->dtype.bits,
"type mismatch between index and new_vectors");

// If indices are provided, they should also be in host memory
if (new_indices_tensor != nullptr) {
auto indices = new_indices_tensor->dl_tensor;
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(indices),
"Multi-GPU CAGRA extend requires new_indices to have host compatible memory");
RAFT_EXPECTS(indices.dtype.code == kDLUInt && indices.dtype.bits == 32,
"new_indices should be of type uint32_t");
}

if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 32) {
_mg_extend<float>(res, *index, new_vectors_tensor, new_indices_tensor);
} else if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 16) {
50 changes: 48 additions & 2 deletions cpp/src/neighbors/mg_ivf_flat_c.cpp
@@ -264,7 +264,12 @@ extern "C" cuvsError_t cuvsMultiGpuIvfFlatBuild(cuvsResources_t res,
cuvsMultiGpuIvfFlatIndex_t index)
{
return cuvs::core::translate_exceptions([=] {
auto dataset = dataset_tensor->dl_tensor;
auto dataset = dataset_tensor->dl_tensor;

// Multi-GPU IVF-Flat requires dataset to be in host memory
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(dataset),
"Multi-GPU IVF-Flat build requires dataset to have host compatible memory");

index->dtype.code = dataset.dtype.code;
index->dtype.bits = dataset.dtype.bits;

@@ -292,7 +297,29 @@ extern "C" cuvsError_t cuvsMultiGpuIvfFlatSearch(cuvsResources_t res,
DLManagedTensor* distances_tensor)
{
return cuvs::core::translate_exceptions([=] {
auto queries = queries_tensor->dl_tensor;
auto queries = queries_tensor->dl_tensor;
auto neighbors = neighbors_tensor->dl_tensor;
auto distances = distances_tensor->dl_tensor;

// Multi-GPU IVF-Flat requires all tensors to be in host memory
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(queries),
"Multi-GPU IVF-Flat search requires queries to have host compatible memory");
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(neighbors),
"Multi-GPU IVF-Flat search requires neighbors to have host compatible memory");
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(distances),
"Multi-GPU IVF-Flat search requires distances to have host compatible memory");

// Validate data types
RAFT_EXPECTS(neighbors.dtype.code == kDLInt && neighbors.dtype.bits == 64,
"neighbors should be of type int64_t");
RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32,
"distances should be of type float32");

// Check type compatibility between index and queries
RAFT_EXPECTS(queries.dtype.code == index->dtype.code,
"type mismatch between index and queries");
RAFT_EXPECTS(queries.dtype.bits == index->dtype.bits,
"type mismatch between index and queries");

if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
_mg_search<float>(res, *params, *index, queries_tensor, neighbors_tensor, distances_tensor);
@@ -318,6 +345,25 @@ extern "C" cuvsError_t cuvsMultiGpuIvfFlatExtend(cuvsResources_t res,
return cuvs::core::translate_exceptions([=] {
auto vectors = new_vectors_tensor->dl_tensor;

// Multi-GPU IVF-Flat requires vectors to be in host memory
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(vectors),
"Multi-GPU IVF-Flat extend requires new_vectors to have host compatible memory");

// Check type compatibility between index and vectors
RAFT_EXPECTS(vectors.dtype.code == index->dtype.code,
"type mismatch between index and new_vectors");
RAFT_EXPECTS(vectors.dtype.bits == index->dtype.bits,
"type mismatch between index and new_vectors");

// If indices are provided, they should also be in host memory
if (new_indices_tensor != nullptr) {
auto indices = new_indices_tensor->dl_tensor;
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(indices),
"Multi-GPU IVF-Flat extend requires new_indices to have host compatible memory");
RAFT_EXPECTS(indices.dtype.code == kDLInt && indices.dtype.bits == 64,
"new_indices should be of type int64_t");
}

if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 32) {
_mg_extend<float>(res, *index, new_vectors_tensor, new_indices_tensor);
} else if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 16) {
76 changes: 51 additions & 25 deletions cpp/src/neighbors/mg_ivf_pq_c.cpp
@@ -256,7 +256,12 @@ extern "C" cuvsError_t cuvsMultiGpuIvfPqBuild(cuvsResources_t res,
cuvsMultiGpuIvfPqIndex_t index)
{
return cuvs::core::translate_exceptions([=] {
auto dataset = dataset_tensor->dl_tensor;
auto dataset = dataset_tensor->dl_tensor;

// Multi-GPU IVF-PQ requires dataset to be in host memory
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(dataset),
"Multi-GPU IVF-PQ build requires dataset to have host compatible memory");

index->dtype.code = dataset.dtype.code;
index->dtype.bits = dataset.dtype.bits;

@@ -284,7 +289,29 @@ extern "C" cuvsError_t cuvsMultiGpuIvfPqSearch(cuvsResources_t res,
DLManagedTensor* distances_tensor)
{
return cuvs::core::translate_exceptions([=] {
auto queries = queries_tensor->dl_tensor;
auto queries = queries_tensor->dl_tensor;
auto neighbors = neighbors_tensor->dl_tensor;
auto distances = distances_tensor->dl_tensor;

// Multi-GPU IVF-PQ requires all tensors to be in host memory
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(queries),
"Multi-GPU IVF-PQ search requires queries to have host compatible memory");
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(neighbors),
"Multi-GPU IVF-PQ search requires neighbors to have host compatible memory");
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(distances),
"Multi-GPU IVF-PQ search requires distances to have host compatible memory");

// Validate data types
RAFT_EXPECTS(neighbors.dtype.code == kDLInt && neighbors.dtype.bits == 64,
"neighbors should be of type int64_t");
RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32,
"distances should be of type float32");

// Check type compatibility between index and queries
RAFT_EXPECTS(queries.dtype.code == index->dtype.code,
"type mismatch between index and queries");
RAFT_EXPECTS(queries.dtype.bits == index->dtype.bits,
"type mismatch between index and queries");

if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
_mg_search<float>(res, *params, *index, queries_tensor, neighbors_tensor, distances_tensor);
@@ -310,6 +337,25 @@ extern "C" cuvsError_t cuvsMultiGpuIvfPqExtend(cuvsResources_t res,
return cuvs::core::translate_exceptions([=] {
auto vectors = new_vectors_tensor->dl_tensor;

// Multi-GPU IVF-PQ requires vectors to be in host memory
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(vectors),
"Multi-GPU IVF-PQ extend requires new_vectors to have host compatible memory");

// Check type compatibility between index and vectors
RAFT_EXPECTS(vectors.dtype.code == index->dtype.code,
"type mismatch between index and new_vectors");
RAFT_EXPECTS(vectors.dtype.bits == index->dtype.bits,
"type mismatch between index and new_vectors");

// If indices are provided, they should also be in host memory
if (new_indices_tensor != nullptr) {
auto indices = new_indices_tensor->dl_tensor;
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(indices),
"Multi-GPU IVF-PQ extend requires new_indices to have host compatible memory");
RAFT_EXPECTS(indices.dtype.code == kDLInt && indices.dtype.bits == 64,
"new_indices should be of type int64_t");
}

if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 32) {
_mg_extend<float>(res, *index, new_vectors_tensor, new_indices_tensor);
} else if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 16) {
@@ -381,28 +427,8 @@ extern "C" cuvsError_t cuvsMultiGpuIvfPqDistribute(cuvsResources_t res,
cuvsMultiGpuIvfPqIndex_t index)
{
return cuvs::core::translate_exceptions([=] {
std::ifstream is(filename, std::ios::in | std::ios::binary);
if (!is) { RAFT_FAIL("Cannot open file %s", filename); }
char dtype_string[4];
is.read(dtype_string, 4);
auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4));
is.close();

index->dtype.bits = dtype.itemsize * 8;
if (dtype.kind == 'f' && dtype.itemsize == 4) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<float>(res, filename));
} else if (dtype.kind == 'f' && dtype.itemsize == 2) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<half>(res, filename));
} else if (dtype.kind == 'i' && dtype.itemsize == 1) {
index->dtype.code = kDLInt;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<int8_t>(res, filename));
} else if (dtype.kind == 'u' && dtype.itemsize == 1) {
index->dtype.code = kDLUInt;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<uint8_t>(res, filename));
} else {
RAFT_FAIL("Unsupported index dtype");
}
index->dtype.code = kDLFloat;
index->dtype.bits = 32;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<float>(res, filename));
Comment on lines +430 to +432

Member:

Is this change intentional? (do we only support float32 here, and you meant to remove support for the other dtypes?)

Contributor Author:

This function deserializes a single-GPU index and distributes it across multiple GPUs. The IVF-PQ index does not have a T template parameter, only IdxT (which is always int64_t). Because of this, a file containing a serialized IVF-PQ index is not prepended with an index type (float, half, uint8_t, int8_t). This makes it impossible to find the index type necessary to instantiate a multi-GPU index. This is fundamentally an issue in the multi-GPU API that should be remediated. In the meantime, we should assume that for this specific case (the replication of a single-GPU IVF-PQ index), the user is planning to search with floats.

});
}
14 changes: 13 additions & 1 deletion docs/source/python_api/neighbors.rst
@@ -5,14 +5,26 @@ Nearest Neighbors
:language: python
:class: highlight

Single-GPU Algorithms
#####################

.. toctree::
:maxdepth: 2
:caption: Contents:
:caption: Single-GPU ANN Algorithms:

neighbors_brute_force.rst
neighbors_cagra.rst
neighbors_hnsw.rst
neighbors_ivf_flat.rst
neighbors_ivf_pq.rst
neighbors_nn_decent.rst

Multi-GPU Algorithms
####################

.. toctree::
:maxdepth: 2
:caption: Multi-GPU Distributed ANN:

neighbors_multi_gpu.rst
neighbors_all_neighbors.rst
55 changes: 55 additions & 0 deletions docs/source/python_api/neighbors_mg_cagra.rst
@@ -0,0 +1,55 @@
Multi-GPU CAGRA
===============

Multi-GPU CAGRA extends the graph-based CAGRA algorithm to work across multiple GPUs, providing improved scalability and performance for large-scale vector search. It supports both replicated and sharded distribution modes.

.. role:: py(code)
:language: python
:class: highlight

.. note::
**IMPORTANT**: Multi-GPU CAGRA requires all data (datasets, queries, output arrays) to be in host memory (CPU).
If using CuPy/device arrays, transfer to host with ``array.get()`` or ``cp.asnumpy(array)`` before use.
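
A minimal usage sketch (assuming the ``build``/``search`` signatures mirror the single-GPU ``cuvs.neighbors.cagra`` API; exact parameter names and return order may differ):

.. code-block:: python

    import numpy as np
    from cuvs.neighbors import mg_cagra

    # Dataset and queries stay in host (CPU) memory, e.g. as NumPy arrays.
    dataset = np.random.random_sample((100_000, 128)).astype(np.float32)
    queries = np.random.random_sample((1_000, 128)).astype(np.float32)

    # Build a multi-GPU CAGRA index across the available GPUs.
    index = mg_cagra.build(mg_cagra.IndexParams(), dataset)

    # Search for the 10 nearest neighbors; outputs are host arrays as well.
    distances, neighbors = mg_cagra.search(mg_cagra.SearchParams(), index, queries, 10)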

Index build parameters
######################

.. autoclass:: cuvs.neighbors.mg_cagra.IndexParams
:members:

Index search parameters
#######################

.. autoclass:: cuvs.neighbors.mg_cagra.SearchParams
:members:

Index
#####

.. autoclass:: cuvs.neighbors.mg_cagra.Index
:members:

Index build
###########

.. autofunction:: cuvs.neighbors.mg_cagra.build

Index search
############

.. autofunction:: cuvs.neighbors.mg_cagra.search

Index save
##########

.. autofunction:: cuvs.neighbors.mg_cagra.save

Index load
##########

.. autofunction:: cuvs.neighbors.mg_cagra.load

Index distribute
################

.. autofunction:: cuvs.neighbors.mg_cagra.distribute
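
A sketch of building a single-GPU index and replicating it with ``distribute`` (assuming ``distribute`` takes the path of a serialized single-GPU CAGRA index, as in the C API; the exact signature may differ):

.. code-block:: python

    import numpy as np
    from cuvs.neighbors import cagra, mg_cagra

    dataset = np.random.random_sample((100_000, 128)).astype(np.float32)

    # Build and serialize a single-GPU CAGRA index first.
    sg_index = cagra.build(cagra.IndexParams(), dataset)
    cagra.save("cagra_index.bin", sg_index)

    # Replicate the serialized index across the available GPUs.
    mg_index = mg_cagra.distribute("cagra_index.bin")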