-
Notifications
You must be signed in to change notification settings - Fork 143
[Feat] Add support of logical merge in Cagra #713
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
cb8f16b
6c00e52
450df2a
4debc02
c5ecfe8
b3a99c6
bcfa656
186be95
6138f92
b962830
1e61e1b
590c3a8
a986d2e
73b96e3
f357c6e
37bbfe5
8caa251
e229aee
5076f8f
7610e65
0f42309
acb4704
63e9d55
db0c624
c1023e1
39e7938
9e112a7
f5eb6e4
da8c2e5
0342358
ddef8ce
147be54
acbcb50
82d978b
007b9da
9a69747
e19590d
eca7283
bf0bca5
7d49411
f2ec31c
d08f458
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -276,7 +276,6 @@ struct extend_params { | |
| /** | ||
| * @brief Determines the strategy for merging CAGRA graphs. | ||
| * | ||
| * @note Currently, only the PHYSICAL strategy is supported. | ||
| */ | ||
| enum MergeStrategy { | ||
| /** | ||
|
|
@@ -286,9 +285,16 @@ enum MergeStrategy { | |
| * This is expensive to build but does not impact search latency or quality. | ||
| * Preferred for many smaller CAGRA graphs. | ||
| * | ||
| * @note Currently, this is the only supported strategy. | ||
| */ | ||
| PHYSICAL | ||
| PHYSICAL, | ||
| /** | ||
| * @brief Logical merge: Wraps a new index structure around existing CAGRA graphs | ||
| * and broadcasts the query to each of them. | ||
| * | ||
| * This is a fast merge but incurs a small hit in search latency. | ||
| * Preferred for fewer larger CAGRA graphs. | ||
| */ | ||
| LOGICAL | ||
| }; | ||
|
|
||
| /** | ||
|
|
@@ -563,6 +569,82 @@ struct index : cuvs::neighbors::index { | |
| raft::device_matrix_view<const IdxT, int64_t, raft::row_major> graph_view_; | ||
| std::unique_ptr<neighbors::dataset<int64_t>> dataset_; | ||
| }; | ||
| /** | ||
| * @} | ||
| */ | ||
|
|
||
| /** | ||
| * @defgroup cagra_cpp_composite_index CAGRA composite index type | ||
| * @{ | ||
| */ | ||
|
|
||
| /** | ||
| * @brief Lightweight composite kNN index for CAGRA. | ||
| * | ||
| * This class logically aggregates multiple CAGRA indices into a single composite index, | ||
| * providing a unified interface for kNN search. It is a lightweight structure | ||
| * that does not own or manage the lifecycle of the underlying indices; instead, | ||
| * it holds non-owning pointers to them. | ||
| * | ||
| * All sub-indices within the composite index **must share the same distance metric | ||
| * and dimensionality**. | ||
| * | ||
| * @tparam T Data element type. | ||
| * @tparam IdxT Index type representing dataset.extent(0), used for vector indices. | ||
| */ | ||
|
|
||
| template <typename T, typename IdxT> | ||
| struct composite_index { | ||
| template <typename Container> | ||
| explicit composite_index(Container&& indices) : sub_indices(std::forward<Container>(indices)) | ||
| { | ||
| RAFT_EXPECTS(!sub_indices.empty(), "composite_index requires at least one sub-index."); | ||
|
|
||
| for (auto* idx : sub_indices) { | ||
| RAFT_EXPECTS(idx != nullptr, "sub_indices contains a null pointer."); | ||
| } | ||
|
|
||
| auto& first_index = *sub_indices.front(); | ||
| metric_ = first_index.metric(); | ||
| dim_ = first_index.dim(); | ||
| size_ = 0; | ||
|
|
||
| for (auto* idx : sub_indices) { | ||
| RAFT_EXPECTS(idx->metric() == metric_, "All sub-indices must have the same metric."); | ||
| RAFT_EXPECTS(idx->dim() == dim_, "All sub-indices must have the same dim."); | ||
| size_ += idx->size(); | ||
| } | ||
| } | ||
|
|
||
| public: | ||
| composite_index(const composite_index& other) = default; | ||
| composite_index& operator=(const composite_index& other) = default; | ||
|
|
||
| composite_index(composite_index&& other) noexcept = default; | ||
| composite_index& operator=(composite_index&& other) noexcept = default; | ||
|
|
||
| constexpr inline auto metric() const noexcept -> cuvs::distance::DistanceType { return metric_; } | ||
|
|
||
| constexpr inline auto size() const noexcept -> IdxT { return size_; } | ||
|
|
||
| constexpr inline auto dim() const noexcept -> uint32_t { return dim_; } | ||
|
|
||
| constexpr inline auto graph_degree() const noexcept -> uint32_t | ||
| { | ||
| return sub_indices.front()->graph_degree(); | ||
| } | ||
|
|
||
| constexpr inline auto num_indices() const noexcept -> uint32_t { return sub_indices.size(); } | ||
|
|
||
| public: | ||
| std::vector<cuvs::neighbors::cagra::index<T, IdxT>*> sub_indices; | ||
|
||
|
|
||
| private: | ||
| cuvs::distance::DistanceType metric_; | ||
| IdxT size_; | ||
| uint32_t dim_; | ||
| }; | ||
|
|
||
| /** | ||
| * @} | ||
| */ | ||
|
|
@@ -1123,7 +1205,6 @@ void extend( | |
| * @param[in] sample_filter an optional device filter function object that greenlights samples | ||
| * for a given query. (none_sample_filter for no filtering) | ||
| */ | ||
|
|
||
| void search(raft::resources const& res, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not necessarily needed for this PR, but it would be nice if we could work towards having a more agnostic API for the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| cuvs::neighbors::cagra::search_params const& params, | ||
| const cuvs::neighbors::cagra::index<float, uint32_t>& index, | ||
|
|
@@ -1207,7 +1288,105 @@ void search(raft::resources const& res, | |
| raft::device_matrix_view<float, int64_t, raft::row_major> distances, | ||
| const cuvs::neighbors::filtering::base_filter& sample_filter = | ||
| cuvs::neighbors::filtering::none_sample_filter{}); | ||
| /** | ||
| * @brief Search ANN using the composite cagra index. | ||
| * | ||
| * See the [cagra::build](#cagra::build) documentation for a usage example. | ||
| * | ||
| * @param[in] res raft resources | ||
| * @param[in] params configure the search | ||
| * @param[in] index composite cagra index | ||
| * @param[in] queries a device matrix view to a row-major matrix [n_queries, index.dim()] | ||
| * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset | ||
| * [n_queries, k] | ||
| * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, | ||
| * k] | ||
| * @param[in] sample_filter an optional device filter function object that greenlights samples | ||
| * for a given query. (none_sample_filter for no filtering) | ||
| */ | ||
| void search(raft::resources const& res, | ||
rhdong marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| cuvs::neighbors::cagra::search_params const& params, | ||
| const cuvs::neighbors::cagra::composite_index<float, uint32_t>& index, | ||
| raft::device_matrix_view<const float, int64_t, raft::row_major> queries, | ||
| raft::device_matrix_view<uint32_t, int64_t, raft::row_major> neighbors, | ||
| raft::device_matrix_view<float, int64_t, raft::row_major> distances, | ||
| const cuvs::neighbors::filtering::base_filter& sample_filter = | ||
| cuvs::neighbors::filtering::none_sample_filter{}); | ||
|
|
||
| /** | ||
| * @brief Search ANN using the composite cagra index. | ||
| * | ||
| * See the [cagra::build](#cagra::build) documentation for a usage example. | ||
| * | ||
| * @param[in] res raft resources | ||
| * @param[in] params configure the search | ||
| * @param[in] index composite cagra index | ||
| * @param[in] queries a device matrix view to a row-major matrix [n_queries, index.dim()] | ||
| * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset | ||
| * [n_queries, k] | ||
| * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, | ||
| * k] | ||
| * @param[in] sample_filter an optional device filter function object that greenlights samples | ||
| * for a given query. (none_sample_filter for no filtering) | ||
| */ | ||
| void search(raft::resources const& res, | ||
| cuvs::neighbors::cagra::search_params const& params, | ||
| const cuvs::neighbors::cagra::composite_index<half, uint32_t>& index, | ||
| raft::device_matrix_view<const half, int64_t, raft::row_major> queries, | ||
| raft::device_matrix_view<uint32_t, int64_t, raft::row_major> neighbors, | ||
| raft::device_matrix_view<float, int64_t, raft::row_major> distances, | ||
| const cuvs::neighbors::filtering::base_filter& sample_filter = | ||
| cuvs::neighbors::filtering::none_sample_filter{}); | ||
|
|
||
| /** | ||
| * @brief Search ANN using the composite cagra index. | ||
| * | ||
| * See the [cagra::build](#cagra::build) documentation for a usage example. | ||
| * | ||
| * @param[in] res raft resources | ||
| * @param[in] params configure the search | ||
| * @param[in] index composite cagra index | ||
| * @param[in] queries a device matrix view to a row-major matrix [n_queries, index.dim()] | ||
| * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset | ||
| * [n_queries, k] | ||
| * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, | ||
| * k] | ||
| * @param[in] sample_filter an optional device filter function object that greenlights samples | ||
| * for a given query. (none_sample_filter for no filtering) | ||
| */ | ||
| void search(raft::resources const& res, | ||
| cuvs::neighbors::cagra::search_params const& params, | ||
| const cuvs::neighbors::cagra::composite_index<int8_t, uint32_t>& index, | ||
| raft::device_matrix_view<const int8_t, int64_t, raft::row_major> queries, | ||
| raft::device_matrix_view<uint32_t, int64_t, raft::row_major> neighbors, | ||
| raft::device_matrix_view<float, int64_t, raft::row_major> distances, | ||
| const cuvs::neighbors::filtering::base_filter& sample_filter = | ||
| cuvs::neighbors::filtering::none_sample_filter{}); | ||
|
|
||
| /** | ||
| * @brief Search ANN using the composite cagra index. | ||
| * | ||
| * See the [cagra::build](#cagra::build) documentation for a usage example. | ||
| * | ||
| * @param[in] res raft resources | ||
| * @param[in] params configure the search | ||
| * @param[in] index composite cagra index | ||
| * @param[in] queries a device matrix view to a row-major matrix [n_queries, index.dim()] | ||
| * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset | ||
| * [n_queries, k] | ||
| * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, | ||
| * k] | ||
| * @param[in] sample_filter an optional device filter function object that greenlights samples | ||
| * for a given query. (none_sample_filter for no filtering) | ||
| */ | ||
| void search(raft::resources const& res, | ||
| cuvs::neighbors::cagra::search_params const& params, | ||
| const cuvs::neighbors::cagra::composite_index<uint8_t, uint32_t>& index, | ||
| raft::device_matrix_view<const uint8_t, int64_t, raft::row_major> queries, | ||
| raft::device_matrix_view<uint32_t, int64_t, raft::row_major> neighbors, | ||
| raft::device_matrix_view<float, int64_t, raft::row_major> distances, | ||
| const cuvs::neighbors::filtering::base_filter& sample_filter = | ||
| cuvs::neighbors::filtering::none_sample_filter{}); | ||
| /** | ||
| * @} | ||
| */ | ||
|
|
@@ -1983,6 +2162,23 @@ auto merge(raft::resources const& res, | |
| const cuvs::neighbors::cagra::merge_params& params, | ||
| std::vector<cuvs::neighbors::cagra::index<uint8_t, uint32_t>*>& indices) | ||
| -> cuvs::neighbors::cagra::index<uint8_t, uint32_t>; | ||
|
|
||
| auto make_composite_index(const cagra::merge_params& params, | ||
rhdong marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| std::vector<cuvs::neighbors::cagra::index<float, uint32_t>*>& indices) | ||
| -> cuvs::neighbors::cagra::composite_index<float, uint32_t>; | ||
|
|
||
| auto make_composite_index(const cagra::merge_params& params, | ||
| std::vector<cuvs::neighbors::cagra::index<half, uint32_t>*>& indices) | ||
| -> cuvs::neighbors::cagra::composite_index<half, uint32_t>; | ||
|
|
||
| auto make_composite_index(const cagra::merge_params& params, | ||
| std::vector<cuvs::neighbors::cagra::index<int8_t, uint32_t>*>& indices) | ||
| -> cuvs::neighbors::cagra::composite_index<int8_t, uint32_t>; | ||
|
|
||
| auto make_composite_index(const cagra::merge_params& params, | ||
| std::vector<cuvs::neighbors::cagra::index<uint8_t, uint32_t>*>& indices) | ||
| -> cuvs::neighbors::cagra::composite_index<uint8_t, uint32_t>; | ||
|
|
||
| /** | ||
| * @} | ||
| */ | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.