Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions cpp/src/neighbors/brute_force_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ void _search(cuvsResources_t res,
using neighbors_mdspan_type = raft::device_matrix_view<int64_t, int64_t, raft::row_major>;
using distances_mdspan_type = raft::device_matrix_view<DistT, int64_t, raft::row_major>;
using prefilter_mds_type = raft::device_vector_view<uint32_t, int64_t>;
using prefilter_bmp_type = cuvs::core::bitmap_view<uint32_t, int64_t>;

auto queries_mds = cuvs::core::from_dlpack<queries_mdspan_type>(queries_tensor);
auto neighbors_mds = cuvs::core::from_dlpack<neighbors_mdspan_type>(neighbors_tensor);
Expand All @@ -85,16 +84,25 @@ void _search(cuvsResources_t res,
distances_mds,
cuvs::neighbors::filtering::none_sample_filter{});
} else if (prefilter.type == BITMAP) {
auto prefilter_ptr = reinterpret_cast<DLManagedTensor*>(prefilter.addr);
auto prefilter_mds = cuvs::core::from_dlpack<prefilter_mds_type>(prefilter_ptr);
const auto prefilter = cuvs::neighbors::filtering::bitmap_filter(
using prefilter_bmp_type = cuvs::core::bitmap_view<uint32_t, int64_t>;
auto prefilter_ptr = reinterpret_cast<DLManagedTensor*>(prefilter.addr);
auto prefilter_mds = cuvs::core::from_dlpack<prefilter_mds_type>(prefilter_ptr);
const auto prefilter = cuvs::neighbors::filtering::bitmap_filter(
prefilter_bmp_type((uint32_t*)prefilter_mds.data_handle(),
queries_mds.extent(0),
index_ptr->dataset().extent(0)));
cuvs::neighbors::brute_force::search(
*res_ptr, params, *index_ptr, queries_mds, neighbors_mds, distances_mds, prefilter);
} else if (prefilter.type == BITSET) {
using prefilter_bst_type = cuvs::core::bitset_view<uint32_t, int64_t>;
auto prefilter_ptr = reinterpret_cast<DLManagedTensor*>(prefilter.addr);
auto prefilter_mds = cuvs::core::from_dlpack<prefilter_mds_type>(prefilter_ptr);
const auto prefilter = cuvs::neighbors::filtering::bitset_filter(
prefilter_bst_type((uint32_t*)prefilter_mds.data_handle(), index_ptr->dataset().extent(0)));
cuvs::neighbors::brute_force::search(
*res_ptr, params, *index_ptr, queries_mds, neighbors_mds, distances_mds, prefilter);
} else {
RAFT_FAIL("Unsupported prefilter type: BITSET");
RAFT_FAIL("Unsupported prefilter type");
}
}

Expand Down
105 changes: 85 additions & 20 deletions cpp/tests/neighbors/brute_force_c.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ extern "C" void run_brute_force(int64_t n_rows,
float* index_data,
float* query_data,
uint32_t* prefilter_data,
enum cuvsFilterType filter_type,
float* distances_data,
int64_t* neighbors_data,
cuvsDistanceType metric);
Expand Down Expand Up @@ -80,6 +81,35 @@ index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector<b
return nnz;
}

/**
 * @brief Tile a packed host bitset `repeat` times into `output`.
 *
 * The first `input_bits` bits of `input` are copied back-to-back `repeat` times, so that
 * output bit `r * input_bits + i` equals input bit `i`. Bits are packed least-significant
 * bit first within each `bitset_t` word.
 *
 * @tparam bitset_t unsigned integer word type used to pack the bits
 *
 * @param[in]  input      packed source bits; must hold at least `input_bits` bits
 * @param[in]  input_bits number of valid bits to read from `input`
 * @param[in]  repeat     number of copies of the input bits to emit
 * @param[out] output     packed destination. It is grown if it holds fewer than
 *                        `ceil(input_bits * repeat / bits-per-word)` words; it is never
 *                        shrunk, and words beyond that count are left untouched.
 */
template <typename bitset_t = uint32_t>
void repeat_cpu_bitset(const std::vector<bitset_t>& input,
                       size_t input_bits,
                       size_t repeat,
                       std::vector<bitset_t>& output)
{
  constexpr size_t bits_per_unit = sizeof(bitset_t) * 8;

  const size_t output_bits  = input_bits * repeat;
  const size_t output_units = (output_bits + bits_per_unit - 1) / bits_per_unit;

  // Nothing to write; also avoids calling memset on a possibly-null data() pointer.
  if (output_units == 0) { return; }

  // Guard against writing past the end of an undersized destination.
  if (output.size() < output_units) { output.resize(output_units); }

  // Bits are OR-ed in below, so the words that will be written must start cleared.
  std::memset(output.data(), 0, output_units * sizeof(bitset_t));

  size_t output_bit_index = 0;
  for (size_t r = 0; r < repeat; ++r) {
    for (size_t i = 0; i < input_bits; ++i, ++output_bit_index) {
      const bool bit = (input[i / bits_per_unit] >> (i % bits_per_unit)) & 1;

      output[output_bit_index / bits_per_unit] |=
        (static_cast<bitset_t>(bit) << (output_bit_index % bits_per_unit));
    }
  }
}

template <typename index_t, typename bitmap_t = uint32_t>
void cpu_convert_to_csr(std::vector<bitmap_t>& bitmap,
index_t rows,
Expand Down Expand Up @@ -205,10 +235,11 @@ void cpu_select_k(const std::vector<index_t>& indptr_h,
}
}

template <typename value_t, typename index_t, typename bitmap_t = uint32_t>
template <typename value_t, typename index_t, typename bits_t = uint32_t>
void cpu_brute_force_with_filter(value_t* query_data,
value_t* index_data,
std::vector<bitmap_t>& filter,
std::vector<bits_t>& filter,
enum cuvsFilterType filter_type,
std::vector<index_t>& out_indices_h,
std::vector<value_t>& out_values_h,
size_t n_queries,
Expand All @@ -219,11 +250,21 @@ void cpu_brute_force_with_filter(value_t* query_data,
bool select_min,
cuvsDistanceType metric)
{
std::vector<value_t> values_h(nnz);
std::vector<index_t> indices_h(nnz);
size_t actual_nnz = nnz;
size_t element = raft::ceildiv(n_queries * n_dataset, size_t(sizeof(bits_t) * 8));

std::vector<bits_t> filter_repeat_h(element);
if (filter_type == BITSET) {
actual_nnz = nnz * n_queries;
repeat_cpu_bitset(filter, n_dataset, n_queries, filter_repeat_h);
} else {
filter_repeat_h = filter;
}
std::vector<value_t> values_h(actual_nnz);
std::vector<index_t> indices_h(actual_nnz);
std::vector<index_t> indptr_h(n_queries + 1);

cpu_convert_to_csr(filter, (index_t)n_queries, (index_t)n_dataset, indices_h, indptr_h);
cpu_convert_to_csr(filter_repeat_h, (index_t)n_queries, (index_t)n_dataset, indices_h, indptr_h);

cpu_sddmm(query_data,
index_data,
Expand Down Expand Up @@ -302,10 +343,11 @@ void recall_eval(T* query_data,
min_recall));
};

template <typename T, typename IdxT, typename bitmap_t = uint32_t>
template <typename T, typename IdxT, typename bits_t = uint32_t>
void recall_eval_with_filter(T* query_data,
T* index_data,
std::vector<bitmap_t>& filter_h,
std::vector<bits_t>& filter_h,
enum cuvsFilterType filter_type,
IdxT* neighbors_d,
T* distances_d,
std::vector<T>& distances_ref_h,
Expand Down Expand Up @@ -337,6 +379,7 @@ void recall_eval_with_filter(T* query_data,
cpu_brute_force_with_filter(queries_h.data(),
indices_h.data(),
filter_h,
filter_type,
neighbors_ref_h,
distances_ref_h,
n_queries,
Expand Down Expand Up @@ -388,6 +431,7 @@ TEST(BruteForceC, BuildSearch)
index_data.data(),
query_data.data(),
filter_data,
NO_FILTER,
distances_data.data(),
neighbors_data.data(),
metric);
Expand All @@ -404,20 +448,20 @@ TEST(BruteForceC, BuildSearch)
metric);
}

TEST(BruteForceC, BuildSearchWithFilter)
void run_test_with_filter(int64_t n_samples,
int64_t n_queries,
int64_t n_dim,
uint32_t n_neighbors,
enum cuvsFilterType filter_type)
{
int64_t n_rows = 8096;
int64_t n_queries = 128;
int64_t n_dim = 32;
uint32_t n_neighbors = 8;

raft::resources handle;
auto stream = raft::resource::get_cuda_stream(handle);

float sparsity = 0.2;
int64_t n_filter = (n_queries * n_rows + 31) / 32;
float sparsity = 0.2;
int64_t n_rows_filter = (filter_type == BITMAP ? n_queries : 1);
int64_t n_filter = (n_rows_filter * n_samples + 31) / 32;
std::vector<uint32_t> filter_h(n_filter);
int64_t nnz = create_sparse_matrix(n_queries, n_rows, sparsity, filter_h);
int64_t nnz = create_sparse_matrix(n_rows_filter, n_samples, sparsity, filter_h);

cuvsDistanceType metric = L2Expanded;
bool select_min = cuvs::distance::is_min_close(metric);
Expand All @@ -427,7 +471,7 @@ TEST(BruteForceC, BuildSearchWithFilter)
select_min ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::lowest());
std::vector<int64_t> neighbors_ref_h(n_queries * n_neighbors, static_cast<int64_t>(0));

rmm::device_uvector<float> index_data(n_rows * n_dim, stream);
rmm::device_uvector<float> index_data(n_samples * n_dim, stream);
rmm::device_uvector<float> query_data(n_queries * n_dim, stream);
rmm::device_uvector<int64_t> neighbors_data(n_queries * n_neighbors, stream);
rmm::device_uvector<float> distances_data(n_queries * n_neighbors, stream);
Expand All @@ -436,33 +480,54 @@ TEST(BruteForceC, BuildSearchWithFilter)
raft::copy(neighbors_data.data(), neighbors_ref_h.data(), n_queries * n_neighbors, stream);
raft::copy(distances_data.data(), distances_ref_h.data(), n_queries * n_neighbors, stream);

generate_random_data(index_data.data(), n_rows * n_dim);
generate_random_data(index_data.data(), n_samples * n_dim);
generate_random_data(query_data.data(), n_queries * n_dim);

raft::copy(filter_data.data(), filter_h.data(), n_filter, stream);

run_brute_force(n_rows,
run_brute_force(n_samples,
n_queries,
n_dim,
n_neighbors,
index_data.data(),
query_data.data(),
filter_data.data(),
filter_type,
distances_data.data(),
neighbors_data.data(),
metric);

recall_eval_with_filter(query_data.data(),
index_data.data(),
filter_h,
filter_type,
neighbors_data.data(),
distances_data.data(),
distances_ref_h,
neighbors_ref_h,
n_queries,
n_rows,
n_samples,
n_dim,
n_neighbors,
nnz,
metric);
}
// Runs the shared filtered-search test driver with a BITMAP prefilter.
TEST(BruteForceC, BuildSearchWithBitmapFilter)
{
  constexpr int64_t kNumSamples    = 8096;
  constexpr int64_t kNumQueries    = 128;
  constexpr int64_t kNumDims       = 32;
  constexpr uint32_t kNumNeighbors = 8;

  run_test_with_filter(kNumSamples, kNumQueries, kNumDims, kNumNeighbors, BITMAP);
}

// Runs the shared filtered-search test driver with a BITSET prefilter.
TEST(BruteForceC, BuildSearchWithBitsetFilter)
{
  constexpr int64_t kNumSamples    = 2000;
  constexpr int64_t kNumQueries    = 100;
  constexpr int64_t kNumDims       = 128;
  constexpr uint32_t kNumNeighbors = 100;

  run_test_with_filter(kNumSamples, kNumQueries, kNumDims, kNumNeighbors, BITSET);
}
14 changes: 9 additions & 5 deletions cpp/tests/neighbors/run_brute_force_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ void run_brute_force(int64_t n_rows,
float* index_data,
float* query_data,
uint32_t* prefilter_data,
enum cuvsFilterType prefilter_type,
float* distances_data,
int64_t* neighbors_data,
cuvsDistanceType metric)
Expand Down Expand Up @@ -90,7 +91,7 @@ void run_brute_force(int64_t n_rows,
cuvsFilter prefilter;

DLManagedTensor prefilter_tensor;
if (prefilter_data == NULL) {
if (prefilter_data == NULL || prefilter_type == NO_FILTER) {
prefilter.type = NO_FILTER;
prefilter.addr = (uintptr_t)NULL;
} else {
Expand All @@ -100,11 +101,14 @@ void run_brute_force(int64_t n_rows,
prefilter_tensor.dl_tensor.dtype.code = kDLUInt;
prefilter_tensor.dl_tensor.dtype.bits = 32;
prefilter_tensor.dl_tensor.dtype.lanes = 1;
int64_t prefilter_shape[1] = {(n_queries * n_rows + 31) / 32};
prefilter_tensor.dl_tensor.shape = prefilter_shape;
prefilter_tensor.dl_tensor.strides = NULL;

prefilter.type = BITMAP;
int64_t prefilter_bits_num = (prefilter_type == BITMAP) ? n_queries * n_rows : n_rows;
int64_t prefilter_shape[1] = {(prefilter_bits_num + 31) / 32};

prefilter_tensor.dl_tensor.shape = prefilter_shape;
prefilter_tensor.dl_tensor.strides = NULL;

prefilter.type = prefilter_type;
prefilter.addr = (uintptr_t)&prefilter_tensor;
}

Expand Down
28 changes: 19 additions & 9 deletions python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,15 @@ def search(Index index,
distances : Optional CUDA array interface compliant matrix shape
(n_queries, k) If supplied, the distances to the
neighbors will be written here in-place. (default None)
prefilter : Optional cuvs.neighbors.cuvsFilter can be used to filter
queries and neighbors based on a given bitmap. The filter
function should have a row-major layout and logical shape
[n_queries, n_samples], using the first n_samples bits to
indicate whether queries[0] should compute the distance with
index.
prefilter : Optional, cuvs.neighbors.cuvsFilter
An optional filter to exclude certain query-neighbor
pairs using a bitmap or bitset. The filter function should
have a row-major layout with logical shape
`(n_prefilter_rows, n_samples)`, where:
- `n_prefilter_rows == n_queries` when using a bitmap filter.
- `n_prefilter_rows == 1` when using a bitset prefilter.
Bit `j` of filter row `i` determines whether the distance between
`queries[i]` and sample `j` of the index is computed; a bitset's
single row is applied to every query.
(default None)
{resources_docstring}

Expand Down Expand Up @@ -203,14 +206,21 @@ def search(Index index,
>>> # Build filters
>>> n_bitmap = np.ceil(n_samples * n_queries / 32).astype(int)
>>> # Create your own bitmap as the filter by replacing the random one.
>>> bitmap = cp.random.randint(1, 1000, size=(n_bitmap,), dtype=cp.uint32)
>>> prefilter = filters.from_bitmap(bitmap)
>>> bitmap = cp.random.randint(1, 100, size=(n_bitmap,), dtype=cp.uint32)
>>> bitmap_prefilter = filters.from_bitmap(bitmap)
>>>
>>> # Or build a bitset prefilter instead:
>>> # n_bitset = np.ceil(n_samples * 1 / 32).astype(int)
>>> # # Create your own bitset as the filter by replacing the random one.
>>> # bitset = cp.random.randint(1, 100, size=(n_bitset,), dtype=cp.uint32)
>>> # bitset_prefilter = filters.from_bitset(bitset)
>>>
>>> k = 10
>>> # Using a pooling allocator reduces overhead of temporary array
>>> # creation during search. This is useful if multiple searches
>>> # are performed with same query size.
>>> distances, neighbors = brute_force.search(index, queries, k,
... prefilter=prefilter)
... prefilter=bitmap_prefilter)
>>> neighbors = cp.asarray(neighbors)
>>> distances = cp.asarray(distances)
"""
Expand Down
Loading
Loading