Skip to content

Commit ea09681

Browse files
mfoerste4 authored and jiangyinzuo committed
Reduce memory consumption of scalar quantizer (rapidsai#736)
This is an optimization raised by rapidsai#718. The quantile selection process currently relies on sampling. The memory requirements of the originally used `raft::random::sample_without_replacement` are roughly `48*input_len` bytes (`4*len*8` bytes, plus `~2*len*8` bytes for the sort). It has now been replaced by `raft::matrix::sample_rows`, which has essentially the same requirement, but based on the sample size instead of the size of the original input.

Authors:
- Malte Förster (https://github.com/mfoerste4)

Approvers:
- Tamas Bela Feher (https://github.com/tfeher)

URL: rapidsai#736
1 parent 2c43f27 commit ea09681

File tree

1 file changed

+15
-43
lines changed

1 file changed

+15
-43
lines changed

cpp/src/preprocessing/quantize/detail/scalar.cuh

Lines changed: 15 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,8 @@
1919
#include <cuvs/preprocessing/quantize/scalar.hpp>
2020
#include <raft/core/operators.hpp>
2121
#include <raft/linalg/unary_op.cuh>
22+
#include <raft/matrix/sample_rows.cuh>
2223
#include <raft/random/rng.cuh>
23-
#include <raft/random/sample_without_replacement.cuh>
2424
#include <thrust/execution_policy.h>
2525
#include <thrust/sort.h>
2626
#include <thrust/system/omp/execution_policy.h>
@@ -72,10 +72,11 @@ struct quantize_op {
7272
}
7373
};
7474

75-
template <typename T>
76-
std::tuple<T, T> quantile_min_max(raft::resources const& res,
77-
raft::device_matrix_view<const T, int64_t> dataset,
78-
double quantile)
75+
template <typename T, typename IdxT = int64_t, typename accessor>
76+
std::tuple<T, T> quantile_min_max(
77+
raft::resources const& res,
78+
raft::mdspan<const T, raft::matrix_extent<IdxT>, raft::row_major, accessor> dataset,
79+
double quantile)
7980
{
8081
// settings for quantile approximation
8182
constexpr size_t max_num_samples = 1000000;
@@ -85,14 +86,15 @@ std::tuple<T, T> quantile_min_max(raft::resources const& res,
8586

8687
// select subsample
8788
raft::random::RngState rng(seed);
88-
size_t n_elements = dataset.extent(0) * dataset.extent(1);
89-
size_t subset_size = std::min(max_num_samples, n_elements);
90-
auto subset = raft::make_device_vector<T>(res, subset_size);
91-
auto dataset_view = raft::make_device_vector_view<const T>(dataset.data_handle(), n_elements);
92-
raft::random::sample_without_replacement(
93-
res, rng, dataset_view, std::nullopt, subset.view(), std::nullopt);
94-
95-
// quantile / sort and pick for now
89+
size_t n_rows = dataset.extent(0);
90+
size_t dim = dataset.extent(1);
91+
size_t n_sample_rows = std::min<size_t>(std::ceil(max_num_samples / dim), n_rows);
92+
93+
// select subsample rows (this returns device data for both device and host input)
94+
auto subset = raft::matrix::sample_rows(res, rng, dataset, (IdxT)n_sample_rows);
95+
96+
// quantile / sort element-wise and pick for now
97+
size_t subset_size = n_sample_rows * dim;
9698
thrust::sort(raft::resource::get_thrust_policy(res),
9799
subset.data_handle(),
98100
subset.data_handle() + subset_size);
@@ -105,39 +107,9 @@ std::tuple<T, T> quantile_min_max(raft::resources const& res,
105107
raft::update_host(&(minmax_h[0]), subset.data_handle() + pos_min, 1, stream);
106108
raft::update_host(&(minmax_h[1]), subset.data_handle() + pos_max, 1, stream);
107109
raft::resource::sync_stream(res);
108-
109110
return {minmax_h[0], minmax_h[1]};
110111
}
111112

112-
template <typename T>
113-
std::tuple<T, T> quantile_min_max(raft::resources const& res,
114-
raft::host_matrix_view<const T, int64_t> dataset,
115-
double quantile)
116-
{
117-
// settings for quantile approximation
118-
constexpr size_t max_num_samples = 1000000;
119-
constexpr int seed = 137;
120-
121-
// select subsample
122-
std::mt19937 rng(seed);
123-
size_t n_elements = dataset.extent(0) * dataset.extent(1);
124-
size_t subset_size = std::min(max_num_samples, n_elements);
125-
std::vector<T> subset;
126-
std::sample(dataset.data_handle(),
127-
dataset.data_handle() + n_elements,
128-
std::back_inserter(subset),
129-
subset_size,
130-
rng);
131-
132-
// quantile / sort and pick for now
133-
thrust::sort(thrust::omp::par, subset.data(), subset.data() + subset_size, fp_lt<T>);
134-
double half_quantile_pos = (0.5 + 0.5 * quantile) * subset_size;
135-
int pos_max = std::ceil(half_quantile_pos) - 1;
136-
int pos_min = subset_size - pos_max - 1;
137-
138-
return {subset[pos_min], subset[pos_max]};
139-
}
140-
141113
template <typename T>
142114
cuvs::preprocessing::quantize::scalar::quantizer<T> train(
143115
raft::resources const& res,

0 commit comments

Comments
 (0)