Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 89 additions & 3 deletions cpp/src/arrow/array-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1200,7 +1200,7 @@ class TestFWBinaryArray : public ::testing::Test {
};

TEST_F(TestFWBinaryArray, Builder) {
const int32_t byte_width = 10;
constexpr int32_t byte_width = 10;
int64_t length = 4096;

int64_t nbytes = length * byte_width;
Expand All @@ -1215,8 +1215,7 @@ TEST_F(TestFWBinaryArray, Builder) {

std::shared_ptr<Array> result;

auto CheckResult = [this, &length, &is_valid, &raw_data,
&byte_width](const Array& result) {
auto CheckResult = [&length, &is_valid, &raw_data, byte_width](const Array& result) {
// Verify output
const auto& fw_result = static_cast<const FixedSizeBinaryArray&>(result);

Expand Down Expand Up @@ -1847,6 +1846,93 @@ TEST(TestFixedSizeBinaryDictionaryBuilder, InvalidTypeAppend) {
ASSERT_RAISES(Invalid, builder.AppendArray(*fsb_array));
}

TEST(TestDecimalDictionaryBuilder, Basic) {
// Build the dictionary Array
const auto& decimal_type = arrow::decimal(2, 0);
DictionaryBuilder<FixedSizeBinaryType> builder(decimal_type, default_memory_pool());

// Test data
std::vector<Decimal128> test{12, 12, 11, 12};
for (const auto& value : test) {
ASSERT_OK(builder.Append(value.ToBytes().data()));
}

std::shared_ptr<Array> result;
ASSERT_OK(builder.Finish(&result));

// Build expected data
FixedSizeBinaryBuilder decimal_builder(decimal_type);
ASSERT_OK(decimal_builder.Append(Decimal128(12).ToBytes()));
ASSERT_OK(decimal_builder.Append(Decimal128(11).ToBytes()));

std::shared_ptr<Array> decimal_array;
ASSERT_OK(decimal_builder.Finish(&decimal_array));
auto dtype = arrow::dictionary(int8(), decimal_array);

Int8Builder int_builder;
ASSERT_OK(int_builder.Append({0, 0, 1, 0}));
std::shared_ptr<Array> int_array;
ASSERT_OK(int_builder.Finish(&int_array));

DictionaryArray expected(dtype, int_array);
ASSERT_TRUE(expected.Equals(result));
}

TEST(TestDecimalDictionaryBuilder, DoubleTableSize) {
const auto& decimal_type = arrow::decimal(21, 0);

// Build the dictionary Array
DictionaryBuilder<FixedSizeBinaryType> builder(decimal_type, default_memory_pool());

// Build expected data
FixedSizeBinaryBuilder fsb_builder(decimal_type);
Int16Builder int_builder;

// Fill with 1024 different values
for (int64_t i = 0; i < 1024; i++) {
const uint8_t bytes[] = {0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
12,
12,
static_cast<uint8_t>(i / 128),
static_cast<uint8_t>(i % 128)};
ASSERT_OK(builder.Append(bytes));
ASSERT_OK(fsb_builder.Append(bytes));
ASSERT_OK(int_builder.Append(static_cast<uint16_t>(i)));
}
// Fill with an already existing value
const uint8_t known_value[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 1};
for (int64_t i = 0; i < 1024; i++) {
ASSERT_OK(builder.Append(known_value));
ASSERT_OK(int_builder.Append(1));
}

// Finalize result
std::shared_ptr<Array> result;
ASSERT_OK(builder.Finish(&result));

// Finalize expected data
std::shared_ptr<Array> fsb_array;
ASSERT_OK(fsb_builder.Finish(&fsb_array));

auto dtype = std::make_shared<DictionaryType>(int16(), fsb_array);
std::shared_ptr<Array> int_array;
ASSERT_OK(int_builder.Finish(&int_array));

DictionaryArray expected(dtype, int_array);
ASSERT_TRUE(expected.Equals(result));
}

// ----------------------------------------------------------------------
// List tests

Expand Down
11 changes: 6 additions & 5 deletions cpp/src/arrow/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -831,11 +831,11 @@ DictionaryBuilder<FixedSizeBinaryType>::DictionaryBuilder(
hash_table_(new PoolBuffer(pool)),
hash_slots_(nullptr),
dict_builder_(type, pool),
values_builder_(pool) {
values_builder_(pool),
byte_width_(static_cast<const FixedSizeBinaryType&>(*type).byte_width()) {
if (!::arrow::CpuInfo::initialized()) {
::arrow::CpuInfo::Init();
}
byte_width_ = static_cast<const FixedSizeBinaryType&>(*type).byte_width();
}

#ifndef ARROW_NO_DEPRECATED_API
Expand Down Expand Up @@ -921,7 +921,7 @@ Status DictionaryBuilder<T>::Append(const Scalar& value) {

template <typename T>
Status DictionaryBuilder<T>::AppendArray(const Array& array) {
const NumericArray<T>& numeric_array = static_cast<const NumericArray<T>&>(array);
const auto& numeric_array = static_cast<const NumericArray<T>&>(array);
for (int64_t i = 0; i < array.length(); i++) {
if (array.IsNull(i)) {
RETURN_NOT_OK(AppendNull());
Expand All @@ -938,8 +938,7 @@ Status DictionaryBuilder<FixedSizeBinaryType>::AppendArray(const Array& array) {
return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type");
}

const FixedSizeBinaryArray& numeric_array =
static_cast<const FixedSizeBinaryArray&>(array);
const auto& numeric_array = static_cast<const FixedSizeBinaryArray&>(array);
for (int64_t i = 0; i < array.length(); i++) {
if (array.IsNull(i)) {
RETURN_NOT_OK(AppendNull());
Expand Down Expand Up @@ -1493,6 +1492,7 @@ Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>&
DICTIONARY_BUILDER_CASE(STRING, StringDictionaryBuilder);
DICTIONARY_BUILDER_CASE(BINARY, BinaryDictionaryBuilder);
DICTIONARY_BUILDER_CASE(FIXED_SIZE_BINARY, DictionaryBuilder<FixedSizeBinaryType>);
DICTIONARY_BUILDER_CASE(DECIMAL, DictionaryBuilder<FixedSizeBinaryType>);
default:
return Status::NotImplemented(type->ToString());
}
Expand Down Expand Up @@ -1528,6 +1528,7 @@ Status EncodeArrayToDictionary(const Array& input, MemoryPool* pool,
DICTIONARY_ARRAY_CASE(STRING, StringDictionaryBuilder);
DICTIONARY_ARRAY_CASE(BINARY, BinaryDictionaryBuilder);
DICTIONARY_ARRAY_CASE(FIXED_SIZE_BINARY, DictionaryBuilder<FixedSizeBinaryType>);
DICTIONARY_ARRAY_CASE(DECIMAL, DictionaryBuilder<FixedSizeBinaryType>);
default:
std::stringstream ss;
ss << "Cannot encode array of type " << type->ToString();
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -937,8 +937,8 @@ class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder {

class ARROW_EXPORT BinaryDictionaryBuilder : public DictionaryBuilder<BinaryType> {
public:
using DictionaryBuilder::DictionaryBuilder;
using DictionaryBuilder::Append;
using DictionaryBuilder::DictionaryBuilder;

Status Append(const uint8_t* value, int32_t length) {
return Append(internal::WrappedBinary(value, length));
Expand All @@ -958,8 +958,8 @@ class ARROW_EXPORT BinaryDictionaryBuilder : public DictionaryBuilder<BinaryType
/// \brief Dictionary array builder with convenience methods for strings
class ARROW_EXPORT StringDictionaryBuilder : public DictionaryBuilder<StringType> {
public:
using DictionaryBuilder::DictionaryBuilder;
using DictionaryBuilder::Append;
using DictionaryBuilder::DictionaryBuilder;

Status Append(const uint8_t* value, int32_t length) {
return Append(internal::WrappedBinary(value, length));
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/arrow/compute/cast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,10 @@ static Status AllocateIfNotPreallocated(FunctionContext* ctx, const Array& input
if (can_pre_allocate_values) {
std::shared_ptr<Buffer> out_data;

if (!(is_primitive(out->type->id()) || out->type->id() == Type::FIXED_SIZE_BINARY)) {
const Type::type type_id = out->type->id();

if (!(is_primitive(type_id) || type_id == Type::FIXED_SIZE_BINARY ||
type_id == Type::DECIMAL)) {
std::stringstream ss;
ss << "Cannot pre-allocate memory for type: " << out->type->ToString();
return Status::NotImplemented(ss.str());
Expand Down Expand Up @@ -614,6 +617,7 @@ class CastKernel : public UnaryKernel {
FN(IN_TYPE, FloatType); \
FN(IN_TYPE, DoubleType); \
FN(IN_TYPE, FixedSizeBinaryType); \
FN(IN_TYPE, DecimalType); \
FN(IN_TYPE, BinaryType); \
FN(IN_TYPE, StringType);

Expand Down
8 changes: 4 additions & 4 deletions cpp/src/arrow/io/io-file-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -393,9 +393,9 @@ TEST_F(TestReadableFile, ThreadSafety) {
ASSERT_OK(ReadableFile::Open(path_, &pool, &file_));

std::atomic<int> correct_count(0);
const int niter = 10000;
constexpr int niter = 10000;

auto ReadData = [&correct_count, &data, niter, this]() {
auto ReadData = [&correct_count, &data, this, niter]() {
std::shared_ptr<Buffer> buffer;

for (int i = 0; i < niter; ++i) {
Expand Down Expand Up @@ -587,9 +587,9 @@ TEST_F(TestMemoryMappedFile, ThreadSafety) {
static_cast<int64_t>(data.size())));

std::atomic<int> correct_count(0);
const int niter = 10000;
constexpr int niter = 10000;

auto ReadData = [&correct_count, &data, niter, &file]() {
auto ReadData = [&correct_count, &data, &file, niter]() {
std::shared_ptr<Buffer> buffer;

for (int i = 0; i < niter; ++i) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/io/io-hdfs-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ TYPED_TEST(TestHadoopFileSystem, ThreadSafety) {
ASSERT_OK(this->client_->OpenReadable(src_path, &file));

std::atomic<int> correct_count(0);
const int niter = 1000;
constexpr int niter = 1000;

auto ReadData = [&file, &correct_count, &data, niter]() {
for (int i = 0; i < niter; ++i) {
Expand Down