Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ set(ARROW_SRCS
src/arrow/util/compression.cc
src/arrow/util/cpu-info.cc
src/arrow/util/decimal.cc
src/arrow/util/int128.cc
src/arrow/util/key_value_metadata.cc
)

Expand Down
92 changes: 52 additions & 40 deletions cpp/src/arrow/array-decimal-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,56 +28,66 @@ namespace decimal {
template <typename T>
class DecimalTestBase {
public:
virtual std::vector<uint8_t> data(const std::vector<T>& input,
size_t byte_width) const = 0;
DecimalTestBase() : pool_(default_memory_pool()) {}

void test(int precision, const std::vector<T>& draw,
const std::vector<uint8_t>& valid_bytes,
const std::vector<uint8_t>& sign_bitmap = {}, int64_t offset = 0) const {
auto type = std::make_shared<DecimalType>(precision, 4);
int byte_width = type->byte_width();
auto pool = default_memory_pool();
auto builder = std::make_shared<DecimalBuilder>(type, pool);
size_t null_count = 0;
virtual std::vector<uint8_t> MakeData(const std::vector<T>& input,
size_t byte_width) const = 0;

void InitBuilder(const std::shared_ptr<DecimalType>& type, const std::vector<T>& draw,
const std::vector<uint8_t>& valid_bytes, int byte_width,
std::shared_ptr<DecimalBuilder>* builder, size_t* null_count) const {
*builder = std::make_shared<DecimalBuilder>(type, pool_);

size_t size = draw.size();
ASSERT_OK(builder->Reserve(size));
ASSERT_OK((*builder)->Reserve(size));

for (size_t i = 0; i < size; ++i) {
if (valid_bytes[i]) {
ASSERT_OK(builder->Append(draw[i]));
ASSERT_OK((*builder)->Append(draw[i]));
} else {
ASSERT_OK(builder->AppendNull());
++null_count;
ASSERT_OK((*builder)->AppendNull());
++*null_count;
}
}
}

std::shared_ptr<Buffer> expected_sign_bitmap;
if (!sign_bitmap.empty()) {
ASSERT_OK(BitUtil::BytesToBits(sign_bitmap, &expected_sign_bitmap));
}
void TestCreate(int precision, const std::vector<T>& draw,
const std::vector<uint8_t>& valid_bytes, int64_t offset) const {
auto type = std::make_shared<DecimalType>(precision, 4);

auto raw_bytes = data(draw, byte_width);
std::shared_ptr<DecimalBuilder> builder;

size_t null_count = 0;

const size_t size = draw.size();
const int byte_width = type->byte_width();

InitBuilder(type, draw, valid_bytes, byte_width, &builder, &null_count);

auto raw_bytes = MakeData(draw, static_cast<size_t>(byte_width));
auto expected_data = std::make_shared<Buffer>(raw_bytes.data(), size * byte_width);
std::shared_ptr<Buffer> expected_null_bitmap;
ASSERT_OK(BitUtil::BytesToBits(valid_bytes, &expected_null_bitmap));

int64_t expected_null_count = test::null_count(valid_bytes);
auto expected =
std::make_shared<DecimalArray>(type, size, expected_data, expected_null_bitmap,
expected_null_count, offset, expected_sign_bitmap);
auto expected = std::make_shared<DecimalArray>(
type, size, expected_data, expected_null_bitmap, expected_null_count, 0);

std::shared_ptr<Array> out;
ASSERT_OK(builder->Finish(&out));
ASSERT_TRUE(out->Equals(*expected));
ASSERT_TRUE(out->Slice(offset)->Equals(
*expected->Slice(offset, expected->length() - offset)));
}

private:
MemoryPool* pool_;
};

template <typename T>
class DecimalTest : public DecimalTestBase<T> {
public:
std::vector<uint8_t> data(const std::vector<T>& input,
size_t byte_width) const override {
std::vector<uint8_t> MakeData(const std::vector<T>& input,
size_t byte_width) const override {
std::vector<uint8_t> result(input.size() * byte_width);
// TODO(phillipc): There's probably a better way to do this
constexpr static const size_t bytes_per_element = sizeof(T);
Expand All @@ -91,16 +101,15 @@ class DecimalTest : public DecimalTestBase<T> {
template <>
class DecimalTest<Decimal128> : public DecimalTestBase<Decimal128> {
public:
std::vector<uint8_t> data(const std::vector<Decimal128>& input,
size_t byte_width) const override {
std::vector<uint8_t> MakeData(const std::vector<Decimal128>& input,
size_t byte_width) const override {
std::vector<uint8_t> result;
result.reserve(input.size() * byte_width);
constexpr static const size_t bytes_per_element = 16;
for (size_t i = 0; i < input.size(); ++i) {
uint8_t stack_bytes[bytes_per_element] = {0};
uint8_t* bytes = stack_bytes;
bool is_negative;
ToBytes(input[i], &bytes, &is_negative);
ToBytes(input[i], &bytes);

for (size_t i = 0; i < bytes_per_element; ++i) {
result.push_back(bytes[i]);
Expand All @@ -124,40 +133,44 @@ TEST_P(Decimal32BuilderTest, NoNulls) {
std::vector<Decimal32> draw = {Decimal32(1), Decimal32(2), Decimal32(2389),
Decimal32(4), Decimal32(-12348)};
std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
this->test(precision, draw, valid_bytes);
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal64BuilderTest, NoNulls) {
int precision = GetParam();
std::vector<Decimal64> draw = {Decimal64(1), Decimal64(2), Decimal64(2389),
Decimal64(4), Decimal64(-12348)};
std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
this->test(precision, draw, valid_bytes);
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal128BuilderTest, NoNulls) {
int precision = GetParam();
std::vector<Decimal128> draw = {Decimal128(1), Decimal128(-2), Decimal128(2389),
Decimal128(4), Decimal128(-12348)};
std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
std::vector<uint8_t> sign_bitmap = {false, true, false, false, true};
this->test(precision, draw, valid_bytes, sign_bitmap);
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal32BuilderTest, WithNulls) {
int precision = GetParam();
std::vector<Decimal32> draw = {Decimal32(1), Decimal32(2), Decimal32(-1), Decimal32(4),
Decimal32(-1)};
std::vector<uint8_t> valid_bytes = {true, true, false, true, false};
this->test(precision, draw, valid_bytes);
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal64BuilderTest, WithNulls) {
int precision = GetParam();
std::vector<Decimal64> draw = {Decimal64(-1), Decimal64(2), Decimal64(-1), Decimal64(4),
Decimal64(-1)};
std::vector<uint8_t> valid_bytes = {true, true, false, true, false};
this->test(precision, draw, valid_bytes);
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal128BuilderTest, WithNulls) {
Expand All @@ -173,9 +186,8 @@ TEST_P(Decimal128BuilderTest, WithNulls) {
Decimal128("-23049302932.235234")};
std::vector<uint8_t> valid_bytes = {true, true, false, true, false,
true, true, true, true};
std::vector<uint8_t> sign_bitmap = {false, false, false, false, false,
false, false, false, true};
this->test(precision, draw, valid_bytes, sign_bitmap);
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

INSTANTIATE_TEST_CASE_P(Decimal32BuilderTest, Decimal32BuilderTest,
Expand All @@ -185,8 +197,8 @@ INSTANTIATE_TEST_CASE_P(Decimal64BuilderTest, Decimal64BuilderTest,
::testing::Range(DecimalPrecision<int64_t>::minimum,
DecimalPrecision<int64_t>::maximum));
INSTANTIATE_TEST_CASE_P(Decimal128BuilderTest, Decimal128BuilderTest,
::testing::Range(DecimalPrecision<int128_t>::minimum,
DecimalPrecision<int128_t>::maximum));
::testing::Range(DecimalPrecision<Int128>::minimum,
DecimalPrecision<Int128>::maximum));

} // namespace decimal
} // namespace arrow
64 changes: 16 additions & 48 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ PrimitiveArray::PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t le

const uint8_t* PrimitiveArray::raw_values() const {
return raw_values_ +
offset() * static_cast<const FixedWidthType&>(*type()).bit_width() / 8;
offset() * static_cast<const FixedWidthType&>(*type()).bit_width() / CHAR_BIT;
}

template <typename T>
Expand Down Expand Up @@ -323,7 +323,6 @@ std::shared_ptr<Array> StringArray::Slice(int64_t offset, int64_t length) const

FixedSizeBinaryArray::FixedSizeBinaryArray(
const std::shared_ptr<internal::ArrayData>& data) {
DCHECK_EQ(data->type->id(), Type::FIXED_SIZE_BINARY);
SetData(data);
}

Expand All @@ -346,61 +345,30 @@ const uint8_t* FixedSizeBinaryArray::GetValue(int64_t i) const {
// ----------------------------------------------------------------------
// Decimal

DecimalArray::DecimalArray(const std::shared_ptr<internal::ArrayData>& data) {
DecimalArray::DecimalArray(const std::shared_ptr<internal::ArrayData>& data)
: FixedSizeBinaryArray(data) {
DCHECK_EQ(data->type->id(), Type::DECIMAL);
SetData(data);
}

void DecimalArray::SetData(const std::shared_ptr<ArrayData>& data) {
auto fixed_size_data = data->buffers[1];
auto sign_bitmap = data->buffers[2];
this->Array::SetData(data);

raw_values_ = fixed_size_data != nullptr ? fixed_size_data->data() : nullptr;
sign_bitmap_data_ = sign_bitmap != nullptr ? sign_bitmap->data() : nullptr;
}

DecimalArray::DecimalArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
int64_t offset, const std::shared_ptr<Buffer>& sign_bitmap) {
BufferVector buffers = {null_bitmap, data, sign_bitmap};
SetData(
std::make_shared<ArrayData>(type, length, std::move(buffers), null_count, offset));
}

bool DecimalArray::IsNegative(int64_t i) const {
return sign_bitmap_data_ != nullptr ? BitUtil::GetBit(sign_bitmap_data_, i) : false;
}

const uint8_t* DecimalArray::GetValue(int64_t i) const {
return raw_values_ + (i + data_->offset) * byte_width();
}
#define DECIMAL_TO_STRING_CASE(bits, bytes, precision, scale) \
case bits: { \
decimal::Decimal##bits value; \
decimal::FromBytes((bytes), &value); \
return decimal::ToString(value, (precision), (scale)); \
}

std::string DecimalArray::FormatValue(int64_t i) const {
const auto& type_ = static_cast<const DecimalType&>(*type());
const int precision = type_.precision();
const int scale = type_.scale();
const int byte_width = type_.byte_width();
const uint8_t* bytes = raw_values_ + (i + data_->offset) * byte_width;
switch (byte_width) {
case 4: {
decimal::Decimal32 value;
decimal::FromBytes(bytes, &value);
return decimal::ToString(value, precision, scale);
}
case 8: {
decimal::Decimal64 value;
decimal::FromBytes(bytes, &value);
return decimal::ToString(value, precision, scale);
}
case 16: {
decimal::Decimal128 value;
decimal::FromBytes(bytes, IsNegative(i), &value);
return decimal::ToString(value, precision, scale);
}
const int bit_width = type_.bit_width();
const uint8_t* bytes = GetValue(i);
switch (bit_width) {
DECIMAL_TO_STRING_CASE(32, bytes, precision, scale)
DECIMAL_TO_STRING_CASE(64, bytes, precision, scale)
DECIMAL_TO_STRING_CASE(128, bytes, precision, scale)
default: {
DCHECK(false) << "Invalid byte width: " << byte_width;
DCHECK(false) << "Invalid bit width: " << bit_width;
return "";
}
}
Expand Down
37 changes: 4 additions & 33 deletions cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -521,8 +521,6 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {

int32_t byte_width() const { return byte_width_; }

const uint8_t* raw_values() const { return raw_values_ + byte_width_ * data_->offset; }

std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const override;

protected:
Expand All @@ -536,45 +534,18 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {

// ----------------------------------------------------------------------
// DecimalArray
class ARROW_EXPORT DecimalArray : public FlatArray {
class ARROW_EXPORT DecimalArray : public FixedSizeBinaryArray {
public:
using TypeClass = Type;
using TypeClass = DecimalType;

using FixedSizeBinaryArray::FixedSizeBinaryArray;

/// \brief Construct DecimalArray from internal::ArrayData instance
explicit DecimalArray(const std::shared_ptr<internal::ArrayData>& data);

DecimalArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = nullptr,
int64_t null_count = 0, int64_t offset = 0,
const std::shared_ptr<Buffer>& sign_bitmap = nullptr);

bool IsNegative(int64_t i) const;

const uint8_t* GetValue(int64_t i) const;

std::string FormatValue(int64_t i) const;

std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const override;

/// \brief The main decimal data
/// For 32/64-bit decimal this is everything
std::shared_ptr<Buffer> values() const { return data_->buffers[1]; }

/// Only needed for 128 bit Decimals
std::shared_ptr<Buffer> sign_bitmap() const { return data_->buffers[2]; }

int32_t byte_width() const {
return static_cast<const DecimalType&>(*type()).byte_width();
}

/// \brief Return pointer to value data, accounting for any offset
const uint8_t* raw_values() const { return raw_values_ + byte_width() * data_->offset; }

private:
void SetData(const std::shared_ptr<internal::ArrayData>& data);
const uint8_t* raw_values_;
const uint8_t* sign_bitmap_data_;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice

};

// ----------------------------------------------------------------------
Expand Down
Loading