Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,9 @@ endif (UNIX)
if (${CLANG_FORMAT_FOUND})
# runs clang format and updates files in place.
add_custom_target(format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 1
`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/_generated/g'`
`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h |
sed -e '/_generated/g' |
sed -e '/windows_compatibility.h/g'`
`find ${CMAKE_CURRENT_SOURCE_DIR}/../python -name \\*.cc -or -name \\*.h`)

# runs clang format and exits with a non-zero exit code if any files need to be reformatted
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array-list-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class TestListBuilder : public TestBuilder {
TEST_F(TestListBuilder, Equality) {
Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());

ArrayPtr array, equal_array, unequal_array;
std::shared_ptr<Array> array, equal_array, unequal_array;
vector<int32_t> equal_offsets = {0, 1, 2, 5};
vector<int32_t> equal_values = {1, 2, 3, 4, 5, 2, 2, 2};
vector<int32_t> unequal_offsets = {0, 1, 4};
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array-primitive-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) {
this->RandomData(size);
vector<T>& draws = this->draws_;
vector<uint8_t>& valid_bytes = this->valid_bytes_;
ArrayPtr array, equal_array, unequal_array;
std::shared_ptr<Array> array, equal_array, unequal_array;
auto builder = this->builder_.get();
ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &array));
ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &equal_array));
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/array-struct-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,9 @@ TEST_F(TestStructBuilder, BulkAppendInvalid) {
}

TEST_F(TestStructBuilder, TestEquality) {
ArrayPtr array, equal_array;
ArrayPtr unequal_bitmap_array, unequal_offsets_array, unequal_values_array;
std::shared_ptr<Array> array, equal_array;
std::shared_ptr<Array> unequal_bitmap_array, unequal_offsets_array,
unequal_values_array;

vector<int32_t> int_values = {1, 2, 3, 4};
vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/array-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ TEST_F(TestArray, TestLength) {
ASSERT_EQ(arr->length(), 100);
}

ArrayPtr MakeArrayFromValidBytes(const std::vector<uint8_t>& v, MemoryPool* pool) {
std::shared_ptr<Array> MakeArrayFromValidBytes(
const std::vector<uint8_t>& v, MemoryPool* pool) {
int32_t null_count = v.size() - std::accumulate(v.begin(), v.end(), 0);
std::shared_ptr<Buffer> null_buf = test::bytes_to_null_buffer(v);

Expand All @@ -65,7 +66,8 @@ ArrayPtr MakeArrayFromValidBytes(const std::vector<uint8_t>& v, MemoryPool* pool
value_builder.Append<int32_t>(0);
}

ArrayPtr arr(new Int32Array(v.size(), value_builder.Finish(), null_count, null_buf));
std::shared_ptr<Array> arr(
new Int32Array(v.size(), value_builder.Finish(), null_count, null_buf));
return arr;
}

Expand Down
120 changes: 108 additions & 12 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,14 +189,14 @@ bool BooleanArray::EqualsExact(const BooleanArray& other) const {
}
}

bool BooleanArray::Equals(const ArrayPtr& arr) const {
bool BooleanArray::Equals(const std::shared_ptr<Array>& arr) const {
if (this == arr.get()) return true;
if (Type::BOOL != arr->type_enum()) { return false; }
return EqualsExact(*static_cast<const BooleanArray*>(arr.get()));
}

bool BooleanArray::RangeEquals(int32_t start_idx, int32_t end_idx,
int32_t other_start_idx, const ArrayPtr& arr) const {
int32_t other_start_idx, const std::shared_ptr<Array>& arr) const {
if (this == arr.get()) { return true; }
if (!arr) { return false; }
if (this->type_enum() != arr->type_enum()) { return false; }
Expand All @@ -222,7 +222,7 @@ bool ListArray::EqualsExact(const ListArray& other) const {
if (null_count_ != other.null_count_) { return false; }

bool equal_offsets =
offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t));
offsets_buffer_->Equals(*other.offsets_buffer_, (length_ + 1) * sizeof(int32_t));
if (!equal_offsets) { return false; }
bool equal_null_bitmap = true;
if (null_count_ > 0) {
Expand Down Expand Up @@ -269,10 +269,10 @@ bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_st

Status ListArray::Validate() const {
if (length_ < 0) { return Status::Invalid("Length was negative"); }
if (!offset_buffer_) { return Status::Invalid("offset_buffer_ was null"); }
if (offset_buffer_->size() / static_cast<int>(sizeof(int32_t)) < length_) {
if (!offsets_buffer_) { return Status::Invalid("offsets_buffer_ was null"); }
if (offsets_buffer_->size() / static_cast<int>(sizeof(int32_t)) < length_) {
std::stringstream ss;
ss << "offset buffer size (bytes): " << offset_buffer_->size()
ss << "offset buffer size (bytes): " << offsets_buffer_->size()
<< " isn't large enough for length: " << length_;
return Status::Invalid(ss.str());
}
Expand Down Expand Up @@ -337,8 +337,8 @@ BinaryArray::BinaryArray(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& offsets, const std::shared_ptr<Buffer>& data,
int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap)
: Array(type, length, null_count, null_bitmap),
offset_buffer_(offsets),
offsets_(reinterpret_cast<const int32_t*>(offset_buffer_->data())),
offsets_buffer_(offsets),
offsets_(reinterpret_cast<const int32_t*>(offsets_buffer_->data())),
data_buffer_(data),
data_(nullptr) {
if (data_buffer_ != nullptr) { data_ = data_buffer_->data(); }
Expand All @@ -353,7 +353,7 @@ bool BinaryArray::EqualsExact(const BinaryArray& other) const {
if (!Array::EqualsExact(other)) { return false; }

bool equal_offsets =
offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t));
offsets_buffer_->Equals(*other.offsets_buffer_, (length_ + 1) * sizeof(int32_t));
if (!equal_offsets) { return false; }

if (!data_buffer_ && !(other.data_buffer_)) { return true; }
Expand Down Expand Up @@ -433,7 +433,7 @@ bool StructArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_
if (this == arr.get()) { return true; }
if (!arr) { return false; }
if (Type::STRUCT != arr->type_enum()) { return false; }
const auto other = static_cast<StructArray*>(arr.get());
const auto& other = static_cast<const StructArray&>(*arr.get());

bool equal_fields = true;
for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
Expand All @@ -442,7 +442,7 @@ bool StructArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_
for (size_t j = 0; j < field_arrays_.size(); ++j) {
// TODO: really we should be comparing stretches of non-null data rather
// than looking at one value at a time.
equal_fields = field(j)->RangeEquals(i, i + 1, o_i, other->field(j));
equal_fields = field(j)->RangeEquals(i, i + 1, o_i, other.field(j));
if (!equal_fields) { return false; }
}
}
Expand Down Expand Up @@ -490,6 +490,102 @@ Status StructArray::Accept(ArrayVisitor* visitor) const {
return visitor->Visit(*this);
}

// ----------------------------------------------------------------------
// UnionArray

// Construct a union array over already-built children and buffers.
//
// type, length, null_count and null_bitmap are forwarded unchanged to the
// Array base class. children, type_ids and offsets are stored in children_,
// type_ids_buffer_ and offsets_buffer_ respectively.
//
// type_ids must be non-null: it is dereferenced unconditionally to obtain the
// raw uint8_t view cached in type_ids_. offsets may be null, in which case
// offsets_ is set to nullptr.
UnionArray::UnionArray(const TypePtr& type, int32_t length,
    const std::vector<std::shared_ptr<Array>>& children,
    const std::shared_ptr<Buffer>& type_ids, const std::shared_ptr<Buffer>& offsets,
    int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap)
    : Array(type, length, null_count, null_bitmap),
      children_(children),
      type_ids_buffer_(type_ids),
      offsets_buffer_(offsets) {
  type_ids_ = reinterpret_cast<const uint8_t*>(type_ids->data());
  // Always assign offsets_ so it holds a defined value even when no offsets
  // buffer was supplied; previously it was only written in the non-null case.
  offsets_ = offsets ? reinterpret_cast<const int32_t*>(offsets->data()) : nullptr;
}

// Return the child array stored at index pos in children_.
//
// pos must satisfy 0 <= pos < children_.size(); this is checked with DCHECK_GT.
std::shared_ptr<Array> UnionArray::child(int32_t pos) const {
  // One unsigned comparison covers every bad input: an empty children_, a pos
  // past the end, and a negative pos (which converts to a huge size_t).
  DCHECK_GT(children_.size(), static_cast<size_t>(pos));
  return children_[pos];
}

bool UnionArray::Equals(const std::shared_ptr<Array>& arr) const {
if (this == arr.get()) { return true; }
if (!arr) { return false; }
if (!this->type_->Equals(arr->type())) { return false; }
if (null_count_ != arr->null_count()) { return false; }
return RangeEquals(0, length_, 0, arr);
}

// Compare slots [start_idx, end_idx) of this array with the equally long run
// of slots in arr that begins at other_start_idx.
//
// Returns false when arr is null, is not a union array, or uses a different
// union mode. Otherwise slots are compared pairwise: null-ness must agree,
// null slots are skipped, type ids must agree, and the value in the selected
// child must compare equal via the child's RangeEquals.
bool UnionArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
    const std::shared_ptr<Array>& arr) const {
  // Identity shortcut is only valid when both ranges start at the same slot;
  // two different ranges of the same array still have to be compared.
  if (this == arr.get() && start_idx == other_start_idx) { return true; }
  if (!arr) { return false; }
  if (Type::UNION != arr->type_enum()) { return false; }
  const auto& other = static_cast<const UnionArray&>(*arr.get());

  const UnionMode union_mode = mode();
  if (union_mode != other.mode()) { return false; }

  // Define a mapping from the type id to child number
  const auto& type_codes = static_cast<const UnionType&>(*arr->type().get()).type_ids;
  uint8_t max_code = 0;
  for (uint8_t code : type_codes) {
    if (code > max_code) { max_code = code; }
  }

  // Store mapping in a vector for constant time lookups
  std::vector<uint8_t> type_id_to_child_num(max_code + 1);
  // Index with size_t: a uint8_t counter compared against a size narrowed to
  // uint8_t would wrap to zero iterations when there are 256 type codes.
  for (size_t i = 0; i < type_codes.size(); ++i) {
    type_id_to_child_num[type_codes[i]] = static_cast<uint8_t>(i);
  }

  const uint8_t* this_ids = raw_type_ids();
  const uint8_t* other_ids = other.raw_type_ids();

  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
    if (IsNull(i) != other.IsNull(o_i)) { return false; }
    if (IsNull(i)) { continue; }
    if (this_ids[i] != other_ids[o_i]) { return false; }

    const uint8_t id = this_ids[i];
    const uint8_t child_num = type_id_to_child_num[id];

    // TODO(wesm): really we should be comparing stretches of non-null data
    // rather than looking at one value at a time.
    if (union_mode == UnionMode::SPARSE) {
      // Sparse mode: the child is indexed by the same slot number as the union.
      if (!child(child_num)->RangeEquals(i, i + 1, o_i, other.child(child_num))) {
        return false;
      }
    } else {
      // Non-sparse mode: each side looks up its child position through its own
      // offsets, so the other array must be indexed with o_i, not i.
      const int32_t offset = offsets_[i];
      const int32_t o_offset = other.offsets_[o_i];
      if (!child(child_num)->RangeEquals(
              offset, offset + 1, o_offset, other.child(child_num))) {
        return false;
      }
    }
  }
  return true;
}

// Check basic invariants of this union array.
//
// Returns Status::Invalid when length_ is negative or when null_count()
// exceeds length_; otherwise returns Status::OK(). No other validation is
// implemented yet.
Status UnionArray::Validate() const {
  if (length_ < 0) { return Status::Invalid("Length was negative"); }

  if (null_count() > length_) {
    // Message previously said "struct" (copied from the struct array check).
    return Status::Invalid("Null count exceeds the length of this union");
  }

  // Marks the remaining validation as unimplemented.
  DCHECK(false) << "Validate not yet implemented";
  return Status::OK();
}

// Dispatch this array to visitor by calling its Visit overload with a const
// reference to this UnionArray, and return the Status that call produces.
Status UnionArray::Accept(ArrayVisitor* visitor) const {
  const UnionArray& self = *this;
  return visitor->Visit(self);
}

// ----------------------------------------------------------------------

#define MAKE_PRIMITIVE_ARRAY_CASE(ENUM, ArrayType) \
Expand All @@ -499,7 +595,7 @@ Status StructArray::Accept(ArrayVisitor* visitor) const {

Status MakePrimitiveArray(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& data, int32_t null_count,
const std::shared_ptr<Buffer>& null_bitmap, ArrayPtr* out) {
const std::shared_ptr<Buffer>& null_bitmap, std::shared_ptr<Array>* out) {
switch (type->type) {
MAKE_PRIMITIVE_ARRAY_CASE(BOOL, BooleanArray);
MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array);
Expand Down
Loading