-
Notifications
You must be signed in to change notification settings - Fork 4k
GH-34361: [C++] Fix the handling of logical nulls for types without bitmaps like Unions and Run-End Encoded #34408
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ebdfead
0be5470
111ad6c
b07f9a2
a9b3194
ce56514
8f07725
4512297
e5c77a1
6ea0ab7
fa593d7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,6 +37,7 @@ | |
| #include "arrow/array/builder_binary.h" | ||
| #include "arrow/array/builder_decimal.h" | ||
| #include "arrow/array/builder_dict.h" | ||
| #include "arrow/array/builder_run_end.h" | ||
| #include "arrow/array/builder_time.h" | ||
| #include "arrow/array/data.h" | ||
| #include "arrow/array/util.h" | ||
|
|
@@ -83,15 +84,48 @@ TEST_F(TestArray, TestNullCount) { | |
| auto data = std::make_shared<Buffer>(nullptr, 0); | ||
| auto null_bitmap = std::make_shared<Buffer>(nullptr, 0); | ||
|
|
||
| std::unique_ptr<Int32Array> arr(new Int32Array(100, data, null_bitmap, 10)); | ||
| std::shared_ptr<Int32Array> arr(new Int32Array(100, data, null_bitmap, 10)); | ||
| ASSERT_EQ(10, arr->ComputeLogicalNullCount()); | ||
| ASSERT_EQ(10, arr->null_count()); | ||
| ASSERT_TRUE(arr->data()->MayHaveNulls()); | ||
| ASSERT_TRUE(arr->data()->MayHaveLogicalNulls()); | ||
|
|
||
| std::unique_ptr<Int32Array> arr_no_nulls(new Int32Array(100, data)); | ||
| std::shared_ptr<Int32Array> arr_no_nulls(new Int32Array(100, data)); | ||
| ASSERT_EQ(0, arr_no_nulls->ComputeLogicalNullCount()); | ||
| ASSERT_EQ(0, arr_no_nulls->null_count()); | ||
| ASSERT_FALSE(arr_no_nulls->data()->MayHaveNulls()); | ||
| ASSERT_FALSE(arr_no_nulls->data()->MayHaveLogicalNulls()); | ||
|
|
||
| std::unique_ptr<Int32Array> arr_default_null_count( | ||
| std::shared_ptr<Int32Array> arr_default_null_count( | ||
| new Int32Array(100, data, null_bitmap)); | ||
| ASSERT_EQ(kUnknownNullCount, arr_default_null_count->data()->null_count); | ||
| ASSERT_TRUE(arr_default_null_count->data()->MayHaveNulls()); | ||
| ASSERT_TRUE(arr_default_null_count->data()->MayHaveLogicalNulls()); | ||
|
|
||
| RunEndEncodedBuilder ree_builder(pool_, std::make_shared<Int32Builder>(pool_), | ||
| std::make_shared<Int32Builder>(pool_), | ||
| run_end_encoded(int32(), int32())); | ||
| ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(2), 2)); | ||
| ASSERT_OK(ree_builder.AppendNull()); | ||
| ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(4), 3)); | ||
| ASSERT_OK(ree_builder.AppendNulls(2)); | ||
| ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(8), 5)); | ||
| ASSERT_OK(ree_builder.AppendNulls(7)); | ||
| ASSERT_OK_AND_ASSIGN(auto ree, ree_builder.Finish()); | ||
|
|
||
| ASSERT_EQ(0, ree->null_count()); | ||
| ASSERT_EQ(10, ree->ComputeLogicalNullCount()); | ||
| ASSERT_FALSE(ree->data()->MayHaveNulls()); | ||
| ASSERT_TRUE(ree->data()->MayHaveLogicalNulls()); | ||
|
|
||
| ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(2), 2)); | ||
| ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(4), 3)); | ||
| ASSERT_OK(ree_builder.AppendScalar(*MakeScalar<int32_t>(8), 5)); | ||
| ASSERT_OK_AND_ASSIGN(auto ree_no_nulls, ree_builder.Finish()); | ||
| ASSERT_EQ(0, ree_no_nulls->null_count()); | ||
| ASSERT_EQ(0, ree_no_nulls->ComputeLogicalNullCount()); | ||
| ASSERT_FALSE(ree_no_nulls->data()->MayHaveNulls()); | ||
| ASSERT_FALSE(ree_no_nulls->data()->MayHaveLogicalNulls()); | ||
| } | ||
|
|
||
| TEST_F(TestArray, TestSlicePreservesAllNullCount) { | ||
|
|
@@ -377,20 +411,23 @@ TEST_F(TestArray, TestMakeArrayOfNull) { | |
| ASSERT_EQ(array->length(), length); | ||
| if (is_union(type->id())) { | ||
| ASSERT_EQ(array->null_count(), 0); | ||
| ASSERT_EQ(array->ComputeLogicalNullCount(), length); | ||
|
||
| const auto& union_array = checked_cast<const UnionArray&>(*array); | ||
| for (int i = 0; i < union_array.num_fields(); ++i) { | ||
| ASSERT_EQ(union_array.field(i)->null_count(), union_array.field(i)->length()); | ||
| } | ||
| } else if (type->id() == Type::RUN_END_ENCODED) { | ||
| ASSERT_EQ(array->null_count(), 0); | ||
| ASSERT_EQ(array->ComputeLogicalNullCount(), length); | ||
| const auto& ree_array = checked_cast<const RunEndEncodedArray&>(*array); | ||
| ASSERT_EQ(ree_array.values()->null_count(), ree_array.values()->length()); | ||
| } else { | ||
| ASSERT_EQ(array->null_count(), length); | ||
| for (int64_t i = 0; i < length; ++i) { | ||
| ASSERT_TRUE(array->IsNull(i)); | ||
| ASSERT_FALSE(array->IsValid(i)); | ||
| } | ||
| ASSERT_EQ(array->ComputeLogicalNullCount(), length); | ||
| } | ||
| for (int64_t i = 0; i < length; ++i) { | ||
| ASSERT_TRUE(array->IsNull(i)); | ||
| ASSERT_FALSE(array->IsValid(i)); | ||
| } | ||
|
||
| } | ||
| } | ||
|
|
@@ -482,35 +519,45 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar) | |
| std::unique_ptr<arrow::ArrayBuilder> builder; | ||
| auto null_scalar = MakeNullScalar(scalar->type); | ||
| ASSERT_OK(MakeBuilderExactIndex(pool, scalar->type, &builder)); | ||
| ASSERT_OK(builder->AppendScalar(*scalar)); | ||
| ASSERT_OK(builder->AppendScalar(*scalar)); | ||
| ASSERT_OK(builder->AppendScalar(*null_scalar)); | ||
| ASSERT_OK(builder->AppendScalars({scalar, null_scalar})); | ||
| ASSERT_OK(builder->AppendScalar(*scalar, /*n_repeats=*/2)); | ||
| ASSERT_OK(builder->AppendScalar(*null_scalar, /*n_repeats=*/2)); | ||
| ASSERT_OK(builder->AppendScalar(*scalar)); // [0] = scalar | ||
| ASSERT_OK(builder->AppendScalar(*scalar)); // [1] = scalar | ||
| ASSERT_OK(builder->AppendScalar(*null_scalar)); // [2] = NULL | ||
| ASSERT_OK(builder->AppendScalars({scalar, null_scalar})); // [3, 4] = {scalar, NULL} | ||
| ASSERT_OK( | ||
| builder->AppendScalar(*scalar, /*n_repeats=*/2)); // [5, 6] = {scalar, scalar} | ||
| ASSERT_OK( | ||
| builder->AppendScalar(*null_scalar, /*n_repeats=*/2)); // [7, 8] = {NULL, NULL} | ||
|
|
||
| std::shared_ptr<Array> out; | ||
| FinishAndCheckPadding(builder.get(), &out); | ||
| ASSERT_OK(out->ValidateFull()); | ||
| AssertTypeEqual(scalar->type, out->type()); | ||
| ASSERT_EQ(out->length(), 9); | ||
|
|
||
| const bool can_check_nulls = internal::HasValidityBitmap(out->type()->id()); | ||
| auto out_type_id = out->type()->id(); | ||
| const bool has_validity = internal::HasValidityBitmap(out_type_id); | ||
| // For a dictionary builder, the output dictionary won't necessarily be the same | ||
| const bool can_check_values = !is_dictionary(out->type()->id()); | ||
| const bool can_check_values = !is_dictionary(out_type_id); | ||
|
|
||
| if (can_check_nulls) { | ||
| if (has_validity) { | ||
| ASSERT_EQ(out->null_count(), 4); | ||
| } else { | ||
| ASSERT_EQ(out->null_count(), 0); | ||
| } | ||
| if (scalar->is_valid) { | ||
| ASSERT_EQ(out->ComputeLogicalNullCount(), 4); | ||
| } else { | ||
| ASSERT_EQ(out->ComputeLogicalNullCount(), 9); | ||
| } | ||
|
|
||
| for (const auto index : {0, 1, 3, 5, 6}) { | ||
| ASSERT_FALSE(out->IsNull(index)); | ||
| ASSERT_NE(out->IsNull(index), scalar->is_valid); | ||
| ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index)); | ||
| ASSERT_OK(scalar_i->ValidateFull()); | ||
| if (can_check_values) AssertScalarsEqual(*scalar, *scalar_i, /*verbose=*/true); | ||
| } | ||
| for (const auto index : {2, 4, 7, 8}) { | ||
| ASSERT_EQ(out->IsNull(index), can_check_nulls); | ||
| ASSERT_TRUE(out->IsNull(index)); | ||
| ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index)); | ||
| ASSERT_OK(scalar_i->ValidateFull()); | ||
| AssertScalarsEqual(*null_scalar, *scalar_i, /*verbose=*/true); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have existing tests like this for sparse and dense union? (given we now have distinct paths for those)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mentioned you in PR comments pointing to code that tests
IsNull/Validfor unions.