Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions c_glib/arrow-glib/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5284,12 +5284,9 @@ GArrowCastOptions *
garrow_cast_options_new_raw(const arrow::compute::CastOptions *arrow_options)
{
GArrowDataType *to_data_type = NULL;
if (arrow_options->to_type) {
auto arrow_copied_options = arrow_options->Copy();
auto arrow_copied_cast_options =
static_cast<arrow::compute::CastOptions *>(arrow_copied_options.get());
to_data_type =
garrow_data_type_new_raw(&(arrow_copied_cast_options->to_type));
if (arrow_options->to_type.type) {
auto arrow_to_data_type = arrow_options->to_type.GetSharedPtr();
to_data_type = garrow_data_type_new_raw(&arrow_to_data_type);
}
auto options =
g_object_new(GARROW_TYPE_CAST_OPTIONS,
Expand Down
44 changes: 38 additions & 6 deletions c_glib/arrow-glib/scalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2401,9 +2401,31 @@ garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type,
gint8 type_code,
GArrowScalar *value)
{
return GARROW_SPARSE_UNION_SCALAR(
garrow_union_scalar_new<arrow::SparseUnionScalar>(
GARROW_DATA_TYPE(data_type), type_code, value));
auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
const auto &arrow_type_codes =
std::dynamic_pointer_cast<arrow::SparseUnionType>(
arrow_data_type)->type_codes();
auto arrow_value = garrow_scalar_get_raw(value);
arrow::SparseUnionScalar::ValueType arrow_field_values;
for (int i = 0; i < arrow_data_type->num_fields(); ++i) {
if (arrow_type_codes[i] == type_code) {
arrow_field_values.emplace_back(arrow_value);
} else {
arrow_field_values.emplace_back(
arrow::MakeNullScalar(arrow_data_type->field(i)->type()));
}
}
auto arrow_scalar =
std::static_pointer_cast<arrow::Scalar>(
std::make_shared<arrow::SparseUnionScalar>(arrow_field_values,
type_code,
arrow_data_type));
auto scalar = garrow_scalar_new_raw(&arrow_scalar,
"scalar", &arrow_scalar,
"data-type", data_type,
"value", value,
NULL);
return GARROW_SPARSE_UNION_SCALAR(scalar);
}


Expand Down Expand Up @@ -2436,9 +2458,19 @@ garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type,
gint8 type_code,
GArrowScalar *value)
{
return GARROW_DENSE_UNION_SCALAR(
garrow_union_scalar_new<arrow::DenseUnionScalar>(
GARROW_DATA_TYPE(data_type), type_code, value));
auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
auto arrow_value = garrow_scalar_get_raw(value);
auto arrow_scalar =
std::static_pointer_cast<arrow::Scalar>(
std::make_shared<arrow::DenseUnionScalar>(arrow_value,
type_code,
arrow_data_type));
auto scalar = garrow_scalar_new_raw(&arrow_scalar,
"scalar", &arrow_scalar,
"data-type", data_type,
"value", value,
NULL);
return GARROW_DENSE_UNION_SCALAR(scalar);
}


Expand Down
6 changes: 5 additions & 1 deletion c_glib/test/test-large-binary-scalar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@ def test_equal
end

def test_to_s
assert_equal("...", @scalar.to_s)
assert_equal(<<-BINARY.strip, @scalar.to_s)
[
030102
]
BINARY
end

def test_value
Expand Down
6 changes: 5 additions & 1 deletion c_glib/test/test-large-string-scalar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@ def test_equal
end

def test_to_s
assert_equal("...", @scalar.to_s)
assert_equal(<<-STRING.strip, @scalar.to_s)
[
"Hello"
]
STRING
end

def test_value
Expand Down
12 changes: 11 additions & 1 deletion c_glib/test/test-list-scalar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,17 @@ def test_equal
end

def test_to_s
assert_equal("...", @scalar.to_s)
assert_equal(<<-LIST.strip, @scalar.to_s)
[
[
[
1,
2,
3
]
]
]
LIST
end

def test_value
Expand Down
15 changes: 14 additions & 1 deletion c_glib/test/test-map-scalar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,20 @@ def test_equal
end

def test_to_s
assert_equal("...", @scalar.to_s)
assert_equal(<<-MAP.strip, @scalar.to_s)
[
keys:
[
"hello",
"world"
]
values:
[
1,
2
]
]
MAP
end

def test_value
Expand Down
3 changes: 1 addition & 2 deletions cpp/examples/arrow/compute_register_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,7 @@ const cp::FunctionDoc func_doc{
int main(int argc, char** argv) {
const std::string name = "compute_register_example";
auto func = std::make_shared<cp::ScalarFunction>(name, cp::Arity::Unary(), func_doc);
cp::ScalarKernel kernel({cp::InputType::Array(arrow::int64())}, arrow::int64(),
ExampleFunctionImpl);
cp::ScalarKernel kernel({arrow::int64()}, arrow::int64(), ExampleFunctionImpl);
kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE;
ABORT_ON_FAILURE(func->AddKernel(std::move(kernel)));

Expand Down
6 changes: 2 additions & 4 deletions cpp/examples/arrow/udf_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,8 @@ arrow::Status SampleFunction(cp::KernelContext* ctx, const cp::ExecSpan& batch,
arrow::Status Execute() {
const std::string name = "add_three";
auto func = std::make_shared<cp::ScalarFunction>(name, cp::Arity::Ternary(), func_doc);
cp::ScalarKernel kernel(
{cp::InputType::Array(arrow::int64()), cp::InputType::Array(arrow::int64()),
cp::InputType::Array(arrow::int64())},
arrow::int64(), SampleFunction);
cp::ScalarKernel kernel({arrow::int64(), arrow::int64(), arrow::int64()},
arrow::int64(), SampleFunction);

kernel.mem_allocation = cp::MemAllocation::PREALLOCATE;
kernel.null_handling = cp::NullHandling::INTERSECTION;
Expand Down
40 changes: 32 additions & 8 deletions cpp/gdb_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1406,13 +1406,12 @@ class FixedSizeBinaryScalarPrinter(BaseBinaryScalarPrinter):

def to_string(self):
size = self.type['byte_width_']
if not self.is_valid:
return f"{self._format_type()} of size {size}, null value"
bufptr = BufferPtr(SharedPtr(self.val['value']).get())
if bufptr.data is None:
return f"{self._format_type()} of size {size}, <unallocated>"
nullness = '' if self.is_valid else 'null with '
return (f"{self._format_type()} of size {size}, "
f"value {self._format_buf(bufptr)}")
f"{nullness}value {self._format_buf(bufptr)}")


class DictionaryScalarPrinter(ScalarPrinter):
Expand Down Expand Up @@ -1450,6 +1449,8 @@ def display_hint(self):
return 'map'

def children(self):
if not self.is_valid:
return None
eval_fields = StdVector(self.type['children_'])
eval_values = StdVector(self.val['value'])
for field, value in zip(eval_fields, eval_values):
Expand All @@ -1463,7 +1464,24 @@ def to_string(self):
return f"{self._format_type()}"


class UnionScalarPrinter(ScalarPrinter):
class SparseUnionScalarPrinter(ScalarPrinter):
"""
Pretty-printer for arrow::UnionScalar and subclasses.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Pretty-printer for arrow::UnionScalar and subclasses.
Pretty-printer for arrow::SparseUnionScalar.

"""

def to_string(self):
type_code = self.val['type_code'].cast(gdb.lookup_type('int'))
if not self.is_valid:
return (f"{self._format_type()} of type {self.type}, "
f"type code {type_code}, null value")
eval_values = StdVector(self.val['value'])
child_id = self.val['child_id'].cast(gdb.lookup_type('int'))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the cast is needed as it's already a C int?

(note: calls to gdb evaluation of C stuff can be costly, so this is not just pedantic)

return (f"{self._format_type()} of type code {type_code}, "
f"value {deref(eval_values[child_id])}")



class DenseUnionScalarPrinter(ScalarPrinter):
"""
Pretty-printer for arrow::UnionScalar and subclasses.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Pretty-printer for arrow::UnionScalar and subclasses.
Pretty-printer for arrow::DenseUnionScalar and subclasses.

"""
Expand Down Expand Up @@ -1968,10 +1986,16 @@ class StructTypeClass(DataTypeClass):
scalar_printer = StructScalarPrinter


class UnionTypeClass(DataTypeClass):
class DenseUnionTypeClass(DataTypeClass):
is_parametric = True
type_printer = UnionTypePrinter
scalar_printer = DenseUnionScalarPrinter


class SparseUnionTypeClass(DataTypeClass):
is_parametric = True
type_printer = UnionTypePrinter
scalar_printer = UnionScalarPrinter
scalar_printer = SparseUnionScalarPrinter


class DictionaryTypeClass(DataTypeClass):
Expand Down Expand Up @@ -2037,8 +2061,8 @@ class ExtensionTypeClass(DataTypeClass):
Type.MAP: DataTypeTraits(MapTypeClass, 'MapType'),

Type.STRUCT: DataTypeTraits(StructTypeClass, 'StructType'),
Type.SPARSE_UNION: DataTypeTraits(UnionTypeClass, 'SparseUnionType'),
Type.DENSE_UNION: DataTypeTraits(UnionTypeClass, 'DenseUnionType'),
Type.SPARSE_UNION: DataTypeTraits(SparseUnionTypeClass, 'SparseUnionType'),
Type.DENSE_UNION: DataTypeTraits(DenseUnionTypeClass, 'DenseUnionType'),

Type.DICTIONARY: DataTypeTraits(DictionaryTypeClass, 'DictionaryType'),
Type.EXTENSION: DataTypeTraits(ExtensionTypeClass, 'ExtensionType'),
Expand Down
23 changes: 9 additions & 14 deletions cpp/src/arrow/array/array_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,16 +104,15 @@ struct ScalarFromArraySlotImpl {
}

Status Visit(const SparseUnionArray& a) {
const auto type_code = a.type_code(index_);
// child array which stores the actual value
const auto arr = a.field(a.child_id(index_));
// no need to adjust the index
ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(index_));
if (value->is_valid) {
out_ = std::shared_ptr<Scalar>(new SparseUnionScalar(value, type_code, a.type()));
} else {
out_ = std::shared_ptr<Scalar>(new SparseUnionScalar(type_code, a.type()));
int8_t type_code = a.type_code(index_);

ScalarVector children;
for (int i = 0; i < a.type()->num_fields(); ++i) {
children.emplace_back();
ARROW_ASSIGN_OR_RAISE(children.back(), a.field(i)->GetScalar(index_));
}

out_ = std::make_shared<SparseUnionScalar>(std::move(children), type_code, a.type());
return Status::OK();
}

Expand All @@ -124,11 +123,7 @@ struct ScalarFromArraySlotImpl {
// need to look up the value based on offsets
auto offset = a.value_offset(index_);
ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(offset));
if (value->is_valid) {
out_ = std::shared_ptr<Scalar>(new DenseUnionScalar(value, type_code, a.type()));
} else {
out_ = std::shared_ptr<Scalar>(new DenseUnionScalar(type_code, a.type()));
}
out_ = std::make_shared<DenseUnionScalar>(value, type_code, a.type());
return Status::OK();
}

Expand Down
12 changes: 6 additions & 6 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -561,16 +561,16 @@ static ScalarVector GetScalars() {
},
struct_({field("min", int32()), field("max", int32())})),
// Same values, different union type codes
std::make_shared<SparseUnionScalar>(std::make_shared<Int32Scalar>(100), 6,
sparse_union_ty),
std::make_shared<SparseUnionScalar>(std::make_shared<Int32Scalar>(100), 42,
sparse_union_ty),
std::make_shared<SparseUnionScalar>(42, sparse_union_ty),
SparseUnionScalar::FromValue(std::make_shared<Int32Scalar>(100), 1,
sparse_union_ty),
SparseUnionScalar::FromValue(std::make_shared<Int32Scalar>(100), 2,
sparse_union_ty),
SparseUnionScalar::FromValue(MakeNullScalar(int32()), 2, sparse_union_ty),
std::make_shared<DenseUnionScalar>(std::make_shared<Int32Scalar>(101), 6,
dense_union_ty),
std::make_shared<DenseUnionScalar>(std::make_shared<Int32Scalar>(101), 42,
dense_union_ty),
std::make_shared<DenseUnionScalar>(42, dense_union_ty),
std::make_shared<DenseUnionScalar>(MakeNullScalar(int32()), 42, dense_union_ty),
DictionaryScalar::Make(ScalarFromJSON(int8(), "1"),
ArrayFromJSON(utf8(), R"(["foo", "bar"])")),
DictionaryScalar::Make(ScalarFromJSON(uint8(), "1"),
Expand Down
Loading