cpp/src/parquet/arrow/writer.cc (5 additions, 0 deletions)

@@ -861,6 +861,11 @@ Status ArrowColumnWriter::TypedWriteBatch<FLBAType, ::arrow::Decimal128Type>(
 }
 
 Status ArrowColumnWriter::Write(const Array& data) {
+  if (data.length() == 0) {
+    // Write nothing when length is 0
+    return Status::OK();
+  }
+
   ::arrow::Type::type values_type;
   RETURN_NOT_OK(GetLeafType(*data.type(), &values_type));
 
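In user-facing terms, the guard above turns a write of a zero-length column chunk into a no-op instead of a crash (ARROW-3792). A minimal reproduction sketch in Python; the column name and the in-memory buffer are illustrative assumptions, not taken from the PR:

```python
# Sketch: write a table containing a zero-length chunk. Without the guard,
# such input could crash the column writer; with it, the chunk is skipped.
import io

import pyarrow as pa
import pyarrow.parquet as pq

empty_chunk = pa.array([], type=pa.list_(pa.int32()))    # length-0 array
table = pa.Table.from_arrays([empty_chunk], ['values'])  # 'values' is illustrative

buf = io.BytesIO()
pq.write_table(table, buf)  # should now succeed and write an empty column
```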
python/pyarrow/tests/test_parquet.py (31 additions, 2 deletions)

@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from collections import OrderedDict
 import datetime
 import decimal
 import io
@@ -2224,6 +2225,34 @@ def test_merging_parquet_tables_with_different_pandas_metadata(tempdir):
 
 def test_writing_empty_lists():
     # ARROW-2591: [Python] Segmentation fault issue in pq.write_table
-    arr = pa.array([[], []], pa.list_(pa.int32()))
-    table = pa.Table.from_arrays([arr], ['test'])
+    arr1 = pa.array([[], []], pa.list_(pa.int32()))
+    table = pa.Table.from_arrays([arr1], ['list(int32)'])
     _check_roundtrip(table)
+
+
+def test_write_nested_zero_length_array_chunk_failure():
+    # Bug report in ARROW-3792
+    cols = OrderedDict(
+        int32=pa.int32(),
+        list_string=pa.list_(pa.string())
+    )
+    data = [[], [OrderedDict(int32=1, list_string=('G',)), ]]
+
+    # This produces a table with a column like
+    # <Column name='list_string' type=ListType(list<item: string>)>
+    # [
+    #   [],
+    #   [
+    #     [
+    #       "G"
+    #     ]
+    #   ]
+    # ]
+    #
+    # Each column is a ChunkedArray with 2 elements
+    my_arrays = [pa.array(batch, type=pa.struct(cols)).flatten()
+                 for batch in data]
+    my_batches = [pa.RecordBatch.from_arrays(batch, pa.schema(cols))
+                  for batch in my_arrays]
+    tbl = pa.Table.from_batches(my_batches, pa.schema(cols))
+    _check_roundtrip(tbl)
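For context, the empty first batch is what exercises the new C++ guard: flattening a zero-length struct array yields a zero-length child array per field, so every column of `tbl` is a ChunkedArray whose first chunk has length 0. A minimal standalone sketch of that mechanism, reusing the test's `cols` definition:

```python
# Sketch: a zero-length struct array flattens into zero-length children,
# which become the zero-length chunks the writer.cc guard short-circuits.
from collections import OrderedDict

import pyarrow as pa

cols = OrderedDict(int32=pa.int32(), list_string=pa.list_(pa.string()))
children = pa.array([], type=pa.struct(cols)).flatten()
print([len(child) for child in children])  # [0, 0]
```

`_check_roundtrip` is a helper defined earlier in test_parquet.py; broadly, it writes the table to an in-memory Parquet file and asserts that reading it back reproduces the input.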