Skip to content

Commit 7a36fcc

Browse files
hiroyuki-satokou
andauthored
GH-44810: [C++][Parquet] Add arrow::Result version of parquet::arrow::FileReader::Make() (#48285)
### Rationale for this change `FileReader::Make` previously returned `Status` and required callers to pass an `out` parameter. ### What changes are included in this PR? Introduce a `Result<std::unique_ptr<FileReader>>` returning API to allow clearer error propagation ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. Status version `FileReader::Make` has been deprecated. ```cpp static ::arrow::Status Make(::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileReader> reader, const ArrowReaderProperties& properties, std::unique_ptr<FileReader>* out); static ::arrow::Status Make(::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileReader> reader, std::unique_ptr<FileReader>* out); ``` * GitHub Issue: #44810 Lead-authored-by: Hiroyuki Sato <[email protected]> Co-authored-by: Sutou Kouhei <[email protected]> Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 6456944 commit 7a36fcc

File tree

7 files changed

+120
-90
lines changed

7 files changed

+120
-90
lines changed

cpp/src/arrow/dataset/file_parquet.cc

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -509,9 +509,9 @@ Result<std::shared_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
509509
std::shared_ptr<parquet::FileMetaData> reader_metadata = reader->metadata();
510510
auto arrow_properties =
511511
MakeArrowReaderProperties(*this, *reader_metadata, *options, *parquet_scan_options);
512-
std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
513-
RETURN_NOT_OK(parquet::arrow::FileReader::Make(
514-
options->pool, std::move(reader), std::move(arrow_properties), &arrow_reader));
512+
ARROW_ASSIGN_OR_RAISE(auto arrow_reader,
513+
parquet::arrow::FileReader::Make(options->pool, std::move(reader),
514+
std::move(arrow_properties)));
515515
// R build with openSUSE155 requires an explicit shared_ptr construction
516516
return std::shared_ptr<parquet::arrow::FileReader>(std::move(arrow_reader));
517517
}
@@ -532,37 +532,37 @@ Future<std::shared_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
532532
source.filesystem(), options->pool);
533533
auto self = checked_pointer_cast<const ParquetFileFormat>(shared_from_this());
534534

535-
return source.OpenAsync().Then(
536-
[self = self, properties = std::move(properties), source = source,
537-
options = options, metadata = metadata,
538-
parquet_scan_options = parquet_scan_options](
539-
const std::shared_ptr<io::RandomAccessFile>& input) mutable {
540-
return parquet::ParquetFileReader::OpenAsync(input, properties, metadata)
541-
.Then(
542-
[=](const std::unique_ptr<parquet::ParquetFileReader>& reader) mutable
543-
-> Result<std::shared_ptr<parquet::arrow::FileReader>> {
544-
auto arrow_properties = MakeArrowReaderProperties(
545-
*self, *reader->metadata(), *options, *parquet_scan_options);
546-
547-
std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
548-
RETURN_NOT_OK(parquet::arrow::FileReader::Make(
535+
return source.OpenAsync().Then([self = self, properties = std::move(properties),
536+
source = source, options = options, metadata = metadata,
537+
parquet_scan_options = parquet_scan_options](
538+
const std::shared_ptr<io::RandomAccessFile>&
539+
input) mutable {
540+
return parquet::ParquetFileReader::OpenAsync(input, properties, metadata)
541+
.Then(
542+
[=](const std::unique_ptr<parquet::ParquetFileReader>& reader) mutable
543+
-> Result<std::shared_ptr<parquet::arrow::FileReader>> {
544+
auto arrow_properties = MakeArrowReaderProperties(
545+
*self, *reader->metadata(), *options, *parquet_scan_options);
546+
547+
ARROW_ASSIGN_OR_RAISE(
548+
auto arrow_reader,
549+
parquet::arrow::FileReader::Make(
549550
options->pool,
550551
// TODO(ARROW-12259): workaround since we have Future<(move-only
551552
// type)> It *wouldn't* be safe to const_cast reader except that
552553
// here we know there are no other waiters on the reader.
553554
std::move(const_cast<std::unique_ptr<parquet::ParquetFileReader>&>(
554555
reader)),
555-
arrow_properties, &arrow_reader));
556-
557-
// R build with openSUSE155 requires an explicit shared_ptr construction
558-
return std::shared_ptr<parquet::arrow::FileReader>(
559-
std::move(arrow_reader));
560-
},
561-
[path = source.path()](const Status& status)
562-
-> Result<std::shared_ptr<parquet::arrow::FileReader>> {
563-
return WrapSourceError(status, path);
564-
});
565-
});
556+
arrow_properties));
557+
558+
// R build with openSUSE155 requires an explicit shared_ptr construction
559+
return std::shared_ptr<parquet::arrow::FileReader>(std::move(arrow_reader));
560+
},
561+
[path = source.path()](const Status& status)
562+
-> Result<std::shared_ptr<parquet::arrow::FileReader>> {
563+
return WrapSourceError(status, path);
564+
});
565+
});
566566
}
567567

568568
struct SlicingGenerator {

cpp/src/parquet/arrow/arrow_reader_writer_test.cc

Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4182,12 +4182,13 @@ void TryReadDataFile(const std::string& path,
41824182
const std::string& expected_message = "") {
41834183
auto pool = ::arrow::default_memory_pool();
41844184

4185-
std::unique_ptr<FileReader> arrow_reader;
4186-
Status s =
4187-
FileReader::Make(pool, ParquetFileReader::OpenFile(path, false), &arrow_reader);
4188-
if (s.ok()) {
4185+
Status s;
4186+
auto reader_result = FileReader::Make(pool, ParquetFileReader::OpenFile(path, false));
4187+
if (reader_result.ok()) {
41894188
std::shared_ptr<::arrow::Table> table;
4190-
s = arrow_reader->ReadTable(&table);
4189+
s = (*reader_result)->ReadTable(&table);
4190+
} else {
4191+
s = reader_result.status();
41914192
}
41924193

41934194
ASSERT_EQ(s.code(), expected_code)
@@ -4259,14 +4260,15 @@ TEST(TestArrowReaderAdHoc, LARGE_MEMORY_TEST(LargeStringColumn)) {
42594260
array.reset();
42604261

42614262
auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(tables_buffer));
4262-
std::unique_ptr<FileReader> arrow_reader;
4263-
ASSERT_OK(FileReader::Make(default_memory_pool(), std::move(reader), &arrow_reader));
4263+
ASSERT_OK_AND_ASSIGN(auto arrow_reader,
4264+
FileReader::Make(default_memory_pool(), std::move(reader)));
42644265
ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
42654266
ASSERT_OK(table->ValidateFull());
42664267

42674268
// ARROW-9297: ensure RecordBatchReader also works
42684269
reader = ParquetFileReader::Open(std::make_shared<BufferReader>(tables_buffer));
4269-
ASSERT_OK(FileReader::Make(default_memory_pool(), std::move(reader), &arrow_reader));
4270+
ASSERT_OK_AND_ASSIGN(arrow_reader,
4271+
FileReader::Make(default_memory_pool(), std::move(reader)));
42704272
ASSERT_OK_AND_ASSIGN(auto batch_reader, arrow_reader->GetRecordBatchReader());
42714273
ASSERT_OK_AND_ASSIGN(auto batched_table,
42724274
::arrow::Table::FromRecordBatchReader(batch_reader.get()));
@@ -4362,9 +4364,8 @@ TEST(TestArrowReaderAdHoc, LegacyTwoLevelList) {
43624364
ASSERT_EQ(kExpectedLegacyList, nodeStr.str());
43634365

43644366
// Verify Arrow schema and data
4365-
std::unique_ptr<FileReader> reader;
4366-
ASSERT_OK_NO_THROW(
4367-
FileReader::Make(default_memory_pool(), std::move(file_reader), &reader));
4367+
ASSERT_OK_AND_ASSIGN(auto reader,
4368+
FileReader::Make(default_memory_pool(), std::move(file_reader)));
43684369
std::shared_ptr<Table> table;
43694370
ASSERT_OK(reader->ReadTable(&table));
43704371
ASSERT_OK(table->ValidateFull());
@@ -4427,9 +4428,8 @@ TEST_P(TestArrowReaderAdHocSparkAndHvr, ReadDecimals) {
44274428

44284429
auto pool = ::arrow::default_memory_pool();
44294430

4430-
std::unique_ptr<FileReader> arrow_reader;
4431-
ASSERT_OK_NO_THROW(
4432-
FileReader::Make(pool, ParquetFileReader::OpenFile(path, false), &arrow_reader));
4431+
ASSERT_OK_AND_ASSIGN(auto arrow_reader,
4432+
FileReader::Make(pool, ParquetFileReader::OpenFile(path, false)));
44334433
std::shared_ptr<::arrow::Table> table;
44344434
ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
44354435

@@ -4494,9 +4494,8 @@ TEST(TestArrowReaderAdHoc, ReadFloat16Files) {
44944494
path += "/" + tc.filename + ".parquet";
44954495
ARROW_SCOPED_TRACE("path = ", path);
44964496

4497-
std::unique_ptr<FileReader> reader;
4498-
ASSERT_OK_NO_THROW(
4499-
FileReader::Make(pool, ParquetFileReader::OpenFile(path, false), &reader));
4497+
ASSERT_OK_AND_ASSIGN(
4498+
auto reader, FileReader::Make(pool, ParquetFileReader::OpenFile(path, false)));
45004499
std::shared_ptr<::arrow::Table> table;
45014500
ASSERT_OK_NO_THROW(reader->ReadTable(&table));
45024501

@@ -4539,9 +4538,8 @@ TEST(TestArrowFileReader, RecordBatchReaderEmptyRowGroups) {
45394538
default_arrow_writer_properties(), &buffer));
45404539

45414540
auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
4542-
std::unique_ptr<FileReader> file_reader;
4543-
ASSERT_OK(
4544-
FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));
4541+
ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(),
4542+
std::move(reader)));
45454543
// This is the important part in this test.
45464544
std::vector<int> row_group_indices = {};
45474545
ASSERT_OK_AND_ASSIGN(auto record_batch_reader,
@@ -4567,9 +4565,8 @@ TEST(TestArrowFileReader, RecordBatchReaderEmptyInput) {
45674565
default_arrow_writer_properties(), &buffer));
45684566

45694567
auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
4570-
std::unique_ptr<FileReader> file_reader;
4571-
ASSERT_OK(
4572-
FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));
4568+
ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(),
4569+
std::move(reader)));
45734570
ASSERT_OK_AND_ASSIGN(auto record_batch_reader, file_reader->GetRecordBatchReader());
45744571
std::shared_ptr<::arrow::RecordBatch> record_batch;
45754572
ASSERT_OK(record_batch_reader->ReadNext(&record_batch));
@@ -4591,9 +4588,8 @@ TEST(TestArrowColumnReader, NextBatchZeroBatchSize) {
45914588
default_arrow_writer_properties(), &buffer));
45924589

45934590
auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
4594-
std::unique_ptr<FileReader> file_reader;
4595-
ASSERT_OK(
4596-
FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));
4591+
ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(),
4592+
std::move(reader)));
45974593
std::unique_ptr<arrow::ColumnReader> column_reader;
45984594
ASSERT_OK(file_reader->GetColumn(0, &column_reader));
45994595
std::shared_ptr<ChunkedArray> chunked_array;
@@ -4617,9 +4613,8 @@ TEST(TestArrowColumnReader, NextBatchEmptyInput) {
46174613
default_arrow_writer_properties(), &buffer));
46184614

46194615
auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
4620-
std::unique_ptr<FileReader> file_reader;
4621-
ASSERT_OK(
4622-
FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));
4616+
ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(),
4617+
std::move(reader)));
46234618
std::unique_ptr<arrow::ColumnReader> column_reader;
46244619
ASSERT_OK(file_reader->GetColumn(0, &column_reader));
46254620
std::shared_ptr<ChunkedArray> chunked_array;
@@ -5141,9 +5136,9 @@ class TestArrowReadDeltaEncoding : public ::testing::Test {
51415136
std::shared_ptr<Table>* out) {
51425137
auto file = test::get_data_file(file_name);
51435138
auto pool = ::arrow::default_memory_pool();
5144-
std::unique_ptr<FileReader> parquet_reader;
5145-
ASSERT_OK(FileReader::Make(pool, ParquetFileReader::OpenFile(file, false),
5146-
&parquet_reader));
5139+
ASSERT_OK_AND_ASSIGN(
5140+
auto parquet_reader,
5141+
FileReader::Make(pool, ParquetFileReader::OpenFile(file, false)));
51475142
ASSERT_OK(parquet_reader->ReadTable(out));
51485143
ASSERT_OK((*out)->ValidateFull());
51495144
}
@@ -5201,9 +5196,9 @@ TEST_F(TestArrowReadDeltaEncoding, IncrementalDecodeDeltaByteArray) {
52015196
const int64_t batch_size = 100;
52025197
ArrowReaderProperties properties = default_arrow_reader_properties();
52035198
properties.set_batch_size(batch_size);
5204-
std::unique_ptr<FileReader> parquet_reader;
5205-
ASSERT_OK(FileReader::Make(pool, ParquetFileReader::OpenFile(file, false), properties,
5206-
&parquet_reader));
5199+
ASSERT_OK_AND_ASSIGN(
5200+
auto parquet_reader,
5201+
FileReader::Make(pool, ParquetFileReader::OpenFile(file, false), properties));
52075202
ASSERT_OK_AND_ASSIGN(auto rb_reader, parquet_reader->GetRecordBatchReader());
52085203

52095204
auto convert_options = ::arrow::csv::ConvertOptions::Defaults();
@@ -5729,8 +5724,8 @@ TEST(TestArrowReadWrite, WriteAndReadRecordBatch) {
57295724
auto read_properties = default_arrow_reader_properties();
57305725
read_properties.set_batch_size(record_batch->num_rows());
57315726
auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
5732-
std::unique_ptr<FileReader> arrow_reader;
5733-
ASSERT_OK(FileReader::Make(pool, std::move(reader), read_properties, &arrow_reader));
5727+
ASSERT_OK_AND_ASSIGN(auto arrow_reader,
5728+
FileReader::Make(pool, std::move(reader), read_properties));
57345729

57355730
// Verify the single record batch has been sliced into two row groups by
57365731
// WriterProperties::max_row_group_length().

cpp/src/parquet/arrow/arrow_statistics_test.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,9 @@ ::arrow::Result<std::shared_ptr<::arrow::Array>> StatisticsReadArray(
202202

203203
auto reader =
204204
ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer));
205-
std::unique_ptr<FileReader> file_reader;
206-
ARROW_RETURN_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader),
207-
reader_properties, &file_reader));
205+
ARROW_ASSIGN_OR_RAISE(auto file_reader,
206+
FileReader::Make(::arrow::default_memory_pool(),
207+
std::move(reader), reader_properties));
208208
std::shared_ptr<::arrow::ChunkedArray> chunked_array;
209209
ARROW_RETURN_NOT_OK(file_reader->ReadColumn(0, &chunked_array));
210210
return chunked_array->chunk(0);

cpp/src/parquet/arrow/fuzz_internal.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,10 @@ Status FuzzReader(const uint8_t* data, int64_t size) {
281281
pq_file_reader = ParquetFileReader::Open(file, reader_properties, pq_md);
282282
END_PARQUET_CATCH_EXCEPTIONS
283283

284-
std::unique_ptr<FileReader> reader;
285-
RETURN_NOT_OK(FileReader::Make(pool, std::move(pq_file_reader), properties, &reader));
284+
auto arrow_reader_result =
285+
FileReader::Make(pool, std::move(pq_file_reader), properties);
286+
RETURN_NOT_OK(arrow_reader_result.status());
287+
auto reader = std::move(*arrow_reader_result);
286288
st &= FuzzReadData(std::move(reader));
287289
}
288290
return st;

cpp/src/parquet/arrow/reader.cc

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,14 +1343,30 @@ Status FileReader::Make(::arrow::MemoryPool* pool,
13431343
std::unique_ptr<ParquetFileReader> reader,
13441344
const ArrowReaderProperties& properties,
13451345
std::unique_ptr<FileReader>* out) {
1346-
*out = std::make_unique<FileReaderImpl>(pool, std::move(reader), properties);
1347-
return static_cast<FileReaderImpl*>(out->get())->Init();
1346+
ARROW_ASSIGN_OR_RAISE(*out, Make(pool, std::move(reader), properties));
1347+
return Status::OK();
13481348
}
13491349

13501350
Status FileReader::Make(::arrow::MemoryPool* pool,
13511351
std::unique_ptr<ParquetFileReader> reader,
13521352
std::unique_ptr<FileReader>* out) {
1353-
return Make(pool, std::move(reader), default_arrow_reader_properties(), out);
1353+
ARROW_ASSIGN_OR_RAISE(*out,
1354+
Make(pool, std::move(reader), default_arrow_reader_properties()));
1355+
return Status::OK();
1356+
}
1357+
1358+
Result<std::unique_ptr<FileReader>> FileReader::Make(
1359+
::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileReader> parquet_reader,
1360+
const ArrowReaderProperties& properties) {
1361+
std::unique_ptr<FileReader> reader =
1362+
std::make_unique<FileReaderImpl>(pool, std::move(parquet_reader), properties);
1363+
RETURN_NOT_OK(static_cast<FileReaderImpl*>(reader.get())->Init());
1364+
return reader;
1365+
}
1366+
1367+
Result<std::unique_ptr<FileReader>> FileReader::Make(
1368+
::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileReader> parquet_reader) {
1369+
return Make(pool, std::move(parquet_reader), default_arrow_reader_properties());
13541370
}
13551371

13561372
FileReaderBuilder::FileReaderBuilder()
@@ -1385,13 +1401,13 @@ FileReaderBuilder* FileReaderBuilder::properties(
13851401
}
13861402

13871403
Status FileReaderBuilder::Build(std::unique_ptr<FileReader>* out) {
1388-
return FileReader::Make(pool_, std::move(raw_reader_), properties_, out);
1404+
ARROW_ASSIGN_OR_RAISE(*out,
1405+
FileReader::Make(pool_, std::move(raw_reader_), properties_));
1406+
return Status::OK();
13891407
}
13901408

13911409
Result<std::unique_ptr<FileReader>> FileReaderBuilder::Build() {
1392-
std::unique_ptr<FileReader> out;
1393-
RETURN_NOT_OK(FileReader::Make(pool_, std::move(raw_reader_), properties_, &out));
1394-
return out;
1410+
return FileReader::Make(pool_, std::move(raw_reader_), properties_);
13951411
}
13961412

13971413
Result<std::unique_ptr<FileReader>> OpenFile(

cpp/src/parquet/arrow/reader.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,16 +116,29 @@ class RowGroupReader;
116116
class PARQUET_EXPORT FileReader {
117117
public:
118118
/// Factory function to create a FileReader from a ParquetFileReader and properties
119+
/// \deprecated Deprecated in 23.0.0. Use arrow::Result version instead.
120+
ARROW_DEPRECATED("Deprecated in 23.0.0. Use arrow::Result version instead.")
119121
static ::arrow::Status Make(::arrow::MemoryPool* pool,
120122
std::unique_ptr<ParquetFileReader> reader,
121123
const ArrowReaderProperties& properties,
122124
std::unique_ptr<FileReader>* out);
123125

124126
/// Factory function to create a FileReader from a ParquetFileReader
127+
/// \deprecated Deprecated in 23.0.0. Use arrow::Result version instead.
128+
ARROW_DEPRECATED("Deprecated in 23.0.0. Use arrow::Result version instead.")
125129
static ::arrow::Status Make(::arrow::MemoryPool* pool,
126130
std::unique_ptr<ParquetFileReader> reader,
127131
std::unique_ptr<FileReader>* out);
128132

133+
/// Factory function to create a FileReader from a ParquetFileReader and properties
134+
static ::arrow::Result<std::unique_ptr<FileReader>> Make(
135+
::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileReader> reader,
136+
const ArrowReaderProperties& properties);
137+
138+
/// Factory function to create a FileReader from a ParquetFileReader
139+
static ::arrow::Result<std::unique_ptr<FileReader>> Make(
140+
::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileReader> reader);
141+
129142
// Since the distribution of columns amongst a Parquet file's row groups may
130143
// be uneven (the number of values in each column chunk can be different), we
131144
// provide a column-oriented read interface. The ColumnReader hides the

0 commit comments

Comments
 (0)