|
32 | 32 | #include "arrow/datum.h" |
33 | 33 | #include "arrow/record_batch.h" |
34 | 34 | #include "arrow/result.h" |
| 35 | +#include "arrow/table.h" |
35 | 36 | #include "arrow/util/async_generator.h" |
36 | 37 | #include "arrow/util/checked_cast.h" |
37 | 38 | #include "arrow/util/key_value_metadata.h" |
38 | 39 | #include "arrow/util/logging.h" |
39 | 40 | #include "arrow/util/tracing_internal.h" |
| 41 | +#include "arrow/util/vector.h" |
40 | 42 |
|
41 | 43 | namespace arrow { |
42 | 44 |
|
@@ -555,6 +557,61 @@ bool Declaration::IsValid(ExecFactoryRegistry* registry) const { |
555 | 557 | return !this->factory_name.empty() && this->options != nullptr; |
556 | 558 | } |
557 | 559 |
|
| 560 | +Future<std::shared_ptr<Table>> DeclarationToTableAsync(Declaration declaration, |
| 561 | + ExecContext* exec_context) { |
| 562 | + std::shared_ptr<std::shared_ptr<Table>> output_table = |
| 563 | + std::make_shared<std::shared_ptr<Table>>(); |
| 564 | + ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ExecPlan> exec_plan, |
| 565 | + ExecPlan::Make(exec_context)); |
| 566 | + Declaration with_sink = Declaration::Sequence( |
| 567 | + {declaration, {"table_sink", TableSinkNodeOptions(output_table.get())}}); |
| 568 | + ARROW_RETURN_NOT_OK(with_sink.AddToPlan(exec_plan.get())); |
| 569 | + ARROW_RETURN_NOT_OK(exec_plan->StartProducing()); |
| 570 | + return exec_plan->finished().Then([exec_plan, output_table] { return *output_table; }); |
| 571 | +} |
| 572 | + |
| 573 | +Result<std::shared_ptr<Table>> DeclarationToTable(Declaration declaration, |
| 574 | + ExecContext* exec_context) { |
| 575 | + return DeclarationToTableAsync(std::move(declaration), exec_context).result(); |
| 576 | +} |
| 577 | + |
| 578 | +Future<std::vector<std::shared_ptr<RecordBatch>>> DeclarationToBatchesAsync( |
| 579 | + Declaration declaration, ExecContext* exec_context) { |
| 580 | + return DeclarationToTableAsync(std::move(declaration), exec_context) |
| 581 | + .Then([](const std::shared_ptr<Table>& table) { |
| 582 | + return TableBatchReader(table).ToRecordBatches(); |
| 583 | + }); |
| 584 | +} |
| 585 | + |
| 586 | +Result<std::vector<std::shared_ptr<RecordBatch>>> DeclarationToBatches( |
| 587 | + Declaration declaration, ExecContext* exec_context) { |
| 588 | + return DeclarationToBatchesAsync(std::move(declaration), exec_context).result(); |
| 589 | +} |
| 590 | + |
| 591 | +Future<std::vector<ExecBatch>> DeclarationToExecBatchesAsync(Declaration declaration, |
| 592 | + ExecContext* exec_context) { |
| 593 | + AsyncGenerator<std::optional<ExecBatch>> sink_gen; |
| 594 | + ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ExecPlan> exec_plan, |
| 595 | + ExecPlan::Make(exec_context)); |
| 596 | + Declaration with_sink = |
| 597 | + Declaration::Sequence({declaration, {"sink", SinkNodeOptions(&sink_gen)}}); |
| 598 | + ARROW_RETURN_NOT_OK(with_sink.AddToPlan(exec_plan.get())); |
| 599 | + ARROW_RETURN_NOT_OK(exec_plan->StartProducing()); |
| 600 | + auto collected_fut = CollectAsyncGenerator(sink_gen); |
| 601 | + return AllComplete({exec_plan->finished(), Future<>(collected_fut)}) |
| 602 | + .Then([collected_fut, exec_plan]() -> Result<std::vector<ExecBatch>> { |
| 603 | + ARROW_ASSIGN_OR_RAISE(auto collected, collected_fut.result()); |
| 604 | + return ::arrow::internal::MapVector( |
| 605 | + [](std::optional<ExecBatch> batch) { return std::move(*batch); }, |
| 606 | + std::move(collected)); |
| 607 | + }); |
| 608 | +} |
| 609 | + |
| 610 | +Result<std::vector<ExecBatch>> DeclarationToExecBatches(Declaration declaration, |
| 611 | + ExecContext* exec_context) { |
| 612 | + return DeclarationToExecBatchesAsync(std::move(declaration), exec_context).result(); |
| 613 | +} |
| 614 | + |
558 | 615 | namespace internal { |
559 | 616 |
|
560 | 617 | void RegisterSourceNode(ExecFactoryRegistry*); |
|
0 commit comments