Skip to content
This repository was archived by the owner on May 10, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/parquet/column/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
install(FILES
page.h
levels.h
properties.h
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I missed this in the previous patch

reader.h
scanner.h
writer.h
Expand Down
1 change: 0 additions & 1 deletion src/parquet/column/properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

#include "parquet/util/input.h"
#include "parquet/util/mem-allocator.h"
#include "parquet/types.h"

namespace parquet {

Expand Down
16 changes: 12 additions & 4 deletions src/parquet/schema/descriptor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "parquet/schema/descriptor.h"

#include "parquet/exception.h"
#include "parquet/util/logging.h"

namespace parquet {

Expand All @@ -42,12 +43,12 @@ void SchemaDescriptor::Init(const NodePtr& schema) {
leaves_.clear();

for (int i = 0; i < group_->field_count(); ++i) {
BuildTree(group_->field(i), 0, 0);
BuildTree(group_->field(i), 0, 0, group_->field(i));
}
}

void SchemaDescriptor::BuildTree(
const NodePtr& node, int16_t max_def_level, int16_t max_rep_level) {
void SchemaDescriptor::BuildTree(const NodePtr& node, int16_t max_def_level,
int16_t max_rep_level, const NodePtr& base) {
if (node->is_optional()) {
++max_def_level;
} else if (node->is_repeated()) {
Expand All @@ -61,11 +62,12 @@ void SchemaDescriptor::BuildTree(
if (node->is_group()) {
const GroupNode* group = static_cast<const GroupNode*>(node.get());
for (int i = 0; i < group->field_count(); ++i) {
BuildTree(group->field(i), max_def_level, max_rep_level);
BuildTree(group->field(i), max_def_level, max_rep_level, base);
}
} else {
// Primitive node, append to leaves
leaves_.push_back(ColumnDescriptor(node, max_def_level, max_rep_level, this));
leaf_to_base_.emplace(leaves_.size() - 1, base);
}
}

Expand All @@ -81,9 +83,15 @@ ColumnDescriptor::ColumnDescriptor(const schema::NodePtr& node,
}

const ColumnDescriptor* SchemaDescriptor::Column(int i) const {
DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
return &leaves_[i];
}

const schema::NodePtr& SchemaDescriptor::GetColumnRoot(int i) const {
DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
return leaf_to_base_.find(i)->second;
}

int ColumnDescriptor::type_scale() const {
return primitive_node_->decimal_metadata().scale;
}
Expand Down
11 changes: 8 additions & 3 deletions src/parquet/schema/descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,19 @@ class SchemaDescriptor {

const schema::NodePtr& schema() const { return schema_; }

const schema::GroupNode* group() const { return group_; }

// Returns the root (child of the schema root) node of the leaf(column) node
const schema::NodePtr& GetColumnRoot(int i) const;

private:
friend class ColumnDescriptor;

schema::NodePtr schema_;
const schema::GroupNode* group_;

void BuildTree(
const schema::NodePtr& node, int16_t max_def_level, int16_t max_rep_level);
void BuildTree(const schema::NodePtr& node, int16_t max_def_level,
int16_t max_rep_level, const schema::NodePtr& base);

// Result of leaf node / tree analysis
std::vector<ColumnDescriptor> leaves_;
Expand All @@ -122,7 +127,7 @@ class SchemaDescriptor {
// -- -- b |
// -- -- -- c |
// -- -- -- -- d
std::unordered_map<int, schema::NodePtr> leaf_to_base_;
std::unordered_map<int, const schema::NodePtr> leaf_to_base_;
};

} // namespace parquet
Expand Down
10 changes: 9 additions & 1 deletion src/parquet/schema/schema-descriptor-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ TEST_F(TestSchemaDescriptor, BuildTree) {
NodeVector fields;
NodePtr schema;

fields.push_back(Int32("a", Repetition::REQUIRED));
NodePtr inta = Int32("a", Repetition::REQUIRED);
fields.push_back(inta);
fields.push_back(Int64("b", Repetition::OPTIONAL));
fields.push_back(ByteArray("c", Repetition::REPEATED));

Expand Down Expand Up @@ -122,6 +123,13 @@ TEST_F(TestSchemaDescriptor, BuildTree) {
ASSERT_EQ(descr_.Column(4)->path()->ToDotString(), "bag.records.item2");
ASSERT_EQ(descr_.Column(5)->path()->ToDotString(), "bag.records.item3");

ASSERT_EQ(inta.get(), descr_.GetColumnRoot(0).get());
ASSERT_EQ(bag.get(), descr_.GetColumnRoot(3).get());
ASSERT_EQ(bag.get(), descr_.GetColumnRoot(4).get());
ASSERT_EQ(bag.get(), descr_.GetColumnRoot(5).get());

ASSERT_EQ(schema.get(), descr_.group());

// Init clears the leaves
descr_.Init(schema);
ASSERT_EQ(nleaves, descr_.num_columns());
Expand Down