Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 21 additions & 19 deletions cpp/src/arrow/parquet/parquet-schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,17 @@

#include "arrow/parquet/schema.h"

using ParquetType = parquet::Type;
using parquet::LogicalType;
using parquet::Repetition;
using parquet::schema::NodePtr;
using parquet::schema::GroupNode;
using parquet::schema::PrimitiveNode;

namespace arrow {

namespace parquet {

using parquet_cpp::Repetition;
using parquet_cpp::schema::NodePtr;
using parquet_cpp::schema::GroupNode;
using parquet_cpp::schema::PrimitiveNode;

const auto BOOL = std::make_shared<BooleanType>();
const auto UINT8 = std::make_shared<UInt8Type>();
const auto INT32 = std::make_shared<Int32Type>();
Expand Down Expand Up @@ -66,7 +68,7 @@ class TestConvertParquetSchema : public ::testing::Test {
}

protected:
parquet_cpp::SchemaDescriptor descr_;
::parquet::SchemaDescriptor descr_;
std::shared_ptr<Schema> result_schema_;
};

Expand All @@ -75,40 +77,40 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
std::vector<std::shared_ptr<Field>> arrow_fields;

parquet_fields.push_back(
PrimitiveNode::Make("boolean", Repetition::REQUIRED, parquet_cpp::Type::BOOLEAN));
PrimitiveNode::Make("boolean", Repetition::REQUIRED, ParquetType::BOOLEAN));
arrow_fields.push_back(std::make_shared<Field>("boolean", BOOL, false));

parquet_fields.push_back(
PrimitiveNode::Make("int32", Repetition::REQUIRED, parquet_cpp::Type::INT32));
PrimitiveNode::Make("int32", Repetition::REQUIRED, ParquetType::INT32));
arrow_fields.push_back(std::make_shared<Field>("int32", INT32, false));

parquet_fields.push_back(
PrimitiveNode::Make("int64", Repetition::REQUIRED, parquet_cpp::Type::INT64));
PrimitiveNode::Make("int64", Repetition::REQUIRED, ParquetType::INT64));
arrow_fields.push_back(std::make_shared<Field>("int64", INT64, false));

parquet_fields.push_back(
PrimitiveNode::Make("float", Repetition::OPTIONAL, parquet_cpp::Type::FLOAT));
PrimitiveNode::Make("float", Repetition::OPTIONAL, ParquetType::FLOAT));
arrow_fields.push_back(std::make_shared<Field>("float", FLOAT));

parquet_fields.push_back(
PrimitiveNode::Make("double", Repetition::OPTIONAL, parquet_cpp::Type::DOUBLE));
PrimitiveNode::Make("double", Repetition::OPTIONAL, ParquetType::DOUBLE));
arrow_fields.push_back(std::make_shared<Field>("double", DOUBLE));

parquet_fields.push_back(
PrimitiveNode::Make("binary", Repetition::OPTIONAL,
parquet_cpp::Type::BYTE_ARRAY));
ParquetType::BYTE_ARRAY));
arrow_fields.push_back(std::make_shared<Field>("binary", BINARY));

parquet_fields.push_back(
PrimitiveNode::Make("string", Repetition::OPTIONAL,
parquet_cpp::Type::BYTE_ARRAY,
parquet_cpp::LogicalType::UTF8));
ParquetType::BYTE_ARRAY,
LogicalType::UTF8));
arrow_fields.push_back(std::make_shared<Field>("string", UTF8));

parquet_fields.push_back(
PrimitiveNode::Make("flba-binary", Repetition::OPTIONAL,
parquet_cpp::Type::FIXED_LEN_BYTE_ARRAY,
parquet_cpp::LogicalType::NONE, 12));
ParquetType::FIXED_LEN_BYTE_ARRAY,
LogicalType::NONE, 12));
arrow_fields.push_back(std::make_shared<Field>("flba-binary", BINARY));

auto arrow_schema = std::make_shared<Schema>(arrow_fields);
Expand All @@ -121,18 +123,18 @@ TEST_F(TestConvertParquetSchema, UnsupportedThings) {
std::vector<NodePtr> unsupported_nodes;

unsupported_nodes.push_back(
PrimitiveNode::Make("int96", Repetition::REQUIRED, parquet_cpp::Type::INT96));
PrimitiveNode::Make("int96", Repetition::REQUIRED, ParquetType::INT96));

unsupported_nodes.push_back(
GroupNode::Make("repeated-group", Repetition::REPEATED, {}));

unsupported_nodes.push_back(
PrimitiveNode::Make("int32", Repetition::OPTIONAL,
parquet_cpp::Type::INT32, parquet_cpp::LogicalType::DATE));
ParquetType::INT32, LogicalType::DATE));

unsupported_nodes.push_back(
PrimitiveNode::Make("int64", Repetition::OPTIONAL,
parquet_cpp::Type::INT64, parquet_cpp::LogicalType::TIMESTAMP_MILLIS));
ParquetType::INT64, LogicalType::TIMESTAMP_MILLIS));

for (const NodePtr& node : unsupported_nodes) {
ASSERT_RAISES(NotImplemented, ConvertSchema({node}));
Expand Down
29 changes: 15 additions & 14 deletions cpp/src/arrow/parquet/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@
#include "arrow/util/status.h"
#include "arrow/types/decimal.h"

using parquet_cpp::schema::Node;
using parquet_cpp::schema::NodePtr;
using parquet_cpp::schema::GroupNode;
using parquet_cpp::schema::PrimitiveNode;
using parquet::schema::Node;
using parquet::schema::NodePtr;
using parquet::schema::GroupNode;
using parquet::schema::PrimitiveNode;

using parquet_cpp::LogicalType;
using ParquetType = parquet::Type;
using parquet::LogicalType;

namespace arrow {

Expand Down Expand Up @@ -124,30 +125,30 @@ Status NodeToField(const NodePtr& node, std::shared_ptr<Field>* out) {
const PrimitiveNode* primitive = static_cast<const PrimitiveNode*>(node.get());

switch (primitive->physical_type()) {
case parquet_cpp::Type::BOOLEAN:
case ParquetType::BOOLEAN:
type = BOOL;
break;
case parquet_cpp::Type::INT32:
case ParquetType::INT32:
RETURN_NOT_OK(FromInt32(primitive, &type));
break;
case parquet_cpp::Type::INT64:
case ParquetType::INT64:
RETURN_NOT_OK(FromInt64(primitive, &type));
break;
case parquet_cpp::Type::INT96:
case ParquetType::INT96:
// TODO: Do we have that type in Arrow?
// type = TypePtr(new Int96Type());
return Status::NotImplemented("int96");
case parquet_cpp::Type::FLOAT:
case ParquetType::FLOAT:
type = FLOAT;
break;
case parquet_cpp::Type::DOUBLE:
case ParquetType::DOUBLE:
type = DOUBLE;
break;
case parquet_cpp::Type::BYTE_ARRAY:
case ParquetType::BYTE_ARRAY:
// TODO: Do we have that type in Arrow?
RETURN_NOT_OK(FromByteArray(primitive, &type));
break;
case parquet_cpp::Type::FIXED_LEN_BYTE_ARRAY:
case ParquetType::FIXED_LEN_BYTE_ARRAY:
RETURN_NOT_OK(FromFLBA(primitive, &type));
break;
}
Expand All @@ -157,7 +158,7 @@ Status NodeToField(const NodePtr& node, std::shared_ptr<Field>* out) {
return Status::OK();
}

Status FromParquetSchema(const parquet_cpp::SchemaDescriptor* parquet_schema,
Status FromParquetSchema(const ::parquet::SchemaDescriptor* parquet_schema,
std::shared_ptr<Schema>* out) {
// TODO(wesm): Consider adding an arrow::Schema name attribute, which comes
// from the root Parquet node
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/parquet/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ class Status;

namespace parquet {

Status NodeToField(const parquet_cpp::schema::NodePtr& node,
Status NodeToField(const ::parquet::schema::NodePtr& node,
std::shared_ptr<Field>* out);

Status FromParquetSchema(const parquet_cpp::SchemaDescriptor* parquet_schema,
Status FromParquetSchema(const ::parquet::SchemaDescriptor* parquet_schema,
std::shared_ptr<Schema>* out);

} // namespace parquet
Expand Down
3 changes: 0 additions & 3 deletions python/pyarrow/array.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,4 @@ cdef class Table:
names.append(frombytes(col.get().name()))
data.append(<object> arr)

# One ref count too many
Py_XDECREF(arr)

return pd.DataFrame(dict(zip(names, data)), columns=names)
2 changes: 1 addition & 1 deletion python/pyarrow/includes/parquet.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from pyarrow.includes.common cimport *

cdef extern from "parquet/api/reader.h" namespace "parquet_cpp" nogil:
cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
cdef cppclass ColumnReader:
pass

Expand Down