Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cpp/src/parquet/arrow/arrow_reader_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3443,8 +3443,8 @@ TEST(ArrowReadWrite, Decimal256AsInt) {
auto table = ::arrow::Table::Make(::arrow::schema({field("root", type)}), {array});

parquet::WriterProperties::Builder builder;
// Enforce integer type to annotate decimal type
auto writer_properties = builder.enable_integer_annotate_decimal()->build();
// Allow small decimals to be stored as int32 or int64.
auto writer_properties = builder.enable_store_decimal_as_integer()->build();
auto props_store_schema = ArrowWriterProperties::Builder().store_schema()->build();

CheckConfiguredRoundtrip(table, table, writer_properties, props_store_schema);
Expand Down Expand Up @@ -4821,8 +4821,8 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO<TestType> {
auto arrow_schema = ::arrow::schema({::arrow::field("a", values->type())});

parquet::WriterProperties::Builder builder;
// Enforce integer type to annotate decimal type
auto writer_properties = builder.enable_integer_annotate_decimal()->build();
// Allow small decimals to be stored as int32 or int64.
auto writer_properties = builder.enable_store_decimal_as_integer()->build();
std::shared_ptr<SchemaDescriptor> parquet_schema;
ASSERT_OK_NO_THROW(ToParquetSchema(arrow_schema.get(), *writer_properties,
*default_arrow_writer_properties(),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/parquet/arrow/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ Status FieldToNode(const std::string& name, const std::shared_ptr<Field>& field,
const auto& decimal_type = static_cast<const ::arrow::DecimalType&>(*field->type());
precision = decimal_type.precision();
scale = decimal_type.scale();
if (properties.integer_annotate_decimal() && 1 <= precision && precision <= 18) {
if (properties.store_decimal_as_integer() && 1 <= precision && precision <= 18) {
type = precision <= 9 ? ParquetType ::INT32 : ParquetType ::INT64;
} else {
type = ParquetType::FIXED_LEN_BYTE_ARRAY;
Expand Down
48 changes: 32 additions & 16 deletions cpp/src/parquet/properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ class PARQUET_EXPORT WriterProperties {
version_(ParquetVersion::PARQUET_2_4),
data_page_version_(ParquetDataPageVersion::V1),
created_by_(DEFAULT_CREATED_BY),
integer_annotate_decimal_(false) {}
store_decimal_as_integer_(false) {}
virtual ~Builder() {}

/// Specify the memory pool for the writer. Default default_memory_pool.
Expand Down Expand Up @@ -452,19 +452,35 @@ class PARQUET_EXPORT WriterProperties {
return this->disable_statistics(path->ToDotString());
}

/// Enable integer type to annotate decimal type as below:
/// int32: 1 <= precision <= 9
/// int64: 10 <= precision <= 18
/// Default disabled.
Builder* enable_integer_annotate_decimal() {
integer_annotate_decimal_ = true;
/// Allow decimals with 1 <= precision <= 18 to be stored as integers.
///
/// In Parquet, DECIMAL can be stored in any of the following physical types:
/// - int32: for 1 <= precision <= 9.
/// - int64: for 10 <= precision <= 18.
/// - fixed_len_byte_array: precision is limited by the array size.
/// Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits.
/// - binary: precision is unlimited. The minimum number of bytes to store
/// the unscaled value is used.
///
/// By default, this is DISABLED and all decimal types are stored as fixed_len_byte_array.
///
/// When enabled, the C++ writer will use the following physical types to store decimals:
/// - int32: for 1 <= precision <= 9.
/// - int64: for 10 <= precision <= 18.
/// - fixed_len_byte_array: for precision > 18.
///
/// As a consequence, decimal columns stored in integer types are more compact.
Builder* enable_store_decimal_as_integer() {
store_decimal_as_integer_ = true;
return this;
}

/// Disable integer type to annotate decimal type.
/// Do not store decimals with 1 <= precision <= 18 as an integer physical type.
///
/// This is the default: storing decimals as integers is disabled unless it is
/// explicitly enabled.
Builder* disable_integer_annotate_decimal() {
integer_annotate_decimal_ = false;
Builder* disable_store_decimal_as_integer() {
store_decimal_as_integer_ = false;
return this;
}

Expand Down Expand Up @@ -493,7 +509,7 @@ class PARQUET_EXPORT WriterProperties {
pool_, dictionary_pagesize_limit_, write_batch_size_, max_row_group_length_,
pagesize_, version_, created_by_, std::move(file_encryption_properties_),
default_column_properties_, column_properties, data_page_version_,
integer_annotate_decimal_));
store_decimal_as_integer_));
}

private:
Expand All @@ -505,7 +521,7 @@ class PARQUET_EXPORT WriterProperties {
ParquetVersion::type version_;
ParquetDataPageVersion data_page_version_;
std::string created_by_;
bool integer_annotate_decimal_;
bool store_decimal_as_integer_;

std::shared_ptr<FileEncryptionProperties> file_encryption_properties_;

Expand Down Expand Up @@ -536,7 +552,7 @@ class PARQUET_EXPORT WriterProperties {

inline std::string created_by() const { return parquet_created_by_; }

inline bool integer_annotate_decimal() const { return integer_annotate_decimal_; }
inline bool store_decimal_as_integer() const { return store_decimal_as_integer_; }

inline Encoding::type dictionary_index_encoding() const {
if (parquet_version_ == ParquetVersion::PARQUET_1_0) {
Expand Down Expand Up @@ -606,7 +622,7 @@ class PARQUET_EXPORT WriterProperties {
std::shared_ptr<FileEncryptionProperties> file_encryption_properties,
const ColumnProperties& default_column_properties,
const std::unordered_map<std::string, ColumnProperties>& column_properties,
ParquetDataPageVersion data_page_version, bool integer_annotate_decimal)
ParquetDataPageVersion data_page_version, bool store_short_decimal_as_integer)
: pool_(pool),
dictionary_pagesize_limit_(dictionary_pagesize_limit),
write_batch_size_(write_batch_size),
Expand All @@ -615,7 +631,7 @@ class PARQUET_EXPORT WriterProperties {
parquet_data_page_version_(data_page_version),
parquet_version_(version),
parquet_created_by_(created_by),
integer_annotate_decimal_(integer_annotate_decimal),
store_decimal_as_integer_(store_short_decimal_as_integer),
file_encryption_properties_(file_encryption_properties),
default_column_properties_(default_column_properties),
column_properties_(column_properties) {}
Expand All @@ -628,7 +644,7 @@ class PARQUET_EXPORT WriterProperties {
ParquetDataPageVersion parquet_data_page_version_;
ParquetVersion::type parquet_version_;
std::string parquet_created_by_;
bool integer_annotate_decimal_;
bool store_decimal_as_integer_;

std::shared_ptr<FileEncryptionProperties> file_encryption_properties_;

Expand Down