Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/ipc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ set(FBS_OUTPUT_FILES
set(FBS_SRC
${CMAKE_SOURCE_DIR}/../format/Message.fbs
${CMAKE_SOURCE_DIR}/../format/File.fbs
${CMAKE_SOURCE_DIR}/../format/Schema.fbs
${CMAKE_CURRENT_SOURCE_DIR}/feather.fbs)

foreach(FIL ${FBS_SRC})
Expand Down
2 changes: 1 addition & 1 deletion format/File.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

include "Message.fbs";
include "Schema.fbs";

namespace org.apache.arrow.flatbuf;

Expand Down
264 changes: 3 additions & 261 deletions format/Message.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -15,272 +15,14 @@
// specific language governing permissions and limitations
// under the License.

namespace org.apache.arrow.flatbuf;

enum MetadataVersion:short {
V1,
V2
}

/// ----------------------------------------------------------------------
/// Logical types and their metadata (if any)
///
/// These are stored in the flatbuffer in the Type union below

table Null {
}

/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
/// (according to the physical memory layout). We used Struct_ here as
/// Struct is a reserved word in Flatbuffers
table Struct_ {
}

table List {
}

enum UnionMode:short { Sparse, Dense }

/// A union is a complex type with children in Field
/// By default ids in the type vector refer to the offsets in the children
/// optionally typeIds provides an indirection between the child offset and the type id
/// for each child typeIds[offset] is the id used in the type vector
table Union {
mode: UnionMode;
typeIds: [ int ]; // optional, describes typeid of each child.
}

table Int {
bitWidth: int; // restricted to 8, 16, 32, and 64 in v1
is_signed: bool;
}

enum Precision:short {HALF, SINGLE, DOUBLE}

table FloatingPoint {
precision: Precision;
}

/// Unicode with UTF-8 encoding
table Utf8 {
}

table Binary {
}

table FixedWidthBinary {
/// Number of bytes per value
byteWidth: int;
}

table Bool {
}

table Decimal {
precision: int;
scale: int;
}

enum DateUnit: short {
DAY,
MILLISECOND
}

/// Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
/// epoch (1970-01-01), stored in either of two units:
///
/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
/// leap seconds), where the values are evenly divisible by 86400000
/// * Days (32 bits) since the UNIX epoch
table Date {
unit: DateUnit;
}

enum TimeUnit: short { SECOND, MILLISECOND, MICROSECOND, NANOSECOND }

/// Time type. The physical storage type depends on the unit
/// - SECOND and MILLISECOND: 32 bits
/// - MICROSECOND and NANOSECOND: 64 bits
table Time {
unit: TimeUnit;
bitWidth: int;
}

/// Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, UTC.
table Timestamp {
unit: TimeUnit;

/// The time zone is a string indicating the name of a time zone, one of:
///
/// * As used in the Olson time zone database (the "tz database" or
/// "tzdata"), such as "America/New_York"
/// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
///
/// Whether a timezone string is present indicates different semantics about
/// the data:
///
/// * If the time zone is null or equal to an empty string, the data is "time
/// zone naive" and shall be displayed *as is* to the user, not localized
/// to the locale of the user. This data can be though of as UTC but
/// without having "UTC" as the time zone, it is not considered to be
/// localized to any time zone
///
/// * If the time zone is set to a valid value, values can be displayed as
/// "localized" to that time zone, even though the underlying 64-bit
/// integers are identical to the same data stored in UTC. Converting
/// between time zones is a metadata-only operation and does not change the
/// underlying values
timezone: string;
}

enum IntervalUnit: short { YEAR_MONTH, DAY_TIME}
table Interval {
unit: IntervalUnit;
}

/// ----------------------------------------------------------------------
/// Top-level Type value, enabling extensible type-specific metadata. We can
/// add new logical types to Type without breaking backwards compatibility

union Type {
Null,
Int,
FloatingPoint,
Binary,
Utf8,
Bool,
Decimal,
Date,
Time,
Timestamp,
Interval,
List,
Struct_,
Union,
FixedWidthBinary
}

/// ----------------------------------------------------------------------
/// The possible types of a vector

enum VectorType: short {
/// used in List type, Dense Union and variable length primitive types (String, Binary)
OFFSET,
/// actual data, either wixed width primitive types in slots or variable width delimited by an OFFSET vector
DATA,
/// Bit vector indicating if each value is null
VALIDITY,
/// Type vector used in Union type
TYPE
}

/// ----------------------------------------------------------------------
/// represents the physical layout of a buffer
/// buffers have fixed width slots of a given type

table VectorLayout {
/// the width of a slot in the buffer (typically 1, 8, 16, 32 or 64)
bit_width: short;
/// the purpose of the vector
type: VectorType;
}


/// ----------------------------------------------------------------------
/// user defined key value pairs to add custom metadata to arrow
/// key namespacing is the responsibility of the user

table KeyValue {
key: string;
value: [ubyte];
}

/// ----------------------------------------------------------------------
/// Dictionary encoding metadata

table DictionaryEncoding {
/// The known dictionary id in the application where this data is used. In
/// the file or streaming formats, the dictionary ids are found in the
/// DictionaryBatch messages
id: long;

/// The dictionary indices are constrained to be positive integers. If this
/// field is null, the indices must be signed int32
indexType: Int;
include "Schema.fbs";

/// By default, dictionaries are not ordered, or the order does not have
/// semantic meaning. In some statistical, applications, dictionary-encoding
/// is used to represent ordered categorical data, and we provide a way to
/// preserve that metadata here
isOrdered: bool;
}

/// ----------------------------------------------------------------------
/// A field represents a named column in a record / row batch or child of a
/// nested type.
///
/// - children is only for nested Arrow arrays
/// - For primitive types, children will have length 0
/// - nullable should default to true in general

table Field {
// Name is not required, in i.e. a List
name: string;
nullable: bool;
type: Type;

// Present only if the field is dictionary encoded
dictionary: DictionaryEncoding;

// children apply only to Nested data types like Struct, List and Union
children: [Field];
/// layout of buffers produced for this type (as derived from the Type)
/// does not include children
/// each recordbatch will return instances of those Buffers.
layout: [ VectorLayout ];
// User-defined metadata
custom_metadata: [ KeyValue ];
}

/// ----------------------------------------------------------------------
/// Endianness of the platform that produces the RecordBatch

enum Endianness:short { Little, Big }

/// ----------------------------------------------------------------------
/// A Schema describes the columns in a row batch

table Schema {

/// endianness of the buffer
/// it is Little Endian by default
/// if endianness doesn't match the underlying system then the vectors need to be converted
endianness: Endianness=Little;

fields: [Field];
// User-defined metadata
custom_metadata: [ KeyValue ];
}
namespace org.apache.arrow.flatbuf;

/// ----------------------------------------------------------------------
/// Data structures for describing a table row batch (a collection of
/// equal-length Arrow arrays)

/// A Buffer represents a single contiguous memory segment
struct Buffer {
/// The shared memory page id where this buffer is located. Currently this is
/// not used
page: int;

/// The relative offset into the shared memory page where the bytes for this
/// buffer starts
offset: long;

/// The absolute length (in bytes) of the memory buffer. The memory is found
/// from offset (inclusive) to offset + length (non-inclusive).
length: long;
}

/// Metadata about a field at some level of a nested type tree (but not
/// its children).
///
Expand Down Expand Up @@ -349,4 +91,4 @@ table Message {
bodyLength: long;
}

root_type Message;
root_type Message;
Loading