Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
24c1d5d
Some Types refactoring, add TypeVisitor abstract class. Add RapidJSON as
wesm Nov 8, 2016
1edf2a9
Prototyping out visitor pattern for json serialization
wesm Nov 9, 2016
68ee7ab
Move forward declarations into type_fwd.h
wesm Nov 9, 2016
820b0f2
Drafting JSON schema read/write
wesm Nov 10, 2016
3b9d14e
Add type-specific JSON metadata to schema writer
wesm Nov 10, 2016
86c9559
Add convenience factory functions for common types
wesm Nov 10, 2016
1c08233
JSON schema roundtrip passing for many types
wesm Nov 10, 2016
5fbea41
Implement some more json types and add convenience factory functions
wesm Nov 10, 2016
379da3c
Implement union metadata JSON serialization
wesm Nov 10, 2016
209ba48
More types refactoring. Strange linker error in pyarrow
wesm Nov 11, 2016
15c1094
Add type traits, refactoring, drafting json array writing. not workin…
wesm Nov 11, 2016
932ba7a
Add ArrayVisitor methods, add enough metaprogramming to detect presen…
wesm Nov 14, 2016
2c93cce
WIP JSON array reader code path
wesm Nov 15, 2016
4fc7294
Refactoring, type attribute consistency. Array reader compiles
wesm Nov 15, 2016
f26402a
Install type_traits.h. cpplint
wesm Nov 15, 2016
35c2f85
Refactoring. Start drafting string/list reader
wesm Nov 15, 2016
6566343
Recursively construct children for list/struct
wesm Nov 16, 2016
0891378
Declare loop variables
wesm Nov 16, 2016
82f108b
Refactoring. Array test scaffold
wesm Nov 17, 2016
e2e86b5
Test JSON array roundtrip for numeric types, strings, lists, structs
wesm Nov 17, 2016
6bbd669
Tweaks
wesm Nov 17, 2016
3d6bbbd
Start high level writer scaffold
wesm Nov 17, 2016
2753449
Complete draft json roundtrip implementation. tests not complete yet
wesm Nov 17, 2016
3d9fcc2
Complete round trip json file test with multiple record batches
wesm Nov 18, 2016
a2cf47b
cpplint
wesm Nov 18, 2016
72c24fe
Add a minimal literal JSON example
wesm Nov 18, 2016
d13a05f
Compiler warning
wesm Nov 18, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,25 @@ if(ARROW_BUILD_BENCHMARKS)
endif()
endif()

# RapidJSON, header only dependency
if("$ENV{RAPIDJSON_HOME}" STREQUAL "")
ExternalProject_Add(rapidjson_ep
PREFIX "${CMAKE_BINARY_DIR}"
URL "https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz"
URL_MD5 "badd12c511e081fec6c89c43a7027bce"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
BUILD_IN_SOURCE 1
INSTALL_COMMAND "")

ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR)
set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include")
else()
set(RAPIDJSON_INCLUDE_DIR "$ENV{RAPIDJSON_HOME}/include")
endif()
message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})

## Google PerfTools
##
## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ install(FILES
schema.h
table.h
type.h
type_fwd.h
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's our policy on - vs _ in filenames`?

Copy link
Member Author

@wesm wesm Nov 18, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure there's a hard and fast rule. Kudu seems to use dashes for internal files (and for unit tests), and _ as a separator in "public" headers:

https://github.com/apache/kudu/tree/master/src/kudu/client

so you would have

widget_xyz-test.cc

or

widget_xyz.h
widget_xyz-internal.h

https://google.github.io/styleguide/cppguide.html#File_Names

let me know what you think

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Kudu conventions seem fine to me. As long as we have some kind of convention I'm happy with it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay. By this reasoning we should rename bit-util.h and memory-pool.h to bit_util.h and memory_pool.h. Can do this here or in another patch

type_traits.h
test-util.h
DESTINATION include/arrow)

Expand Down
15 changes: 15 additions & 0 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,24 @@
#include "arrow/array.h"

#include <cstdint>
#include <cstring>

#include "arrow/util/bit-util.h"
#include "arrow/util/buffer.h"
#include "arrow/util/status.h"

namespace arrow {

Status GetEmptyBitmap(
MemoryPool* pool, int32_t length, std::shared_ptr<MutableBuffer>* result) {
auto buffer = std::make_shared<PoolBuffer>(pool);
RETURN_NOT_OK(buffer->Resize(BitUtil::BytesForBits(length)));
memset(buffer->mutable_data(), 0, buffer->size());

*result = buffer;
return Status::OK();
}

// ----------------------------------------------------------------------
// Base array class

Expand Down Expand Up @@ -66,4 +77,8 @@ bool NullArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_st
return true;
}

Status NullArray::Accept(ArrayVisitor* visitor) const {
return visitor->Visit(*this);
}

} // namespace arrow
12 changes: 12 additions & 0 deletions cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
namespace arrow {

class Buffer;
class MemoryPool;
class MutableBuffer;
class Status;

// Immutable data array with some logical type and some length. Any memory is
Expand Down Expand Up @@ -70,6 +72,8 @@ class ARROW_EXPORT Array {
// returning Status::OK. This can be an expensive check.
virtual Status Validate() const;

virtual Status Accept(ArrayVisitor* visitor) const = 0;

protected:
std::shared_ptr<DataType> type_;
int32_t null_count_;
Expand All @@ -86,6 +90,8 @@ class ARROW_EXPORT Array {
// Degenerate null type Array
class ARROW_EXPORT NullArray : public Array {
public:
using TypeClass = NullType;

NullArray(const std::shared_ptr<DataType>& type, int32_t length)
: Array(type, length, length, nullptr) {}

Expand All @@ -94,9 +100,15 @@ class ARROW_EXPORT NullArray : public Array {
bool Equals(const std::shared_ptr<Array>& arr) const override;
bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_index,
const std::shared_ptr<Array>& arr) const override;

Status Accept(ArrayVisitor* visitor) const override;
};

typedef std::shared_ptr<Array> ArrayPtr;

Status ARROW_EXPORT GetEmptyBitmap(
MemoryPool* pool, int32_t length, std::shared_ptr<MutableBuffer>* result);

} // namespace arrow

#endif
1 change: 1 addition & 0 deletions cpp/src/arrow/column-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "gtest/gtest.h"

#include "arrow/array.h"
#include "arrow/column.h"
#include "arrow/schema.h"
#include "arrow/test-util.h"
Expand Down
8 changes: 2 additions & 6 deletions cpp/src/arrow/io/hdfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -289,13 +289,9 @@ class HdfsClient::HdfsClientImpl {

// connect to HDFS with the builder object
hdfsBuilder* builder = hdfsNewBuilder();
if (!config->host.empty()) {
hdfsBuilderSetNameNode(builder, config->host.c_str());
}
if (!config->host.empty()) { hdfsBuilderSetNameNode(builder, config->host.c_str()); }
hdfsBuilderSetNameNodePort(builder, config->port);
if (!config->user.empty()) {
hdfsBuilderSetUserName(builder, config->user.c_str());
}
if (!config->user.empty()) { hdfsBuilderSetUserName(builder, config->user.c_str()); }
if (!config->kerb_ticket.empty()) {
hdfsBuilderSetKerbTicketCachePath(builder, config->kerb_ticket.c_str());
}
Expand Down
26 changes: 9 additions & 17 deletions cpp/src/arrow/io/libhdfs_shim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,9 @@ static HINSTANCE libjvm_handle = NULL;
// NOTE(wesm): cpplint does not like use of short and other imprecise C types

static hdfsBuilder* (*ptr_hdfsNewBuilder)(void) = NULL;
static void (*ptr_hdfsBuilderSetNameNode)(
hdfsBuilder* bld, const char* nn) = NULL;
static void (*ptr_hdfsBuilderSetNameNodePort)(
hdfsBuilder* bld, tPort port) = NULL;
static void (*ptr_hdfsBuilderSetUserName)(
hdfsBuilder* bld, const char* userName) = NULL;
static void (*ptr_hdfsBuilderSetNameNode)(hdfsBuilder* bld, const char* nn) = NULL;
static void (*ptr_hdfsBuilderSetNameNodePort)(hdfsBuilder* bld, tPort port) = NULL;
static void (*ptr_hdfsBuilderSetUserName)(hdfsBuilder* bld, const char* userName) = NULL;
static void (*ptr_hdfsBuilderSetKerbTicketCachePath)(
hdfsBuilder* bld, const char* kerbTicketCachePath) = NULL;
static hdfsFS (*ptr_hdfsBuilderConnect)(hdfsBuilder* bld) = NULL;
Expand Down Expand Up @@ -173,9 +170,9 @@ void hdfsBuilderSetUserName(hdfsBuilder* bld, const char* userName) {
ptr_hdfsBuilderSetUserName(bld, userName);
}

void hdfsBuilderSetKerbTicketCachePath(hdfsBuilder* bld,
const char* kerbTicketCachePath) {
ptr_hdfsBuilderSetKerbTicketCachePath(bld , kerbTicketCachePath);
void hdfsBuilderSetKerbTicketCachePath(
hdfsBuilder* bld, const char* kerbTicketCachePath) {
ptr_hdfsBuilderSetKerbTicketCachePath(bld, kerbTicketCachePath);
}

hdfsFS hdfsBuilderConnect(hdfsBuilder* bld) {
Expand Down Expand Up @@ -364,7 +361,7 @@ static std::vector<fs::path> get_potential_libhdfs_paths() {
std::vector<fs::path> libhdfs_potential_paths;
std::string file_name;

// OS-specific file name
// OS-specific file name
#ifdef __WIN32
file_name = "hdfs.dll";
#elif __APPLE__
Expand All @@ -374,10 +371,7 @@ static std::vector<fs::path> get_potential_libhdfs_paths() {
#endif

// Common paths
std::vector<fs::path> search_paths = {
fs::path(""),
fs::path(".")
};
std::vector<fs::path> search_paths = {fs::path(""), fs::path(".")};

// Path from environment variable
const char* hadoop_home = std::getenv("HADOOP_HOME");
Expand All @@ -387,9 +381,7 @@ static std::vector<fs::path> get_potential_libhdfs_paths() {
}

const char* libhdfs_dir = std::getenv("ARROW_LIBHDFS_DIR");
if (libhdfs_dir != nullptr) {
search_paths.push_back(fs::path(libhdfs_dir));
}
if (libhdfs_dir != nullptr) { search_paths.push_back(fs::path(libhdfs_dir)); }

// All paths with file name
for (auto& path : search_paths) {
Expand Down
7 changes: 7 additions & 0 deletions cpp/src/arrow/ipc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ set(ARROW_IPC_TEST_LINK_LIBS
set(ARROW_IPC_SRCS
adapter.cc
file.cc
json.cc
json-internal.cc
metadata.cc
metadata-internal.cc
)
Expand Down Expand Up @@ -79,6 +81,10 @@ ADD_ARROW_TEST(ipc-metadata-test)
ARROW_TEST_LINK_LIBRARIES(ipc-metadata-test
${ARROW_IPC_TEST_LINK_LIBS})

ADD_ARROW_TEST(ipc-json-test)
ARROW_TEST_LINK_LIBRARIES(ipc-json-test
${ARROW_IPC_TEST_LINK_LIBS})

# make clean will delete the generated file
set_source_files_properties(Metadata_generated.h PROPERTIES GENERATED TRUE)

Expand Down Expand Up @@ -114,6 +120,7 @@ add_dependencies(arrow_objlib metadata_fbs)
install(FILES
adapter.h
file.h
json.h
metadata.h
DESTINATION include/arrow/ipc)

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/ipc/adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ Status VisitArray(const Array* arr, std::vector<flatbuf::FieldNode>* field_nodes
buffers->push_back(binary_arr->data());
} else if (arr->type_enum() == Type::LIST) {
const auto list_arr = static_cast<const ListArray*>(arr);
buffers->push_back(list_arr->offset_buffer());
buffers->push_back(list_arr->offsets());
RETURN_NOT_OK(VisitArray(
list_arr->values().get(), field_nodes, buffers, max_recursion_depth - 1));
} else if (arr->type_enum() == Type::STRUCT) {
Expand Down
Loading