Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ matrix:
jdk: oraclejdk7
script:
- $TRAVIS_BUILD_DIR/ci/travis_script_java.sh
- language: java
os: linux
env: ARROW_TEST_GROUP=integration
jdk: oraclejdk7
before_script:
- export CC="gcc-4.9"
- export CXX="g++-4.9"
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
script:
- $TRAVIS_BUILD_DIR/ci/travis_script_integration.sh

before_install:
- ulimit -c unlimited -S
Expand Down
49 changes: 49 additions & 0 deletions ci/travis_script_integration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env bash

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.

# Travis CI driver for the integration tests. Steps, in order:
#   1. run `mvn package` inside the Java source directory,
#   2. export the locations of the Java tools jar and the C++ tester binary,
#   3. create and activate a conda environment with Python 3.5,
#   4. run integration_test.py --debug from the integration directory.
#
# Environment:
#   TRAVIS_BUILD_DIR  root of the checkout; every path below is derived from it
#   CPP_BUILD_DIR     optional; defaults to $TRAVIS_BUILD_DIR/cpp-build
#
# All path expansions are quoted so that a checkout path containing spaces or
# glob characters is not split or expanded by the shell.

# Abort on the first failing command.
set -e

# Keep a caller-provided CPP_BUILD_DIR; otherwise fall back to the default.
: "${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}"

JAVA_DIR="${TRAVIS_BUILD_DIR}/java"

pushd "$JAVA_DIR"

mvn package

popd

pushd "$TRAVIS_BUILD_DIR/integration"

# NOTE(review): presumably has to match the version Maven stamps into the jar
# file name — confirm against the Java build when the version is bumped.
VERSION=0.1.1-SNAPSHOT
export ARROW_JAVA_INTEGRATION_JAR="$JAVA_DIR/tools/target/arrow-tools-$VERSION-jar-with-dependencies.jar"
export ARROW_CPP_TESTER="$CPP_BUILD_DIR/debug/json-integration-test"

source "$TRAVIS_BUILD_DIR/ci/travis_install_conda.sh"
export MINICONDA="$HOME/miniconda"
export PATH="$MINICONDA/bin:$PATH"

CONDA_ENV_NAME=arrow-integration-test
conda create -y -q -n "$CONDA_ENV_NAME" python=3.5
source activate "$CONDA_ENV_NAME"

# faster builds, please
conda install -y nomkl

# Expensive dependencies install from Continuum package repo
conda install -y pip numpy six

python integration_test.py --debug

popd
4 changes: 4 additions & 0 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ bool Array::EqualsExact(const Array& other) const {
return true;
}

// Base-class implementation of approximate comparison: it simply forwards to
// Equals(), so the result is whatever Equals() reports for arr. The method is
// declared virtual in array.h, so subclasses can supply their own comparison.
bool Array::ApproxEquals(const std::shared_ptr<Array>& arr) const {
return Equals(arr);
}

Status Array::Validate() const {
return Status::OK();
}
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class ARROW_EXPORT Array {

bool EqualsExact(const Array& arr) const;
virtual bool Equals(const std::shared_ptr<Array>& arr) const = 0;
virtual bool ApproxEquals(const std::shared_ptr<Array>& arr) const;

// Compare if the range of slots specified are equal for the given array and
// this array. end_idx exclusive. This method does not bounds check.
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/ipc/ipc-metadata-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,15 @@ const std::shared_ptr<DataType> INT32 = std::make_shared<Int32Type>();

TEST_F(TestSchemaMetadata, PrimitiveFields) {
auto f0 = std::make_shared<Field>("f0", std::make_shared<Int8Type>());
auto f1 = std::make_shared<Field>("f1", std::make_shared<Int16Type>());
auto f1 = std::make_shared<Field>("f1", std::make_shared<Int16Type>(), false);
auto f2 = std::make_shared<Field>("f2", std::make_shared<Int32Type>());
auto f3 = std::make_shared<Field>("f3", std::make_shared<Int64Type>());
auto f4 = std::make_shared<Field>("f4", std::make_shared<UInt8Type>());
auto f5 = std::make_shared<Field>("f5", std::make_shared<UInt16Type>());
auto f6 = std::make_shared<Field>("f6", std::make_shared<UInt32Type>());
auto f7 = std::make_shared<Field>("f7", std::make_shared<UInt64Type>());
auto f8 = std::make_shared<Field>("f8", std::make_shared<FloatType>());
auto f9 = std::make_shared<Field>("f9", std::make_shared<DoubleType>());
auto f9 = std::make_shared<Field>("f9", std::make_shared<DoubleType>(), false);
auto f10 = std::make_shared<Field>("f10", std::make_shared<BooleanType>());

Schema schema({f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10});
Expand Down
27 changes: 22 additions & 5 deletions cpp/src/arrow/ipc/json-integration-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,15 @@ static Status ValidateArrowVsJson(
RETURN_NOT_OK(json_reader->GetRecordBatch(i, &json_batch));
RETURN_NOT_OK(arrow_reader->GetRecordBatch(i, &arrow_batch));

if (!json_batch->Equals(*arrow_batch.get())) {
if (!json_batch->ApproxEquals(*arrow_batch.get())) {
std::stringstream ss;
ss << "Record batch " << i << " did not match";

ss << "\nJSON: \n ";
RETURN_NOT_OK(PrettyPrint(*json_batch.get(), &ss));
ss << "\nJSON:\n";
RETURN_NOT_OK(PrettyPrint(*json_batch.get(), 0, &ss));

ss << "\nArrow: \n ";
RETURN_NOT_OK(PrettyPrint(*arrow_batch.get(), &ss));
ss << "\nArrow:\n";
RETURN_NOT_OK(PrettyPrint(*arrow_batch.get(), 0, &ss));
return Status::Invalid(ss.str());
}
}
Expand Down Expand Up @@ -299,6 +299,23 @@ static const char* JSON_EXAMPLE = R"example(
"VALIDITY": [1, 0, 0, 1, 1]
}
]
},
{
"count": 4,
"columns": [
{
"name": "foo",
"count": 4,
"DATA": [1, 2, 3, 4],
"VALIDITY": [1, 0, 1, 1]
},
{
"name": "bar",
"count": 4,
"DATA": [1.0, 2.0, 3.0, 4.0],
"VALIDITY": [1, 0, 0, 1]
}
]
}
]
}
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/arrow/ipc/json-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ class JsonArrayWriter : public ArrayVisitor {

template <typename T>
void WriteOffsetsField(const T* offsets, int32_t length) {
writer_->Key("OFFSETS");
writer_->Key("OFFSET");
writer_->StartArray();
for (int i = 0; i < length; ++i) {
writer_->Int64(offsets[i]);
Expand Down Expand Up @@ -810,7 +810,7 @@ class JsonArrayReader {
builder.Append(val.GetUint64());
} else if (IsFloatingPoint<T>::value) {
DCHECK(val.IsFloat());
builder.Append(val.GetFloat());
builder.Append(val.GetDouble());
} else if (std::is_base_of<BooleanType, T>::value) {
DCHECK(val.IsBool());
builder.Append(val.GetBool());
Expand Down Expand Up @@ -853,8 +853,8 @@ class JsonArrayReader {
typename std::enable_if<std::is_base_of<ListType, T>::value, Status>::type ReadArray(
const RjObject& json_array, int32_t length, const std::vector<bool>& is_valid,
const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
const auto& json_offsets = json_array.FindMember("OFFSETS");
RETURN_NOT_ARRAY("OFFSETS", json_offsets, json_array);
const auto& json_offsets = json_array.FindMember("OFFSET");
RETURN_NOT_ARRAY("OFFSET", json_offsets, json_array);
const auto& json_offsets_arr = json_offsets->value.GetArray();

int32_t null_count = 0;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/ipc/metadata-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, std::shared_ptr<Field>*
RETURN_NOT_OK(
TypeFromFlatbuffer(field->type_type(), field->type(), child_fields, &type));

*out = std::make_shared<Field>(field->name()->str(), type);
*out = std::make_shared<Field>(field->name()->str(), type, field->nullable());
return Status::OK();
}

Expand Down
10 changes: 5 additions & 5 deletions cpp/src/arrow/pretty_print-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ class TestArrayPrinter : public ::testing::Test {
};

template <typename TYPE, typename C_TYPE>
void CheckPrimitive(const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
const char* expected) {
void CheckPrimitive(int indent, const std::vector<bool>& is_valid,
const std::vector<C_TYPE>& values, const char* expected) {
std::ostringstream sink;

MemoryPool* pool = default_memory_pool();
Expand All @@ -66,7 +66,7 @@ void CheckPrimitive(const std::vector<bool>& is_valid, const std::vector<C_TYPE>
std::shared_ptr<Array> array;
ASSERT_OK(builder.Finish(&array));

ASSERT_OK(PrettyPrint(*array.get(), &sink));
ASSERT_OK(PrettyPrint(*array.get(), indent, &sink));

std::string result = sink.str();
ASSERT_EQ(std::string(expected, strlen(expected)), result);
Expand All @@ -77,11 +77,11 @@ TEST_F(TestArrayPrinter, PrimitiveType) {

std::vector<int32_t> values = {0, 1, 2, 3, 4};
static const char* expected = R"expected([0, 1, null, 3, null])expected";
CheckPrimitive<Int32Type, int32_t>(is_valid, values, expected);
CheckPrimitive<Int32Type, int32_t>(0, is_valid, values, expected);

std::vector<std::string> values2 = {"foo", "bar", "", "baz", ""};
static const char* ex2 = R"expected(["foo", "bar", null, "baz", null])expected";
CheckPrimitive<StringType, std::string>(is_valid, values2, ex2);
CheckPrimitive<StringType, std::string>(0, is_valid, values2, ex2);
}

} // namespace arrow
90 changes: 71 additions & 19 deletions cpp/src/arrow/pretty_print.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
// under the License.

#include <ostream>
#include <sstream>
#include <string>
#include <vector>

#include "arrow/array.h"
#include "arrow/pretty_print.h"
Expand All @@ -32,20 +34,35 @@ namespace arrow {

class ArrayPrinter : public ArrayVisitor {
public:
ArrayPrinter(const Array& array, std::ostream* sink) : array_(array), sink_(sink) {}
ArrayPrinter(const Array& array, int indent, std::ostream* sink)
: array_(array), indent_(indent), sink_(sink) {}

Status Print() { return VisitArray(array_); }

Status VisitArray(const Array& array) { return array.Accept(this); }

template <typename T>
typename std::enable_if<IsNumeric<T>::value, void>::type WriteDataValues(
typename std::enable_if<IsInteger<T>::value, void>::type WriteDataValues(
const T& array) {
const auto data = array.raw_data();
for (int i = 0; i < array.length(); ++i) {
if (i > 0) { (*sink_) << ", "; }
if (array.IsNull(i)) {
(*sink_) << "null";
} else {
(*sink_) << static_cast<int64_t>(data[i]);
}
}
}

template <typename T>
typename std::enable_if<IsFloatingPoint<T>::value, void>::type WriteDataValues(
const T& array) {
const auto data = array.raw_data();
for (int i = 0; i < array.length(); ++i) {
if (i > 0) { (*sink_) << ", "; }
if (array.IsNull(i)) {
Write("null");
} else {
(*sink_) << data[i];
}
Expand All @@ -60,7 +77,7 @@ class ArrayPrinter : public ArrayVisitor {
for (int i = 0; i < array.length(); ++i) {
if (i > 0) { (*sink_) << ", "; }
if (array.IsNull(i)) {
(*sink_) << "null";
Write("null");
} else {
const char* buf = reinterpret_cast<const char*>(array.GetValue(i, &length));
(*sink_) << "\"" << std::string(buf, length) << "\"";
Expand All @@ -74,9 +91,9 @@ class ArrayPrinter : public ArrayVisitor {
for (int i = 0; i < array.length(); ++i) {
if (i > 0) { (*sink_) << ", "; }
if (array.IsNull(i)) {
(*sink_) << "null";
Write("null");
} else {
(*sink_) << (array.Value(i) ? "true" : "false");
Write(array.Value(i) ? "true" : "false");
}
}
}
Expand Down Expand Up @@ -148,42 +165,77 @@ class ArrayPrinter : public ArrayVisitor {
}

// Prints a list array as three labelled sections, each started on a fresh
// line at the current indent: "-- is_valid: ", "-- offsets: " and
// "-- values: ". Each section's contents are printed through PrettyPrint with
// an indent of indent_ + 2.
//
// Returns the first non-OK Status produced by a nested PrettyPrint call
// (previously those results were discarded and OK was always returned), or
// the Status of printing the child values otherwise.
Status Visit(const ListArray& array) override {
  Newline();
  Write("-- is_valid: ");
  // Wrap the null bitmap in a BooleanArray so it can be printed like any
  // other array.
  BooleanArray is_valid(array.length(), array.null_bitmap());
  RETURN_NOT_OK(PrettyPrint(is_valid, indent_ + 2, sink_));

  Newline();
  Write("-- offsets: ");
  // The offsets buffer is viewed as length + 1 int32 entries.
  Int32Array offsets(array.length() + 1, array.offsets());
  RETURN_NOT_OK(PrettyPrint(offsets, indent_ + 2, sink_));

  Newline();
  Write("-- values: ");
  return PrettyPrint(*array.values().get(), indent_ + 2, sink_);
}

// Prints a struct array: first a "-- is_valid: " section with the validity
// bitmap, then one "-- child <i> type: <type> values: " section per child
// array. Every section starts on a fresh line at the current indent and its
// contents are printed through PrettyPrint with an indent of indent_ + 2.
//
// Returns the first non-OK Status produced by a nested PrettyPrint call
// (previously those results were discarded and OK was always returned), or
// Status::OK() when every section printed successfully.
Status Visit(const StructArray& array) override {
  Newline();
  Write("-- is_valid: ");
  // Wrap the null bitmap in a BooleanArray so it can be printed like any
  // other array.
  BooleanArray is_valid(array.length(), array.null_bitmap());
  RETURN_NOT_OK(PrettyPrint(is_valid, indent_ + 2, sink_));

  const std::vector<std::shared_ptr<Array>>& fields = array.fields();
  for (size_t i = 0; i < fields.size(); ++i) {
    Newline();
    // Build the label in a private stream so the caller's sink formatting
    // state does not affect how the child index is rendered.
    std::stringstream ss;
    ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << " values: ";
    Write(ss.str());
    RETURN_NOT_OK(PrettyPrint(*fields[i].get(), indent_ + 2, sink_));
  }

  return Status::OK();
}

// Printing union arrays is not supported: nothing is written and the call
// always returns Status::NotImplemented("union").
Status Visit(const UnionArray& array) override {
return Status::NotImplemented("union");
}

// Streams a C string to the output sink as-is (no newline or indent added).
void Write(const char* data) { (*sink_) << data; }

// Streams a std::string to the output sink as-is (no newline or indent added).
void Write(const std::string& data) { (*sink_) << data; }

// Ends the current output line and starts the next one at the printer's
// indent: writes "\n" and then calls Indent().
void Newline() {
(*sink_) << "\n";
Indent();
}

// Writes the indentation unit to the sink indent_ times. Nothing is written
// when indent_ is zero or negative.
void Indent() {
  int remaining = indent_;
  while (remaining > 0) {
    (*sink_) << " ";
    --remaining;
  }
}

private:
const Array& array_;
int indent_;

std::ostream* sink_;
};

Status PrettyPrint(const Array& arr, std::ostream* sink) {
ArrayPrinter printer(arr, sink);
Status PrettyPrint(const Array& arr, int indent, std::ostream* sink) {
ArrayPrinter printer(arr, indent, sink);
return printer.Print();
}

Status PrettyPrint(const RecordBatch& batch, std::ostream* sink) {
Status PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink) {
for (int i = 0; i < batch.num_columns(); ++i) {
const std::string& name = batch.column_name(i);
(*sink) << name << ": ";
RETURN_NOT_OK(PrettyPrint(*batch.column(i).get(), sink));
RETURN_NOT_OK(PrettyPrint(*batch.column(i).get(), indent + 2, sink));
(*sink) << "\n";
}
return Status::OK();
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/arrow/pretty_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,12 @@ namespace arrow {

class Status;

Status ARROW_EXPORT PrettyPrint(const RecordBatch& batch, std::ostream* sink);
Status ARROW_EXPORT PrettyPrint(const Array& arr, std::ostream* sink);
// Options bundle for PrettyPrint; currently it carries a single int field.
// NOTE(review): the PrettyPrint overloads declared in this header take the
// indent as a plain int argument and do not reference this struct — confirm
// how it is meant to be used. The member has no default value, so callers
// must set it before reading it.
struct PrettyPrintOptions {
// presumably the indent passed on to PrettyPrint — TODO confirm
int indent;
};

Status ARROW_EXPORT PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink);
Status ARROW_EXPORT PrettyPrint(const Array& arr, int indent, std::ostream* sink);

} // namespace arrow

Expand Down
Loading