Skip to content
This repository was archived by the owner on May 10, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 9 additions & 19 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ addons:
- g++-4.9
- cmake
- valgrind
- libboost-dev
#- libsnappy-dev currently handled by thirdparty scripts.
- libboost-dev #needed for thrift cpp compilation
- libboost-program-options-dev #needed for thrift cpp compilation
- libboost-test-dev #needed for thrift cpp compilation
- libssl-dev #needed for thrift cpp compilation
Expand All @@ -31,33 +30,24 @@ addons:
- pkg-config #needed for thrift cpp compilation

before_install:
- pushd thirdparty
# thrift cpp
- mkdir $HOME/build_dir
- cd $HOME/build_dir
- >
if [ $TRAVIS_OS_NAME == osx ]; then
brew update &&
brew install thrift;
brew install thrift lz4 snappy;
fi
- >
if [ $TRAVIS_OS_NAME == linux ]; then
wget http://archive.apache.org/dist/thrift/0.9.1/thrift-0.9.1.tar.gz &&
tar xfz thrift-0.9.1.tar.gz &&
pushd thrift-0.9.1 &&
./configure CXXFLAGS='-fPIC' --without-qt4 --without-c_glib --without-csharp --without-java --without-erlang --without-nodejs --without-lua --without-python --without-perl --without-php --without-php_extension --without-ruby --without-haskell --without-go --without-d --with-cpp --prefix=$HOME/local &&
make clean &&
make install &&
popd;
cp -r $TRAVIS_BUILD_DIR/thirdparty . &&
./thirdparty/download_thirdparty.sh &&
./thirdparty/build_thirdparty.sh &&
export THRIFT_HOME=$HOME/build_dir/thirdparty/installed SNAPPY_HOME=$HOME/build_dir/thirdparty/installed LZ4_HOME=$HOME/build_dir/thirdparty/installed;
fi
# snappy and lz4
- ./download_thirdparty.sh
- ./build_thirdparty.sh
- popd

before_script:
- export CC="gcc-4.9"
- export CXX="g++-4.9"
- mkdir build
- cd build
- THRIFT_HOME=$HOME/local cmake ..
- cmake $TRAVIS_BUILD_DIR

script: make
3 changes: 0 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ enable_testing()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules")
set(BUILD_SUPPORT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build-support)

set(THIRDPARTY_PREFIX ${CMAKE_SOURCE_DIR}/thirdparty/installed)
set(CMAKE_PREFIX_PATH ${THIRDPARTY_PREFIX})

if(APPLE)
set(CMAKE_MACOSX_RPATH 1)
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.9)
Expand Down
52 changes: 28 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,36 @@ Parquet-cpp [![Build Status](https://travis-ci.org/apache/parquet-cpp.svg)](http
===========
A C++ library to read parquet files.

To build you will need some version of boost installed and thrift 0.7+ installed.
(If you are building thrift from source, you will need to set the THRIFT_HOME env
variable to the directory containing include/ and lib/.)

Then run:
<br>
<code>
thirdparty/download_thirdparty.sh
</code>
<br>
<code>
thirdparty/build_thirdparty.sh
</code>
<br>
<code>
cmake .
</code>
<br>
<code>
make
</code>

The binaries will be built to ./bin which contains the libraries to link against as
## Third Party Dependencies
- snappy
- lz4
- thrift 0.7+ [install instructions](https://thrift.apache.org/docs/install/)

Many package managers support some or all of these dependencies. E.g.:
```shell
ubuntu$ sudo apt-get install libboost-dev libsnappy-dev liblz4-dev
```
```shell
mac$ brew install snappy lz4 thrift
```

./setup_build_env.sh tries to automate setting up a build environment for you with third party dependencies. You use it by running `./setup_build_env.sh`. By default, it will create a build directory `build/`. You can override the build directory by setting the BUILD_DIR env variable to another location.

Also feel free to take a look at our [.travis.yml](.travis.yml) to see how that build env is set up.


## Build
- `cmake .`
- You can customize dependent library locations through various environment variables:
- THRIFT_HOME customizes the thrift installed location.
- SNAPPY_HOME customizes the snappy installed location.
- LZ4_HOME customizes the lz4 installed location.
- `make`

The binaries will be built to ./debug which contains the libraries to link against as
well as a few example executables.

Incremental builds can be done afterwards with just <code> make </code>.
Incremental builds can be done afterwards with just `make`.

Design
========
Expand Down
69 changes: 47 additions & 22 deletions cmake_modules/FindLz4.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# Copyright 2012 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -11,47 +10,71 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
# Tries to find Lz4 headers and libraries.
#
# Usage of this module as follows:
#
# find_package(Lz4)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# Lz4_HOME - When set, this path is inspected instead of standard library
# locations as the root of the Lz4 installation.
# The environment variable LZ4_HOME overrides this variable.
#
# - Find LZ4 (lz4.h, liblz4.a, liblz4.so, and liblz4.so.1)
# This module defines
# LZ4_INCLUDE_DIR, directory containing headers
# LZ4_LIBS, directory containing lz4 libraries
# LZ4_STATIC_LIB, path to liblz4.a
# LZ4_SHARED_LIB, path to liblz4's shared library
# LZ4_FOUND, whether lz4 has been found

set(LZ4_SEARCH_HEADER_PATHS
${THIRDPARTY_PREFIX}/include
)

set(LZ4_SEARCH_LIB_PATH
${THIRDPARTY_PREFIX}/lib
)
if( NOT "$ENV{LZ4_HOME}" STREQUAL "")
file( TO_CMAKE_PATH "$ENV{LZ4_HOME}" _native_path )
list( APPEND _lz4_roots ${_native_path} )
elseif ( Lz4_HOME )
list( APPEND _lz4_roots ${Lz4_HOME} )
endif()

find_path(LZ4_INCLUDE_DIR lz4.h PATHS
${LZ4_SEARCH_HEADER_PATHS}
# make sure we don't accidentally pick up a different version
NO_DEFAULT_PATH
)
# Try the parameterized roots, if they exist
if ( _lz4_roots )
find_path( LZ4_INCLUDE_DIR NAMES lz4.h
PATHS ${_lz4_roots} NO_DEFAULT_PATH
PATH_SUFFIXES "include" )
find_library( LZ4_LIBRARIES NAMES lz4
PATHS ${_lz4_roots} NO_DEFAULT_PATH
PATH_SUFFIXES "lib" )
else ()
find_path( LZ4_INCLUDE_DIR NAMES lz4.h )
find_library( LZ4_LIBRARIES NAMES lz4 )
endif ()

find_library(LZ4_LIB_PATH NAMES liblz4.a PATHS ${LZ4_SEARCH_LIB_PATH} NO_DEFAULT_PATH)

if (LZ4_INCLUDE_DIR AND LZ4_LIB_PATH)
if (LZ4_INCLUDE_DIR AND LZ4_LIBRARIES)
set(LZ4_FOUND TRUE)
set(LZ4_LIBS ${LZ4_SEARCH_LIB_PATH})
set(LZ4_STATIC_LIB ${LZ4_SEARCH_LIB_PATH}/liblz4.a)
get_filename_component( LZ4_LIBS ${LZ4_LIBRARIES} DIRECTORY )
set(LZ4_LIB_NAME liblz4)
set(LZ4_STATIC_LIB ${LZ4_LIBS}/${LZ4_LIB_NAME}.a)
set(LZ4_SHARED_LIB ${LZ4_LIBS}/${LZ4_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
else ()
set(LZ4_FOUND FALSE)
endif ()

if (LZ4_FOUND)
if (NOT Lz4_FIND_QUIETLY)
message(STATUS "Found the Lz4 library: ${LZ4_LIB_PATH}")
message(STATUS "Found the Lz4 library: ${LZ4_LIBRARIES}")
endif ()
else ()
if (NOT Lz4_FIND_QUIETLY)
set(LZ4_ERR_MSG "Could not find the Lz4 library. Looked for headers")
set(LZ4_ERR_MSG "${LZ4_ERR_MSG} in ${LZ4_SEARCH_HEADER_PATHS}, and for libs")
set(LZ4_ERR_MSG "${LZ4_ERR_MSG} in ${LZ4_SEARCH_LIB_PATH}")
# Build the "not found" message. When a custom root was supplied (via the
# LZ4_HOME environment variable or Lz4_HOME), name it; otherwise report that
# the system search paths were used. The prefix deliberately ends without a
# trailing space or "in" so each branch appends exactly one separator.
set(LZ4_ERR_MSG "Could not find the Lz4 library. Looked in")
if ( _lz4_roots )
  set(LZ4_ERR_MSG "${LZ4_ERR_MSG} ${_lz4_roots}.")
else ()
  set(LZ4_ERR_MSG "${LZ4_ERR_MSG} system search paths.")
endif ()
if (Lz4_FIND_REQUIRED)
message(FATAL_ERROR "${LZ4_ERR_MSG}")
else (Lz4_FIND_REQUIRED)
Expand All @@ -63,5 +86,7 @@ endif ()
mark_as_advanced(
LZ4_INCLUDE_DIR
LZ4_LIBS
LZ4_LIBRARIES
LZ4_STATIC_LIB
LZ4_SHARED_LIB
)
70 changes: 47 additions & 23 deletions cmake_modules/FindSnappy.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# Copyright 2012 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -11,47 +10,70 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# - Find SNAPPY (snappy.h, libsnappy.a, libsnappy.so, and libsnappy.so.1)
#
# Tries to find Snappy headers and libraries.
#
# Usage of this module as follows:
#
# find_package(Snappy)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# Snappy_HOME - When set, this path is inspected instead of standard library
# locations as the root of the Snappy installation.
# The environment variable SNAPPY_HOME overrides this variable.
#
# This module defines
# SNAPPY_INCLUDE_DIR, directory containing headers
# SNAPPY_LIBS, directory containing snappy libraries
# SNAPPY_STATIC_LIB, path to libsnappy.a
# SNAPPY_SHARED_LIB, path to libsnappy's shared library
# SNAPPY_FOUND, whether snappy has been found

set(SNAPPY_SEARCH_HEADER_PATHS
${THIRDPARTY_PREFIX}/include
)

set(SNAPPY_SEARCH_LIB_PATH
${THIRDPARTY_PREFIX}/lib
)
if( NOT "$ENV{SNAPPY_HOME}" STREQUAL "")
file( TO_CMAKE_PATH "$ENV{SNAPPY_HOME}" _native_path )
list( APPEND _snappy_roots ${_native_path} )
elseif ( Snappy_HOME )
list( APPEND _snappy_roots ${Snappy_HOME} )
endif()

find_path(SNAPPY_INCLUDE_DIR snappy.h PATHS
${SNAPPY_SEARCH_HEADER_PATHS}
# make sure we don't accidentally pick up a different version
NO_DEFAULT_PATH
)
# Try the parameterized roots, if they exist
if ( _snappy_roots )
find_path( SNAPPY_INCLUDE_DIR NAMES snappy.h
PATHS ${_snappy_roots} NO_DEFAULT_PATH
PATH_SUFFIXES "include" )
find_library( SNAPPY_LIBRARIES NAMES snappy
PATHS ${_snappy_roots} NO_DEFAULT_PATH
PATH_SUFFIXES "lib" )
else ()
find_path( SNAPPY_INCLUDE_DIR NAMES snappy.h )
find_library( SNAPPY_LIBRARIES NAMES snappy )
endif ()

find_library(SNAPPY_LIB_PATH NAMES snappy PATHS ${SNAPPY_SEARCH_LIB_PATH} NO_DEFAULT_PATH)

if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIB_PATH)
if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARIES)
set(SNAPPY_FOUND TRUE)
set(SNAPPY_LIBS ${SNAPPY_SEARCH_LIB_PATH})
set(SNAPPY_STATIC_LIB ${SNAPPY_SEARCH_LIB_PATH}/libsnappy.a)
get_filename_component( SNAPPY_LIBS ${SNAPPY_LIBRARIES} DIRECTORY )
set(SNAPPY_LIB_NAME libsnappy)
set(SNAPPY_STATIC_LIB ${SNAPPY_LIBS}/${SNAPPY_LIB_NAME}.a)
set(SNAPPY_SHARED_LIB ${SNAPPY_LIBS}/${SNAPPY_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
else ()
set(SNAPPY_FOUND FALSE)
endif ()

if (SNAPPY_FOUND)
if (NOT Snappy_FIND_QUIETLY)
message(STATUS "Found the Snappy library: ${SNAPPY_LIB_PATH}")
message(STATUS "Found the Snappy library: ${SNAPPY_LIBRARIES}")
endif ()
else ()
if (NOT Snappy_FIND_QUIETLY)
set(SNAPPY_ERR_MSG "Could not find the Snappy library. Looked for headers")
set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} in ${SNAPPY_SEARCH_HEADER_PATHS}, and for libs")
set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} in ${SNAPPY_SEARCH_LIB_PATH}")
# Build the "not found" message. When a custom root was supplied (via the
# SNAPPY_HOME environment variable or Snappy_HOME), name it; otherwise report
# that the system search paths were used. The prefix deliberately ends without
# a trailing space or "in" so each branch appends exactly one separator.
set(SNAPPY_ERR_MSG "Could not find the Snappy library. Looked in")
if ( _snappy_roots )
  set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} ${_snappy_roots}.")
else ()
  set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} system search paths.")
endif ()
if (Snappy_FIND_REQUIRED)
message(FATAL_ERROR "${SNAPPY_ERR_MSG}")
else (Snappy_FIND_REQUIRED)
Expand All @@ -63,5 +85,7 @@ endif ()
mark_as_advanced(
SNAPPY_INCLUDE_DIR
SNAPPY_LIBS
SNAPPY_LIBRARIES
SNAPPY_STATIC_LIB
SNAPPY_SHARED_LIB
)
27 changes: 27 additions & 0 deletions setup_build_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
#
# Sets up a build directory with the bundled third-party dependencies and
# runs cmake against it.
#
# Environment:
#   BUILD_DIR  Directory to build in. Defaults to "build" inside the directory
#              that contains this script.

set -e

# Directory containing this script (the source tree root).
SOURCE_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
: "${BUILD_DIR:=$SOURCE_DIR/build}"

mkdir -p "$BUILD_DIR"
# Resolve to an absolute path so the *_HOME exports below stay valid after the
# working directory changes, even when the caller passed a relative BUILD_DIR.
BUILD_DIR=$(cd "$BUILD_DIR" && pwd)

# The third-party scripts are copied into the build directory and run from there.
cp -r "$SOURCE_DIR/thirdparty" "$BUILD_DIR"
cd "$BUILD_DIR"
./thirdparty/download_thirdparty.sh
./thirdparty/build_thirdparty.sh

# Exported so the cmake invocation below sees them.
export SNAPPY_HOME="$BUILD_DIR/thirdparty/installed"
export LZ4_HOME="$BUILD_DIR/thirdparty/installed"
# build script doesn't support building thrift on OSX
if [ "$(uname)" != "Darwin" ]; then
  export THRIFT_HOME="$BUILD_DIR/thirdparty/installed"
fi

cmake "$SOURCE_DIR"

cd "$SOURCE_DIR"

echo
echo "Build env initialized in $BUILD_DIR."

2 changes: 1 addition & 1 deletion src/parquet/compression/lz4-codec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace parquet_cpp {

void Lz4Codec::Decompress(int input_len, const uint8_t* input,
int output_len, uint8_t* output_buffer) {
int n = LZ4_uncompress(reinterpret_cast<const char*>(input),
int n = LZ4_decompress_fast(reinterpret_cast<const char*>(input),
reinterpret_cast<char*>(output_buffer), output_len);
if (n != input_len) {
throw parquet_cpp::ParquetException("Corrupt lz4 compressed data.");
Expand Down
Loading