Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -712,15 +712,17 @@ endif()
add_subdirectory(src/arrow)
add_subdirectory(src/arrow/io)

if (ARROW_GPU)
# IPC extensions required to build the GPU library
set(ARROW_IPC ON)
add_subdirectory(src/arrow/gpu)
endif()

if (ARROW_IPC)
add_subdirectory(src/arrow/ipc)
add_dependencies(arrow_dependencies metadata_fbs)
endif()

if (ARROW_GPU)
add_subdirectory(src/arrow/gpu)
endif()

set(ARROW_SRCS
src/arrow/array.cc
src/arrow/buffer.cc
Expand Down
18 changes: 18 additions & 0 deletions cpp/src/arrow/gpu/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

cuda_version.h
95 changes: 21 additions & 74 deletions cpp/src/arrow/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,52 +15,6 @@
# specific language governing permissions and limitations
# under the License.

# Register a CUDA unit test with CTest.
#
# Arguments:
#   REL_TEST_NAME     Test path relative to the current source directory,
#                     without the .cc extension.
#   STATIC_LINK_LIBS  Optional list of libraries to link into the test
#                     executable instead of ${ARROW_TEST_LINK_LIBS}.
#
# When ${REL_TEST_NAME}.cc exists it is compiled with cuda_add_executable()
# and made a dependency of the "unittest" target; otherwise REL_TEST_NAME is
# invoked directly as the test command. Returns without registering anything
# when NO_TESTS is set or ARROW_BUILD_STATIC is off.
#
# Example:
#   ADD_ARROW_CUDA_TEST(cuda-test STATIC_LINK_LIBS ${ARROW_GPU_TEST_LINK_LIBS})
function(ADD_ARROW_CUDA_TEST REL_TEST_NAME)
  set(options)
  # Must use the same variable name that is handed to cmake_parse_arguments()
  # below (the original set "single_value_args" but passed "one_value_args").
  set(one_value_args)
  set(multi_value_args STATIC_LINK_LIBS)
  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
  if(ARG_UNPARSED_ARGUMENTS)
    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
  endif()

  if(NO_TESTS OR NOT ARROW_BUILD_STATIC)
    return()
  endif()
  get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)

  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME}.cc)
    # This test has a corresponding .cc file, set it up as an executable.
    set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}")
    cuda_add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc")

    if(ARG_STATIC_LINK_LIBS)
      # Customize link libraries
      target_link_libraries(${TEST_NAME} ${ARG_STATIC_LINK_LIBS})
    else()
      target_link_libraries(${TEST_NAME} ${ARROW_TEST_LINK_LIBS})
    endif()
    add_dependencies(unittest ${TEST_NAME})
  else()
    # No executable, just invoke the test (probably a script) directly.
    set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME})
  endif()

  if(ARROW_TEST_MEMCHECK)
    # Only an executable test has a target to decorate; a script-only test
    # has none, and set_property(TARGET ...) on a missing target is an error.
    if(TARGET ${TEST_NAME})
      set_property(TARGET ${TEST_NAME}
        APPEND_STRING PROPERTY
        COMPILE_FLAGS " -DARROW_VALGRIND")
    endif()
    add_test(${TEST_NAME}
      bash -c "cd ${EXECUTABLE_OUTPUT_PATH}; valgrind --tool=memcheck --leak-check=full --leak-check-heuristics=stdstring --error-exitcode=1 ${TEST_PATH}")
  elseif(MSVC)
    add_test(${TEST_NAME} ${TEST_PATH})
  else()
    add_test(${TEST_NAME}
      ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
  endif()
  set_tests_properties(${TEST_NAME} PROPERTIES LABELS "unittest")
endfunction()

#######################################
# arrow_gpu
#######################################
Expand All @@ -73,62 +27,55 @@ find_package(CUDA REQUIRED)
include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})

set(ARROW_GPU_SRCS
cuda_arrow_ipc.cc
cuda_context.cc
cuda_memory.cc
)

set(ARROW_GPU_SHARED_LINK_LIBS
arrow_shared
${CUDA_LIBRARIES}
${CUDA_CUDA_LIBRARY}
)

add_library(arrow_gpu_objlib OBJECT
${ARROW_GPU_SRCS}
ADD_ARROW_LIB(arrow_gpu
SOURCES ${ARROW_GPU_SRCS}
SHARED_LINK_FLAGS ""
SHARED_LINK_LIBS ${ARROW_GPU_SHARED_LINK_LIBS}
STATIC_LINK_LIBS ""
)
set_property(TARGET arrow_gpu_objlib PROPERTY POSITION_INDEPENDENT_CODE 1)

if (ARROW_BUILD_SHARED)
cuda_add_library(arrow_gpu_shared SHARED $<TARGET_OBJECTS:arrow_gpu_objlib>)
install(TARGETS arrow_gpu_shared
RUNTIME DESTINATION bin
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
# CUDA build version
configure_file(cuda_version.h.in
"${CMAKE_CURRENT_SOURCE_DIR}/cuda_version.h"
@ONLY)

if (ARROW_BUILD_STATIC)
add_library(arrow_gpu_static STATIC $<TARGET_OBJECTS:arrow_gpu_objlib>)
install(TARGETS arrow_gpu_static
RUNTIME DESTINATION bin
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
install(FILES
"${CMAKE_CURRENT_SOURCE_DIR}/cuda_version.h"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/gpu")

install(FILES
cuda_common.h
cuda_api.h
cuda_arrow_ipc.h
cuda_context.h
cuda_memory.h
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/gpu")

# pkg-config support
configure_file(arrow-gpu.pc.in
"${CMAKE_CURRENT_BINARY_DIR}/arrow-gpu.pc"
@ONLY)

install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow-gpu.pc"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/")

# CUDA build version
configure_file(cuda_version.h.in
"${CMAKE_CURRENT_BINARY_DIR}/cuda_version.h"
@ONLY)

install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/cuda_version.h"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/gpu")

set(ARROW_GPU_TEST_LINK_LIBS
arrow_gpu_shared
${ARROW_TEST_LINK_LIBS})

if (ARROW_BUILD_TESTS)
ADD_ARROW_CUDA_TEST(cuda-test
ADD_ARROW_TEST(cuda-test
STATIC_LINK_LIBS ${ARROW_GPU_TEST_LINK_LIBS})
endif()

Expand Down
9 changes: 7 additions & 2 deletions cpp/src/arrow/gpu/cuda-benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include "arrow/memory_pool.h"
#include "arrow/test-util.h"

#include "arrow/gpu/cuda_memory.h"
#include "arrow/gpu/cuda_api.h"

namespace arrow {
namespace gpu {
Expand All @@ -35,8 +35,13 @@ constexpr int64_t kGpuNumber = 0;
static void CudaBufferWriterBenchmark(benchmark::State& state, const int64_t total_bytes,
const int64_t chunksize,
const int64_t buffer_size) {
CudaDeviceManager* manager;
ABORT_NOT_OK(CudaDeviceManager::GetInstance(&manager));
std::shared_ptr<CudaContext> context;
ABORT_NOT_OK(manager->GetContext(kGpuNumber, &context));

std::shared_ptr<CudaBuffer> device_buffer;
ABORT_NOT_OK(AllocateCudaBuffer(kGpuNumber, total_bytes, &device_buffer));
ABORT_NOT_OK(context->Allocate(total_bytes, &device_buffer));
CudaBufferWriter writer(device_buffer);

if (buffer_size > 0) {
Expand Down
114 changes: 105 additions & 9 deletions cpp/src/arrow/gpu/cuda-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,39 @@

#include "gtest/gtest.h"

#include "arrow/ipc/api.h"
#include "arrow/ipc/test-common.h"
#include "arrow/status.h"
#include "arrow/test-util.h"

#include "arrow/gpu/cuda_memory.h"
#include "arrow/gpu/cuda_api.h"

namespace arrow {
namespace gpu {

constexpr int kGpuNumber = 0;

class TestCudaBuffer : public ::testing::Test {};
// Shared fixture for the CUDA tests. SetUp() fetches the device manager via
// CudaDeviceManager::GetInstance() and then the context for device
// kGpuNumber, so derived fixtures can allocate through context_.
class TestCudaBufferBase : public ::testing::Test {
 public:
  void SetUp() override {
    ASSERT_OK(CudaDeviceManager::GetInstance(&manager_));
    ASSERT_OK(manager_->GetContext(kGpuNumber, &context_));
  }

 protected:
  // Filled in by SetUp(). Starts as nullptr so the pointer is never left
  // indeterminate if GetInstance() fails before assigning it.
  CudaDeviceManager* manager_ = nullptr;
  // Context for device kGpuNumber, filled in by SetUp().
  std::shared_ptr<CudaContext> context_;
};

// Fixture for the CudaBuffer tests; adds nothing beyond the base fixture.
class TestCudaBuffer : public TestCudaBufferBase {
 public:
  // Forwards to the base fixture, which sets up manager_ and context_.
  void SetUp() {
    TestCudaBufferBase::SetUp();
  }
};

TEST_F(TestCudaBuffer, Allocate) {
const int64_t kSize = 100;
std::shared_ptr<CudaBuffer> buffer;

ASSERT_OK(AllocateCudaBuffer(kGpuNumber, kSize, &buffer));
ASSERT_OK(context_->Allocate(kSize, &buffer));
ASSERT_EQ(kSize, buffer->size());
}

Expand All @@ -52,7 +68,7 @@ void AssertCudaBufferEquals(const CudaBuffer& buffer, const uint8_t* host_data,
TEST_F(TestCudaBuffer, CopyFromHost) {
const int64_t kSize = 1000;
std::shared_ptr<CudaBuffer> device_buffer;
ASSERT_OK(AllocateCudaBuffer(kGpuNumber, kSize, &device_buffer));
ASSERT_OK(context_->Allocate(kSize, &device_buffer));

std::shared_ptr<PoolBuffer> host_buffer;
ASSERT_OK(test::MakeRandomBytePoolBuffer(kSize, default_memory_pool(), &host_buffer));
Expand All @@ -63,10 +79,49 @@ TEST_F(TestCudaBuffer, CopyFromHost) {
AssertCudaBufferEquals(*device_buffer, host_buffer->data(), kSize);
}

class TestCudaBufferWriter : public ::testing::Test {
// IPC only supported on Linux
#if defined(__linux)

// Round-trips a device buffer through an IPC handle: export, serialize,
// deserialize, re-open, then copy the re-opened buffer back to the host and
// compare it with the bytes originally uploaded.
TEST_F(TestCudaBuffer, DISABLED_ExportForIpc) {
  // For this test to work, a second process needs to be spawned
  const int64_t kSize = 1000;
  std::shared_ptr<CudaBuffer> device_buffer;
  ASSERT_OK(context_->Allocate(kSize, &device_buffer));

  std::shared_ptr<PoolBuffer> host_buffer;
  ASSERT_OK(test::MakeRandomBytePoolBuffer(kSize, default_memory_pool(), &host_buffer));
  ASSERT_OK(device_buffer->CopyFromHost(0, host_buffer->data(), kSize));

  // Export for IPC and serialize
  std::unique_ptr<CudaIpcMemHandle> ipc_handle;
  ASSERT_OK(device_buffer->ExportForIpc(&ipc_handle));

  std::shared_ptr<Buffer> serialized_handle;
  ASSERT_OK(ipc_handle->Serialize(default_memory_pool(), &serialized_handle));

  // Deserialize IPC handle and open
  std::unique_ptr<CudaIpcMemHandle> ipc_handle2;
  ASSERT_OK(CudaIpcMemHandle::FromBuffer(serialized_handle->data(), &ipc_handle2));

  std::shared_ptr<CudaBuffer> ipc_buffer;
  ASSERT_OK(context_->OpenIpcBuffer(*ipc_handle2, &ipc_buffer));

  ASSERT_EQ(kSize, ipc_buffer->size());

  std::shared_ptr<MutableBuffer> ipc_data;
  ASSERT_OK(AllocateBuffer(default_memory_pool(), kSize, &ipc_data));
  ASSERT_OK(ipc_buffer->CopyToHost(0, kSize, ipc_data->mutable_data()));
  // Compare the host-side copy that CopyToHost() just filled. The original
  // compared ipc_buffer->data() and left ipc_data unused.
  // NOTE(review): ipc_buffer->data() is presumably a device address that
  // cannot be read with memcmp on the host - confirm.
  ASSERT_EQ(0, std::memcmp(ipc_data->data(), host_buffer->data(), kSize));
}

#endif

class TestCudaBufferWriter : public TestCudaBufferBase {
public:
void SetUp() { TestCudaBufferBase::SetUp(); }

void Allocate(const int64_t size) {
ASSERT_OK(AllocateCudaBuffer(kGpuNumber, size, &device_buffer_));
ASSERT_OK(context_->Allocate(size, &device_buffer_));
writer_.reset(new CudaBufferWriter(device_buffer_));
}

Expand Down Expand Up @@ -164,11 +219,16 @@ TEST_F(TestCudaBufferWriter, EdgeCases) {
AssertCudaBufferEquals(*device_buffer_, host_data, 1000);
}

TEST(TestCudaBufferReader, Basics) {
// Fixture for the CudaBufferReader tests; adds nothing beyond the base fixture.
class TestCudaBufferReader : public TestCudaBufferBase {
 public:
  // Forwards to the base fixture, which sets up manager_ and context_.
  void SetUp() {
    TestCudaBufferBase::SetUp();
  }
};

TEST_F(TestCudaBufferReader, Basics) {
std::shared_ptr<CudaBuffer> device_buffer;

const int64_t size = 1000;
ASSERT_OK(AllocateCudaBuffer(kGpuNumber, size, &device_buffer));
ASSERT_OK(context_->Allocate(size, &device_buffer));

std::shared_ptr<PoolBuffer> buffer;
ASSERT_OK(test::MakeRandomBytePoolBuffer(1000, default_memory_pool(), &buffer));
Expand Down Expand Up @@ -204,5 +264,41 @@ TEST(TestCudaBufferReader, Basics) {
ASSERT_EQ(0, std::memcmp(stack_buffer, host_data + 925, 75));
}

// Fixture for the GPU record-batch IPC tests. Extends the base fixture with
// a host memory pool handle.
class TestCudaArrowIpc : public TestCudaBufferBase {
 public:
  // Runs the base set-up first, then records the default memory pool.
  void SetUp() {
    TestCudaBufferBase::SetUp();
    pool_ = default_memory_pool();
  }

 protected:
  // Host memory pool, assigned from default_memory_pool() in SetUp().
  MemoryPool* pool_;
};

// Serializes a record batch into a device buffer, checks that the GPU
// ReadRecordBatch() accepts it, then copies the serialized bytes to the host
// and verifies that they decode to a batch equal to the original.
TEST_F(TestCudaArrowIpc, BasicWriteRead) {
  std::shared_ptr<RecordBatch> batch;
  ASSERT_OK(ipc::MakeIntRecordBatch(&batch));

  // Write the batch to device memory.
  std::shared_ptr<CudaBuffer> serialized_on_device;
  ASSERT_OK(
      arrow::gpu::SerializeRecordBatch(*batch, context_.get(), &serialized_on_device));

  // The device-side reader must succeed on what was just written.
  std::shared_ptr<RecordBatch> batch_from_device;
  ASSERT_OK(ReadRecordBatch(batch->schema(), serialized_on_device,
                            default_memory_pool(), &batch_from_device));

  // Pull the serialized payload back into a host allocation.
  const int64_t nbytes = serialized_on_device->size();
  std::shared_ptr<MutableBuffer> host_copy;
  ASSERT_OK(AllocateBuffer(pool_, nbytes, &host_copy));
  ASSERT_OK(serialized_on_device->CopyToHost(0, nbytes, host_copy->mutable_data()));

  // Decode on the CPU and compare with the source batch.
  io::BufferReader host_reader(host_copy);
  std::shared_ptr<RecordBatch> batch_from_host;
  ASSERT_OK(ipc::ReadRecordBatch(batch->schema(), &host_reader, &batch_from_host));

  ipc::CompareBatch(*batch, *batch_from_host);
}

} // namespace gpu
} // namespace arrow
4 changes: 3 additions & 1 deletion cpp/src/arrow/gpu/cuda_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
#ifndef ARROW_GPU_CUDA_API_H
#define ARROW_GPU_CUDA_API_H

#include "arrow/gpu/cuda_arrow_ipc.h"
#include "arrow/gpu/cuda_context.h"
#include "arrow/gpu/cuda_memory.h"
#include "arrow/gpu/cuda_version.h"

#endif // ARROW_GPU_CUDA_API_H
#endif // ARROW_GPU_CUDA_API_H
Loading