2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -305,6 +305,8 @@ option(WITH_CUSTOM_DEVICE "Compile with custom device support" OFF)
option(WITH_ARM_BRPC "Support Brpc in Arm" OFF)
option(WITH_FLPS "FL PS mode" OFF)
option(WITH_RPC "Compile with rpc support" ${WITH_DISTRIBUTE})
option(WITH_CUDNN_FRONTEND
"Compile with CUDNN Frontend API support (experimental)" OFF)

if(WITH_RECORD_BUILDTIME)
  set_property(
4 changes: 4 additions & 0 deletions cmake/configure.cmake
@@ -248,3 +248,7 @@ endif()
if(WITH_GPU_GRAPH)
  add_definitions(-DPADDLE_WITH_GPU_GRAPH)
endif()

if(WITH_CUDNN_FRONTEND)
  add_definitions(-DPADDLE_WITH_CUDNN_FRONTEND)
endif()
60 changes: 60 additions & 0 deletions cmake/external/cudnn-frontend.cmake
@@ -0,0 +1,60 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include(ExternalProject)

set(CUDNN_FRONTEND_CUDNN_MIN_VERSION 8000)

if(NOT WITH_GPU)
  message(FATAL_ERROR "Can't enable CUDNN Frontend API without CUDA.")
endif()
if(CUDNN_VERSION LESS 8000)
  message(
    FATAL_ERROR
      "Minimum CUDNN version is ${CUDNN_FRONTEND_CUDNN_MIN_VERSION}. Current: ${CUDNN_VERSION}"
  )
endif()

# Version: v0.7.1
set(CUDNN_FRONTEND_PREFIX_DIR ${THIRD_PARTY_PATH}/cudnn-frontend)
set(CUDNN_FRONTEND_SOURCE_DIR
    ${THIRD_PARTY_PATH}/cudnn-frontend/src/extern_cudnn_frontend/include)
set(CUDNN_FRONTEND_REPOSITORY https://github.com/NVIDIA/cudnn-frontend.git)
set(CUDNN_FRONTEND_TAG v0.7.1)

set(CUDNN_FRONTEND_INCLUDE_DIR ${CUDNN_FRONTEND_SOURCE_DIR})
include_directories(${CUDNN_FRONTEND_INCLUDE_DIR})

message(
  STATUS
    "Adding cudnn-frontend. Version: ${CUDNN_FRONTEND_TAG}. Directory: ${CUDNN_FRONTEND_INCLUDE_DIR}"
)

ExternalProject_Add(
  extern_cudnn_frontend
  ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE}
  GIT_REPOSITORY ${CUDNN_FRONTEND_REPOSITORY}
  GIT_TAG ${CUDNN_FRONTEND_TAG}
  PREFIX ${CUDNN_FRONTEND_PREFIX_DIR}
  UPDATE_COMMAND ""
  PATCH_COMMAND
    patch -d ${CUDNN_FRONTEND_SOURCE_DIR} -p2 <
    ${PADDLE_SOURCE_DIR}/patches/cudnn-frontend/0001-patch-for-paddle.patch
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  INSTALL_COMMAND ""
  TEST_COMMAND "")

add_library(cudnn-frontend INTERFACE)
add_dependencies(cudnn-frontend extern_cudnn_frontend)
5 changes: 5 additions & 0 deletions cmake/flags.cmake
@@ -162,6 +162,11 @@ if(NOT WIN32)
)
endif()

if(WITH_CUDNN_FRONTEND)
  # flags from https://github.com/NVIDIA/cudnn-frontend/blob/v0.7.1/CMakeLists.txt
  set(COMMON_FLAGS ${COMMON_FLAGS} -Wno-sign-compare -Wno-non-virtual-dtor)
endif()

if(WITH_ASCEND_CL AND WITH_ARM_BRPC)
  set(COMMON_FLAGS ${COMMON_FLAGS} -faligned-new)
endif()
5 changes: 5 additions & 0 deletions cmake/third_party.cmake
@@ -515,4 +515,9 @@ if(WITH_GPU
endif()
endif()

if(WITH_CUDNN_FRONTEND)
  include(external/cudnn-frontend) # download cudnn-frontend
  list(APPEND third_party_deps extern_cudnn_frontend)
endif()

add_custom_target(third_party ALL DEPENDS ${third_party_deps})
12 changes: 12 additions & 0 deletions paddle/fluid/platform/flags.cc
@@ -1021,3 +1021,15 @@ PADDLE_DEFINE_EXPORTED_bool(
PADDLE_DEFINE_EXPORTED_string(jit_engine_type,
                              "Predictor",
                              "Choose default function type in JitLayer.");

#ifdef PADDLE_WITH_CUDNN_FRONTEND
/**
 * CUDNNv8 related FLAG
 * Name: enable_cudnn_frontend
 * Since Version: 2.5.0
 * Value Range: bool, default=false
 * Example:
 * Note: Enable CUDNNv8 Frontend API for CUDNN kernels.
 */
PADDLE_DEFINE_EXPORTED_bool(enable_cudnn_frontend, false, "");
#endif // PADDLE_WITH_CUDNN_FRONTEND
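
Note: a minimal, standalone gflags-style sketch of the define/then-read pattern this exported flag is assumed to follow (PADDLE_DEFINE_EXPORTED_bool layers symbol-export handling on top of this; the description string and helper name below are illustrative, not part of this PR):

#include "gflags/gflags.h"

// Definition in one translation unit (analogous to the flags.cc change above).
// The description string here is illustrative only; the PR registers the flag
// with an empty one and documents it in the comment block instead.
DEFINE_bool(enable_cudnn_frontend, false,
            "Enable the cuDNN v8 frontend API for cuDNN kernels.");

// Other translation units would use DECLARE_bool(enable_cudnn_frontend), as
// cudnn_frontend.h does below, and then read the value at runtime:
bool UseCudnnFrontend() { return FLAGS_enable_cudnn_frontend; }

In practice such flags can typically be flipped without recompiling, for example by exporting FLAGS_enable_cudnn_frontend=true in the environment before the process starts.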
7 changes: 7 additions & 0 deletions paddle/phi/backends/dynload/CMakeLists.txt
@@ -99,3 +99,10 @@ if(MKL_FOUND AND WITH_ONEMKL)
    DEPS phi_dynamic_loader)
  target_include_directories(phi_dynload_mklrt PRIVATE ${MKL_INCLUDE})
endif()

if(WITH_CUDNN_FRONTEND)
  nv_test(
    cudnn_frontend_test
    SRCS cudnn_frontend_test.cc
    DEPS phi_dynload_cuda cudnn-frontend)
endif()
4 changes: 4 additions & 0 deletions paddle/phi/backends/dynload/cudnn.cc
@@ -46,6 +46,10 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP);
CUDNN_DNN_ROUTINE_EACH_R8(DEFINE_WRAP);
#endif

#ifdef CUDNN_DNN_ROUTINE_EACH_FRONTEND
CUDNN_DNN_ROUTINE_EACH_FRONTEND(DEFINE_WRAP);
#endif

bool HasCUDNN() {
  std::call_once(cudnn_dso_flag,
                 []() { cudnn_dso_handle = GetCUDNNDsoHandle(); });
13 changes: 13 additions & 0 deletions paddle/phi/backends/dynload/cudnn.h
@@ -194,6 +194,19 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
CUDNN_DNN_ROUTINE_EACH_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif

#ifdef PADDLE_WITH_CUDNN_FRONTEND
#define CUDNN_DNN_ROUTINE_EACH_FRONTEND(__macro) \
  __macro(cudnnBackendCreateDescriptor);         \
  __macro(cudnnBackendDestroyDescriptor);        \
  __macro(cudnnBackendExecute);                  \
  __macro(cudnnBackendFinalize);                 \
  __macro(cudnnBackendGetAttribute);             \
  __macro(cudnnBackendSetAttribute);             \
  __macro(cudnnGetStream);                       \
  __macro(cudnnReorderFilterAndBias);
CUDNN_DNN_ROUTINE_EACH_FRONTEND(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif

} // namespace dynload
} // namespace phi

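Note: the new CUDNN_DNN_ROUTINE_EACH_FRONTEND list feeds the same DECLARE_DYNAMIC_LOAD_CUDNN_WRAP machinery as the other routine lists in this header, so the backend-descriptor symbols are resolved from the cuDNN shared library at runtime rather than linked directly. A self-contained sketch of that lazy dlopen/dlsym wrapper pattern, with illustrative names only (not Paddle's actual implementation):

#include <dlfcn.h>

#include <mutex>
#include <stdexcept>

namespace sketch {

// Open libcudnn once and cache the handle for every wrapped routine.
inline void* CudnnHandle() {
  static void* handle = nullptr;
  static std::once_flag flag;
  std::call_once(flag, [] { handle = dlopen("libcudnn.so", RTLD_LAZY); });
  if (handle == nullptr) {
    throw std::runtime_error("failed to load libcudnn.so");
  }
  return handle;
}

// Resolve a routine by name on first use; FuncPtr is its function-pointer
// type, e.g. decltype(&cudnnBackendExecute) for one of the symbols above.
template <typename FuncPtr>
FuncPtr LoadCudnnSymbol(const char* name) {
  return reinterpret_cast<FuncPtr>(dlsym(CudnnHandle(), name));
}

}  // namespace sketch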
62 changes: 62 additions & 0 deletions paddle/phi/backends/dynload/cudnn_frontend.h
@@ -0,0 +1,62 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Copyright (c) 2022 NVIDIA Corporation. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "gflags/gflags.h"
#include "glog/logging.h"

#include "paddle/phi/backends/dynload/cudnn.h"
#include "paddle/phi/backends/gpu/gpu_info.h"

DECLARE_bool(enable_cudnn_frontend);

// Redirect the CUDNN APIs in the cudnn_frontend namespace to
// the functions in phi::dynload
#define CUDNN_FRONTEND_OVERRIDE_SYMBOL(__name) using phi::dynload::__name

#define CUDNN_FRONTEND_APPLY_EACH(__macro) \
  __macro(cudnnBackendCreateDescriptor);   \
  __macro(cudnnBackendDestroyDescriptor);  \
  __macro(cudnnBackendExecute);            \
  __macro(cudnnBackendFinalize);           \
  __macro(cudnnBackendGetAttribute);       \
  __macro(cudnnBackendSetAttribute);       \
  __macro(cudnnCreateFilterDescriptor);    \
  __macro(cudnnDestroyFilterDescriptor);   \
  __macro(cudnnGetStream);                 \
  __macro(cudnnGetVersion);                \
  __macro(cudnnReorderFilterAndBias);      \
  __macro(cudnnSetFilterNdDescriptor);

namespace cudnn_frontend {
CUDNN_FRONTEND_APPLY_EACH(CUDNN_FRONTEND_OVERRIDE_SYMBOL);
} // namespace cudnn_frontend

// clang-format off
#include <cudnn_frontend.h> // NOLINT
#include <cudnn_frontend_find_plan.h> // NOLINT
#include <cudnn_frontend_get_plan.h> // NOLINT
// clang-format on

namespace phi {
namespace dynload {
inline bool IsCudnnFrontendEnabled() {
  int cudnn_version = phi::backends::gpu::DnnVersion();
  bool flag_enabled = FLAGS_enable_cudnn_frontend && (cudnn_version >= 8000);
  VLOG(3) << "[cudnn_frontend] flag_enabled=" << flag_enabled;
  return flag_enabled;
}
} // namespace dynload
} // namespace phi
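
Note: two mechanisms in this header are worth spelling out. IsCudnnFrontendEnabled() gates the feature on both the runtime flag and cuDNN >= 8.0, and the using-declarations are injected into namespace cudnn_frontend before <cudnn_frontend.h> is included, so the header's unqualified backend-API calls bind to the phi::dynload wrappers instead of directly linked symbols. A standalone sketch of that redirection technique (all names below are illustrative, not real cuDNN or Paddle symbols):

#include <iostream>

// Stand-in for a directly linked cuDNN symbol at global scope.
int cudnnLikeCall(int x) { return x; }

namespace dynload {
// Stand-in for a phi::dynload wrapper that lazily resolves the real symbol.
int cudnnLikeCall(int x) {
  std::cout << "routed through the dynload wrapper\n";
  return ::cudnnLikeCall(x);
}
}  // namespace dynload

namespace frontend {
// Mirrors CUDNN_FRONTEND_OVERRIDE_SYMBOL: pull the wrapper into this scope.
using dynload::cudnnLikeCall;

int CallUnqualified() {
  // Unqualified lookup finds dynload::cudnnLikeCall first, which is how the
  // included cudnn_frontend headers end up calling the dynload wrappers.
  return cudnnLikeCall(42);
}
}  // namespace frontend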
44 changes: 44 additions & 0 deletions paddle/phi/backends/dynload/cudnn_frontend_test.cc
@@ -0,0 +1,44 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>

#include <array>
#include <iostream>

#include "paddle/phi/backends/dynload/cudnn.h"
#include "paddle/phi/backends/dynload/cudnn_frontend.h"

TEST(CudnnFrontendTest, TensorCreation) {
  // Consider creation of a 4d tensor with
  // n,c,h,w as 4,32,32,32
  std::cout << "Tensor creation comparison" << std::endl;
  std::array<int64_t, 4> tensor_dim = {4, 32, 32, 32};
  std::array<int64_t, 4> tensor_str = {32768, 1024, 32, 1};  // NCHW format
  cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
  int64_t alignment = sizeof(float);
  int64_t id = 0xD0D0CACA;  // Some magic number

  try {
    auto tensor = cudnn_frontend::TensorBuilder()
                      .setDim(tensor_dim.size(), tensor_dim.data())
                      .setStrides(tensor_str.size(), tensor_str.data())
                      .setId(id)
                      .setAlignment(alignment)
                      .setDataType(data_type)
                      .build();
  } catch (cudnn_frontend::cudnnException &e) {
    std::cout << "Exception in tensor creation " << e.what() << std::endl;
    FAIL();
  }
  std::cout << "Finished tensor creation." << std::endl;
}