2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -305,6 +305,8 @@ option(WITH_CUSTOM_DEVICE "Compile with custom device support" OFF)
option(WITH_ARM_BRPC "Support Brpc in Arm" OFF)
option(WITH_FLPS "FL PS mode" OFF)
option(WITH_RPC "Compile with rpc support" ${WITH_DISTRIBUTE})
option(WITH_CUDNN_FRONTEND
"Compile with CUDNN Frontend API support (experimental)" OFF)

if(WITH_RECORD_BUILDTIME)
  set_property(
4 changes: 4 additions & 0 deletions cmake/configure.cmake
@@ -248,3 +248,7 @@ endif()
if(WITH_GPU_GRAPH)
  add_definitions(-DPADDLE_WITH_GPU_GRAPH)
endif()

if(WITH_CUDNN_FRONTEND)
  add_definitions(-DPADDLE_WITH_CUDNN_FRONTEND)
endif()
60 changes: 60 additions & 0 deletions cmake/external/cudnn-frontend.cmake
@@ -0,0 +1,60 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include(ExternalProject)

set(CUDNN_FRONTEND_CUDNN_MIN_VERSION 8000)

if(NOT WITH_GPU)
  message(FATAL_ERROR "Can't enable CUDNN Frontend API without CUDA.")
endif()
if(CUDNN_VERSION LESS 8000)
  message(
    FATAL_ERROR
      "Minimum CUDNN version is ${CUDNN_FRONTEND_CUDNN_MIN_VERSION}. Current: ${CUDNN_VERSION}"
  )
endif()

# Version: v0.7.1
set(CUDNN_FRONTEND_PREFIX_DIR ${THIRD_PARTY_PATH}/cudnn-frontend)
set(CUDNN_FRONTEND_SOURCE_DIR
    ${THIRD_PARTY_PATH}/cudnn-frontend/src/extern_cudnn_frontend/include)
set(CUDNN_FRONTEND_REPOSITORY https://github.com/NVIDIA/cudnn-frontend.git)
set(CUDNN_FRONTEND_TAG v0.7.1)

set(CUDNN_FRONTEND_INCLUDE_DIR ${CUDNN_FRONTEND_SOURCE_DIR})
include_directories(${CUDNN_FRONTEND_INCLUDE_DIR})

message(
  STATUS
    "Adding cudnn-frontend. Version: ${CUDNN_FRONTEND_TAG}. Directory: ${CUDNN_FRONTEND_INCLUDE_DIR}"
)

ExternalProject_Add(
  extern_cudnn_frontend
  ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE}
  GIT_REPOSITORY ${CUDNN_FRONTEND_REPOSITORY}
  GIT_TAG ${CUDNN_FRONTEND_TAG}
  PREFIX ${CUDNN_FRONTEND_PREFIX_DIR}
  UPDATE_COMMAND ""
  PATCH_COMMAND
    patch -d ${CUDNN_FRONTEND_SOURCE_DIR} -p2 <
    ${PADDLE_SOURCE_DIR}/patches/cudnn-frontend/0001-patch-for-paddle.patch
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  INSTALL_COMMAND ""
  TEST_COMMAND "")

add_library(cudnn-frontend INTERFACE)
add_dependencies(cudnn-frontend extern_cudnn_frontend)
5 changes: 5 additions & 0 deletions cmake/flags.cmake
@@ -162,6 +162,11 @@ if(NOT WIN32)
)
endif()

if(WITH_CUDNN_FRONTEND)
  # flags from https://github.com/NVIDIA/cudnn-frontend/blob/v0.7.1/CMakeLists.txt
  set(COMMON_FLAGS ${COMMON_FLAGS} -Wno-sign-compare -Wno-non-virtual-dtor)
endif()

if(WITH_ASCEND_CL AND WITH_ARM_BRPC)
  set(COMMON_FLAGS ${COMMON_FLAGS} -faligned-new)
endif()
5 changes: 5 additions & 0 deletions cmake/third_party.cmake
@@ -515,4 +515,9 @@ if(WITH_GPU
endif()
endif()

if(WITH_CUDNN_FRONTEND)
  include(external/cudnn-frontend) # download cudnn-frontend
  list(APPEND third_party_deps extern_cudnn_frontend)
endif()

add_custom_target(third_party ALL DEPENDS ${third_party_deps})
12 changes: 12 additions & 0 deletions paddle/fluid/platform/flags.cc
@@ -1021,3 +1021,15 @@ PADDLE_DEFINE_EXPORTED_bool(
PADDLE_DEFINE_EXPORTED_string(jit_engine_type,
                              "Predictor",
                              "Choose default function type in JitLayer.");

#ifdef PADDLE_WITH_CUDNN_FRONTEND
/**
 * CUDNNv8 related FLAG
 * Name: enable_cudnn_frontend
 * Since Version: 2.5.0
 * Value Range: bool, default=false
 * Example:
 * Note: Enable CUDNNv8 Frontend API for CUDNN kernels.
 */
PADDLE_DEFINE_EXPORTED_bool(enable_cudnn_frontend, false, "");
#endif // PADDLE_WITH_CUDNN_FRONTEND
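
Note: a minimal, standalone gflags-style sketch of the define/then-read pattern this exported flag is assumed to follow (PADDLE_DEFINE_EXPORTED_bool layers symbol-export handling on top of this; the description string and helper name below are illustrative, not part of this PR):

#include "gflags/gflags.h"

// Definition in one translation unit (analogous to the flags.cc change above).
// The description string here is illustrative only; the PR registers the flag
// with an empty one and documents it in the comment block instead.
DEFINE_bool(enable_cudnn_frontend, false,
            "Enable the cuDNN v8 frontend API for cuDNN kernels.");

// Other translation units would use DECLARE_bool(enable_cudnn_frontend), as
// cudnn_frontend.h does below, and then read the value at runtime:
bool UseCudnnFrontend() { return FLAGS_enable_cudnn_frontend; }

In practice such flags can typically be flipped without recompiling, for example by exporting FLAGS_enable_cudnn_frontend=true in the environment before the process starts.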
7 changes: 7 additions & 0 deletions paddle/phi/backends/dynload/CMakeLists.txt
@@ -99,3 +99,10 @@ if(MKL_FOUND AND WITH_ONEMKL)
    DEPS phi_dynamic_loader)
  target_include_directories(phi_dynload_mklrt PRIVATE ${MKL_INCLUDE})
endif()

if(WITH_CUDNN_FRONTEND)
  nv_test(
    cudnn_frontend_test
    SRCS cudnn_frontend_test.cc
    DEPS phi_dynload_cuda cudnn-frontend)
endif()
4 changes: 4 additions & 0 deletions paddle/phi/backends/dynload/cudnn.cc
@@ -46,6 +46,10 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP);
CUDNN_DNN_ROUTINE_EACH_R8(DEFINE_WRAP);
#endif

#ifdef CUDNN_DNN_ROUTINE_EACH_FRONTEND
CUDNN_DNN_ROUTINE_EACH_FRONTEND(DEFINE_WRAP);
#endif

bool HasCUDNN() {
  std::call_once(cudnn_dso_flag,
                 []() { cudnn_dso_handle = GetCUDNNDsoHandle(); });
13 changes: 13 additions & 0 deletions paddle/phi/backends/dynload/cudnn.h
@@ -194,6 +194,19 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
CUDNN_DNN_ROUTINE_EACH_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif

#ifdef PADDLE_WITH_CUDNN_FRONTEND
#define CUDNN_DNN_ROUTINE_EACH_FRONTEND(__macro) \
  __macro(cudnnBackendCreateDescriptor);         \
  __macro(cudnnBackendDestroyDescriptor);        \
  __macro(cudnnBackendExecute);                  \
  __macro(cudnnBackendFinalize);                 \
  __macro(cudnnBackendGetAttribute);             \
  __macro(cudnnBackendSetAttribute);             \
  __macro(cudnnGetStream);                       \
  __macro(cudnnReorderFilterAndBias);
CUDNN_DNN_ROUTINE_EACH_FRONTEND(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif

} // namespace dynload
} // namespace phi

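Note: the new CUDNN_DNN_ROUTINE_EACH_FRONTEND list feeds the same DECLARE_DYNAMIC_LOAD_CUDNN_WRAP machinery as the other routine lists in this header, so the backend-descriptor symbols are resolved from the cuDNN shared library at runtime rather than linked directly. A self-contained sketch of that lazy dlopen/dlsym wrapper pattern, with illustrative names only (not Paddle's actual implementation):

#include <dlfcn.h>

#include <mutex>
#include <stdexcept>

namespace sketch {

// Open libcudnn once and cache the handle for every wrapped routine.
inline void* CudnnHandle() {
  static void* handle = nullptr;
  static std::once_flag flag;
  std::call_once(flag, [] { handle = dlopen("libcudnn.so", RTLD_LAZY); });
  if (handle == nullptr) {
    throw std::runtime_error("failed to load libcudnn.so");
  }
  return handle;
}

// Resolve a routine by name on first use; FuncPtr is its function-pointer
// type, e.g. decltype(&cudnnBackendExecute) for one of the symbols above.
template <typename FuncPtr>
FuncPtr LoadCudnnSymbol(const char* name) {
  return reinterpret_cast<FuncPtr>(dlsym(CudnnHandle(), name));
}

}  // namespace sketch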
62 changes: 62 additions & 0 deletions paddle/phi/backends/dynload/cudnn_frontend.h
@@ -0,0 +1,62 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Copyright (c) 2022 NVIDIA Corporation. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "gflags/gflags.h"
#include "glog/logging.h"

#include "paddle/phi/backends/dynload/cudnn.h"
#include "paddle/phi/backends/gpu/gpu_info.h"

DECLARE_bool(enable_cudnn_frontend);

// Redirect the CUDNN APIs in the cudnn_frontend namespace to
// the functions in phi::dynload
#define CUDNN_FRONTEND_OVERRIDE_SYMBOL(__name) using phi::dynload::__name

#define CUDNN_FRONTEND_APPLY_EACH(__macro) \
  __macro(cudnnBackendCreateDescriptor);   \
  __macro(cudnnBackendDestroyDescriptor);  \
  __macro(cudnnBackendExecute);            \
  __macro(cudnnBackendFinalize);           \
  __macro(cudnnBackendGetAttribute);       \
  __macro(cudnnBackendSetAttribute);       \
  __macro(cudnnCreateFilterDescriptor);    \
  __macro(cudnnDestroyFilterDescriptor);   \
  __macro(cudnnGetStream);                 \
  __macro(cudnnGetVersion);                \
  __macro(cudnnReorderFilterAndBias);      \
  __macro(cudnnSetFilterNdDescriptor);

namespace cudnn_frontend {
CUDNN_FRONTEND_APPLY_EACH(CUDNN_FRONTEND_OVERRIDE_SYMBOL);
} // namespace cudnn_frontend

// clang-format off
#include <cudnn_frontend.h> // NOLINT
#include <cudnn_frontend_find_plan.h> // NOLINT
#include <cudnn_frontend_get_plan.h> // NOLINT
// clang-format on

namespace phi {
namespace dynload {
inline bool IsCudnnFrontendEnabled() {
  int cudnn_version = phi::backends::gpu::DnnVersion();
  bool flag_enabled = FLAGS_enable_cudnn_frontend && (cudnn_version >= 8000);
  VLOG(3) << "[cudnn_frontend] flag_enabled=" << flag_enabled;
  return flag_enabled;
}
} // namespace dynload
} // namespace phi
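
Note: two mechanisms in this header are worth spelling out. IsCudnnFrontendEnabled() gates the feature on both the runtime flag and cuDNN >= 8.0, and the using-declarations are injected into namespace cudnn_frontend before <cudnn_frontend.h> is included, so the header's unqualified backend-API calls bind to the phi::dynload wrappers instead of directly linked symbols. A standalone sketch of that redirection technique (all names below are illustrative, not real cuDNN or Paddle symbols):

#include <iostream>

// Stand-in for a directly linked cuDNN symbol at global scope.
int cudnnLikeCall(int x) { return x; }

namespace dynload {
// Stand-in for a phi::dynload wrapper that lazily resolves the real symbol.
int cudnnLikeCall(int x) {
  std::cout << "routed through the dynload wrapper\n";
  return ::cudnnLikeCall(x);
}
}  // namespace dynload

namespace frontend {
// Mirrors CUDNN_FRONTEND_OVERRIDE_SYMBOL: pull the wrapper into this scope.
using dynload::cudnnLikeCall;

int CallUnqualified() {
  // Unqualified lookup finds dynload::cudnnLikeCall first, which is how the
  // included cudnn_frontend headers end up calling the dynload wrappers.
  return cudnnLikeCall(42);
}
}  // namespace frontend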
44 changes: 44 additions & 0 deletions paddle/phi/backends/dynload/cudnn_frontend_test.cc
@@ -0,0 +1,44 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>

#include <array>
#include <iostream>

#include "paddle/phi/backends/dynload/cudnn.h"
#include "paddle/phi/backends/dynload/cudnn_frontend.h"

TEST(CudnnFrontendTest, TensorCreation) {
  // Consider creation of a 4d tensor with
  // n,c,h,w as 4,32,32,32
  std::cout << "Tensor creation comparison" << std::endl;
  std::array<int64_t, 4> tensor_dim = {4, 32, 32, 32};
  std::array<int64_t, 4> tensor_str = {32768, 1024, 32, 1};  // NCHW format
  cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
  int64_t alignment = sizeof(float);
  int64_t id = 0xD0D0CACA;  // Some magic number

  try {
    auto tensor = cudnn_frontend::TensorBuilder()
                      .setDim(tensor_dim.size(), tensor_dim.data())
                      .setStrides(tensor_str.size(), tensor_str.data())
                      .setId(id)
                      .setAlignment(alignment)
                      .setDataType(data_type)
                      .build();
  } catch (cudnn_frontend::cudnnException &e) {
    std::cout << "Exception in tensor creation " << e.what() << std::endl;
    FAIL();
  }
  std::cout << "Finished tensor creation." << std::endl;
}