4 changes: 4 additions & 0 deletions cmake/external/protobuf.cmake
@@ -207,6 +207,10 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
elseif(WITH_IPU)
SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
SET(PROTOBUF_TAG d750fbf648256c7c631f51ffdbf67d7c18b0114e)
elseif(WIN32)
SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
# Change the tag to support building with vs2019
SET(PROTOBUF_TAG 01a05a53f40ca2ac5f0af10c6cc0810bee39b792)
Contributor
What error occurs when compiling with the current tag?

Contributor Author @betterpig (Jan 7, 2022)

It is the same problem as this issue: [image]
The new tag fixes it: [image]

else()
SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
SET(PROTOBUF_TAG 9f75c5aa851cd877fb0d93ccc31b8567a6706546)
42 changes: 42 additions & 0 deletions paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -174,6 +174,27 @@ struct FMaxFunctor<paddle::platform::float16> {
}
};

template <>
struct FMaxFunctor<int> {
Contributor
Can't std::fmax handle int inputs directly? Or was there previously an implicit type conversion that has to become explicit under VS2019?

Contributor Author

It appears that under VS2019, int / int64_t are not implicitly converted to float / double, so CUDA cannot find a matching function declaration; the standard library's fmax does not have this problem. The same issue also shows up when compiling with VS2019 on a CUDA 10.2 machine.
This change targets the int / int64_t types: their precision is raised first, fmax is called to get the result, and then lrint converts it back to the original type.
Because the inputs are integers, the operation is a comparison that involves no arithmetic, and lrint rounds back to the nearest integer, the result is not affected (a standalone sketch of the pattern follows the int64_t specialization below).

inline HOSTDEVICE int operator()(const int& a, const int& b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmax(float_a, float_b);
return std::lrint(result);
}
};

template <>
struct FMaxFunctor<int64_t> {
inline HOSTDEVICE int64_t operator()(const int64_t& a,
const int64_t& b) const {
double double_a = static_cast<double>(a);
double double_b = static_cast<double>(b);
auto result = std::fmax(double_a, double_b);
return std::llrint(result);
}
};
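As a standalone illustration of the promote-compare-round pattern discussed above, here is a minimal host-side sketch (fmax_i64 is a hypothetical name for illustration, not part of the PR):

#include <cassert>
#include <cmath>
#include <cstdint>

// Hypothetical helper mirroring FMaxFunctor<int64_t>: promote to double,
// compare with std::fmax, then round back with std::llrint.
int64_t fmax_i64(int64_t a, int64_t b) {
  double result = std::fmax(static_cast<double>(a), static_cast<double>(b));
  return std::llrint(result);  // rounds to nearest integer, not truncation
}

int main() {
  assert(fmax_i64(3, 7) == 7);
  assert(fmax_i64(-5, -2) == -2);
  return 0;
}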

// Fmin
template <typename T>
struct FMinFunctor {
@@ -194,6 +215,27 @@ struct FMinFunctor<paddle::platform::float16> {
}
};

template <>
struct FMinFunctor<int> {
inline HOSTDEVICE int operator()(const int& a, const int& b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmin(float_a, float_b);
return std::lrint(result);
}
};

template <>
struct FMinFunctor<int64_t> {
inline HOSTDEVICE int64_t operator()(const int64_t& a,
const int64_t& b) const {
double double_a = static_cast<double>(a);
double double_b = static_cast<double>(b);
auto result = std::fmin(double_a, double_b);
return std::llrint(result);
}
};

template <typename T>
struct MulGradFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; }
3 changes: 2 additions & 1 deletion paddle/fluid/operators/elementwise/elementwise_pow_op.cu
@@ -31,7 +31,8 @@ struct CudaPowFunctor<
// when cast to int by default and it is wrong.
// Use llrint to cast it to the nearest integer, which is 3.
inline HOSTDEVICE T operator()(const T args[]) const {
return std::llrint(std::pow(args[0], args[1]));
return std::llrint(
std::pow(static_cast<double>(args[0]), static_cast<double>(args[1])));
Contributor
Could the CUDA pow computation here produce different results? If so, there is an incompatibility risk.

Contributor Author

This only casts int / int64_t to double, performs the pow computation, and then uses llrint to take the nearest integer. So it does not produce different results, and there is no incompatibility risk (a small sketch of why the rounding matters follows this file's diff).

}
};

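On the llrint rationale in the code comment above, here is a minimal host-side sketch (the value is illustrative only, not Paddle code): floating-point pow can return a value just under the exact integer, and a plain integral cast then truncates it.

#include <cmath>
#include <cstdio>

int main() {
  // Simulate a pow result that comes back just under the exact integer,
  // e.g. what pow(3.0, 2.0) may produce on some platforms.
  double result = 8.999999999999998;
  long long truncated = static_cast<long long>(result);  // truncates to 8
  long long rounded = std::llrint(result);               // rounds to 9
  std::printf("truncated=%lld rounded=%lld\n", truncated, rounded);
  return 0;
}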
20 changes: 18 additions & 2 deletions paddle/fluid/operators/elementwise/elementwise_pow_op.h
100755 → 100644
@@ -31,7 +31,8 @@ struct PowFunctor {
// when cast to int by default and it is wrong.
// Use llrint to cast it to the nearest integer, which is 3.
if (std::is_integral<T>::value) {
return std::llrint(std::pow(a, b));
return std::llrint(
std::pow(static_cast<double>(a), static_cast<double>(b)));
Contributor
Same as above: with the explicit conversion added here, is the behavior consistent with before?

Contributor Author

This only casts int / int64_t to double, performs the pow computation, and then uses llrint to take the nearest integer. So it does not produce different results, and there is no incompatibility risk.

}
#endif
return std::pow(a, b);
@@ -60,17 +61,32 @@ class ElementwisePowKernel : public framework::OpKernel<T> {
template <typename T>
struct PowGradDX {
HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
if (std::is_integral<T>::value) {
return std::llrint(dout * y * std::pow(static_cast<double>(x),
static_cast<double>(y - 1)));
}
#endif
return dout * y * std::pow(x, y - 1);
}
};

template <typename T>
template <typename T, typename Enable = void>
struct PowGradDY {
HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
return dout * std::log(x) * std::pow(x, y);
}
};

template <typename T>
struct PowGradDY<T, typename std::enable_if<std::is_integral<T>::value>::type> {
Contributor
Same as above.

Contributor Author @betterpig (Jan 7, 2022)

The changes to PowGradDX and PowGradDY have two aspects:

  1. Cast int / int64_t to double so that CUDA's pow function can be called.
  2. Round with llrint instead of relying on the implicit conversions float -> int and double -> long long. The implicit conversions truncate toward zero, which is clearly unreasonable; llrint takes the nearest integer instead. For example, with x = 6 and y = 1, the exact value of PowGradDY is log(6) * 6 = 10.75...; without llrint the return value is 10, while with llrint it is 11 (a standalone sketch follows the struct below).

So the second change is not strictly required for the VS2019 upgrade. If compatibility is a concern, the second change can be dropped.

HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
return std::llrint(
dout * std::log(static_cast<double>(x)) *
std::pow(static_cast<double>(x), static_cast<double>(y)));
}
};
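To make the truncation-versus-rounding difference above concrete, here is a minimal host-only sketch (an illustration, not part of the PR):

#include <cmath>
#include <cstdio>

int main() {
  // PowGradDY with x = 6, y = 1, dout = 1: dout * log(x) * pow(x, y)
  double x = 6.0, y = 1.0, dout = 1.0;
  double grad = dout * std::log(x) * std::pow(x, y);   // about 10.7506
  long long truncated = static_cast<long long>(grad);  // implicit-cast behavior: 10
  long long rounded = std::llrint(grad);               // nearest integer: 11
  std::printf("grad=%.4f truncated=%lld rounded=%lld\n", grad, truncated, rounded);
  return 0;
}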

template <typename DeviceContext, typename T>
class ElementwisePowGradKernel : public ElemwiseGradKernel<T> {
public:
6 changes: 3 additions & 3 deletions paddle/fluid/operators/svd_helper.h
@@ -84,7 +84,7 @@ void BatchSvd(const T* X, T* U, T* VH, T* S, int rows, int cols, int batches,

template <typename T>
struct PowFunctor {
PowFunctor(const T* input, T* output, int64_t numel, float exp)
PowFunctor(const T* input, T* output, int64_t numel, T exp)
: input_(input), output_(output), numel_(numel), exp_(exp) {}

HOSTDEVICE void operator()(int64_t idx) const {
@@ -93,7 +93,7 @@ struct PowFunctor {
const T* input_;
T* output_;
int64_t numel_;
float exp_;
T exp_;
};

template <typename T>
@@ -210,7 +210,7 @@ struct DeviceIndependenceTensorOperations {
const framework::ExecutionContext& context)
: context(context) {}

framework::Tensor Pow(const framework::Tensor& x, float exp) {
framework::Tensor Pow(const framework::Tensor& x, T exp) {
framework::Tensor out;
auto for_range = GetForRange(x.numel());
int numel = x.numel();
9 changes: 7 additions & 2 deletions paddle/scripts/paddle_build.bat
@@ -78,6 +78,7 @@ if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37
if not defined BUILD_DIR set BUILD_DIR=build
set task_name=%1
set UPLOAD_TP_FILE=OFF
set WITH_TPCACHE=OFF

rem ------initialize the python environment------
set PYTHON_EXECUTABLE=%PYTHON_ROOT%\python.exe
@@ -325,10 +326,14 @@ echo ========================================
rem set vs language to english to block showIncludes, this need vs has installed English language package.
set VSLANG=1033
rem Configure the environment for 64-bit builds. 'DISTUTILS_USE_SDK' indicates that the user has selected the compiler.
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
echo %task_name%|findstr wincheck_inference >nul && (
call "D:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
) || (
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
)
set DISTUTILS_USE_SDK=1
rem Windows 10 Kit bin dir
set PATH=C:\Program Files (x86)\Windows Kits\10\bin\10.0.17763.0\x64;%PATH%
::set PATH=C:\Program Files (x86)\Windows Kits\10\bin\10.0.17763.0\x64;%PATH%
rem Use 64-bit ToolSet to compile
set PreferredToolArchitecture=x64

1 change: 1 addition & 0 deletions paddle/utils/small_vector.h
@@ -31,6 +31,7 @@
#include <limits>
#include <memory>
#include <new>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
@@ -156,11 +156,11 @@ def setUp(self):
# dout = 1
self.grad_res = np.asarray([1, 1, 1])
# dx = dout * y * pow(x, y-1)
self.grad_x = self.grad_res * self.y * (self.x
**(self.y - 1)).astype("int")
self.grad_x = (np.rint(self.grad_res * self.y * self.x
Contributor
It looks like the OP's results will change, which carries some incompatibility risk. If many explicit type conversions are being added, make sure they behave the same as the previous implicit conversions.

Contributor Author @betterpig (Jan 7, 2022)

The unit-test change corresponds to the second change to PowGradDY; see the previous reply for details.
astype("int") also truncates toward zero, so the value has to be rounded to the nearest integer first and then converted to int. [image]

**(self.y - 1))).astype("int")
# dy = dout * log(x) * pow(x, y)
self.grad_y = (self.grad_res * np.log(self.x) *
(self.x**self.y)).astype("int")
self.grad_y = (np.rint(self.grad_res * np.log(self.x) *
(self.x**self.y))).astype("int")
print(self.grad_res, self.grad_x, self.grad_y)

def test_grad(self):
@@ -176,6 +176,7 @@ def test_grad(self):
y.stop_gradient = False
res = x**y
res.backward()
print(res.gradient(), x.gradient(), y.gradient())
self.assertTrue(np.array_equal(res.gradient(), self.grad_res))
self.assertTrue(np.array_equal(x.gradient(), self.grad_x))
self.assertTrue(np.array_equal(y.gradient(), self.grad_y))