4 changes: 4 additions & 0 deletions cmake/external/protobuf.cmake
@@ -207,6 +207,10 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
elseif(WITH_IPU)
SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
SET(PROTOBUF_TAG d750fbf648256c7c631f51ffdbf67d7c18b0114e)
elseif(WIN32)
SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
# Change the tag to support building with vs2019
SET(PROTOBUF_TAG 01a05a53f40ca2ac5f0af10c6cc0810bee39b792)
Contributor
What error occurs when compiling with the current tag?

Contributor Author @betterpig (Jan 7, 2022)

It is the same problem as this issue: [image]
The new tag fixes it: [image]

else()
SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
SET(PROTOBUF_TAG 9f75c5aa851cd877fb0d93ccc31b8567a6706546)
42 changes: 42 additions & 0 deletions paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -174,6 +174,27 @@ struct FMaxFunctor<paddle::platform::float16> {
}
};

template <>
struct FMaxFunctor<int> {
Contributor
Can't std::fmax handle int inputs directly? Or was there previously an implicit type conversion that has to become explicit under VS2019?

Contributor Author

It appears that under VS2019, int / int64_t are not implicitly converted to float / double, so CUDA cannot find a matching function declaration; the standard library's fmax does not have this problem. The same issue also shows up when compiling with VS2019 on a CUDA 10.2 machine.
This change targets the int / int64_t types: their precision is raised first, fmax is called to get the result, and then lrint converts it back to the original type.
Because the inputs are integers, the operation is a comparison that involves no arithmetic, and lrint rounds back to the nearest integer, the result is not affected (a standalone sketch of the pattern follows the int64_t specialization below).

inline HOSTDEVICE int operator()(const int& a, const int& b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmax(float_a, float_b);
return std::lrint(result);
}
};

template <>
struct FMaxFunctor<int64_t> {
inline HOSTDEVICE int64_t operator()(const int64_t& a,
const int64_t& b) const {
double double_a = static_cast<double>(a);
double double_b = static_cast<double>(b);
auto result = std::fmax(double_a, double_b);
return std::llrint(result);
}
};
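As a standalone illustration of the promote-compare-round pattern discussed above, here is a minimal host-side sketch (fmax_i64 is a hypothetical name for illustration, not part of the PR):

#include <cassert>
#include <cmath>
#include <cstdint>

// Hypothetical helper mirroring FMaxFunctor<int64_t>: promote to double,
// compare with std::fmax, then round back with std::llrint.
int64_t fmax_i64(int64_t a, int64_t b) {
  double result = std::fmax(static_cast<double>(a), static_cast<double>(b));
  return std::llrint(result);  // rounds to nearest integer, not truncation
}

int main() {
  assert(fmax_i64(3, 7) == 7);
  assert(fmax_i64(-5, -2) == -2);
  return 0;
}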

// Fmin
template <typename T>
struct FMinFunctor {
@@ -194,6 +215,27 @@ struct FMinFunctor<paddle::platform::float16> {
}
};

template <>
struct FMinFunctor<int> {
inline HOSTDEVICE int operator()(const int& a, const int& b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmin(float_a, float_b);
return std::lrint(result);
}
};

template <>
struct FMinFunctor<int64_t> {
inline HOSTDEVICE int64_t operator()(const int64_t& a,
const int64_t& b) const {
double double_a = static_cast<double>(a);
double double_b = static_cast<double>(b);
auto result = std::fmin(double_a, double_b);
return std::llrint(result);
}
};

template <typename T>
struct MulGradFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; }
3 changes: 2 additions & 1 deletion paddle/fluid/operators/elementwise/elementwise_pow_op.cu
@@ -31,7 +31,8 @@ struct CudaPowFunctor<
// when cast to int by default and it is wrong.
// Use llrint to cast it to the nearest integer, which is 3.
inline HOSTDEVICE T operator()(const T args[]) const {
return std::llrint(std::pow(args[0], args[1]));
return std::llrint(
std::pow(static_cast<double>(args[0]), static_cast<double>(args[1])));
Contributor
Could the CUDA pow computation here produce different results? If so, there is an incompatibility risk.

Contributor Author

This only casts int / int64_t to double, performs the pow computation, and then uses llrint to take the nearest integer. So it does not produce different results, and there is no incompatibility risk (a small sketch of why the rounding matters follows this file's diff).

}
};

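On the llrint rationale in the code comment above, here is a minimal host-side sketch (the value is illustrative only, not Paddle code): floating-point pow can return a value just under the exact integer, and a plain integral cast then truncates it.

#include <cmath>
#include <cstdio>

int main() {
  // Simulate a pow result that comes back just under the exact integer,
  // e.g. what pow(3.0, 2.0) may produce on some platforms.
  double result = 8.999999999999998;
  long long truncated = static_cast<long long>(result);  // truncates to 8
  long long rounded = std::llrint(result);               // rounds to 9
  std::printf("truncated=%lld rounded=%lld\n", truncated, rounded);
  return 0;
}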
20 changes: 18 additions & 2 deletions paddle/fluid/operators/elementwise/elementwise_pow_op.h
100755 → 100644
@@ -31,7 +31,8 @@ struct PowFunctor {
// when cast to int by default and it is wrong.
// Use llrint to cast it to the nearest integer, which is 3.
if (std::is_integral<T>::value) {
return std::llrint(std::pow(a, b));
return std::llrint(
std::pow(static_cast<double>(a), static_cast<double>(b)));
Contributor
Same as above: with the explicit conversion added here, is the behavior consistent with before?

Contributor Author

This only casts int / int64_t to double, performs the pow computation, and then uses llrint to take the nearest integer. So it does not produce different results, and there is no incompatibility risk.

}
#endif
return std::pow(a, b);
@@ -60,17 +61,32 @@ class ElementwisePowKernel : public framework::OpKernel<T> {
template <typename T>
struct PowGradDX {
HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
if (std::is_integral<T>::value) {
return std::llrint(dout * y * std::pow(static_cast<double>(x),
static_cast<double>(y - 1)));
}
#endif
return dout * y * std::pow(x, y - 1);
}
};

template <typename T>
template <typename T, typename Enable = void>
struct PowGradDY {
HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
return dout * std::log(x) * std::pow(x, y);
}
};

template <typename T>
struct PowGradDY<T, typename std::enable_if<std::is_integral<T>::value>::type> {
Contributor
Same as above.

Contributor Author @betterpig (Jan 7, 2022)

The changes to PowGradDX and PowGradDY have two aspects:

  1. Cast int / int64_t to double so that CUDA's pow function can be called.
  2. Round with llrint instead of relying on the implicit conversions float -> int and double -> long long. The implicit conversions truncate toward zero, which is clearly unreasonable; llrint takes the nearest integer instead. For example, with x = 6 and y = 1, the exact value of PowGradDY is log(6) * 6 = 10.75...; without llrint the return value is 10, while with llrint it is 11 (a standalone sketch follows the struct below).

So the second change is not strictly required for the VS2019 upgrade. If compatibility is a concern, the second change can be dropped.

HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
return std::llrint(
dout * std::log(static_cast<double>(x)) *
std::pow(static_cast<double>(x), static_cast<double>(y)));
}
};
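To make the truncation-versus-rounding difference above concrete, here is a minimal host-only sketch (an illustration, not part of the PR):

#include <cmath>
#include <cstdio>

int main() {
  // PowGradDY with x = 6, y = 1, dout = 1: dout * log(x) * pow(x, y)
  double x = 6.0, y = 1.0, dout = 1.0;
  double grad = dout * std::log(x) * std::pow(x, y);   // about 10.7506
  long long truncated = static_cast<long long>(grad);  // implicit-cast behavior: 10
  long long rounded = std::llrint(grad);               // nearest integer: 11
  std::printf("grad=%.4f truncated=%lld rounded=%lld\n", grad, truncated, rounded);
  return 0;
}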

template <typename DeviceContext, typename T>
class ElementwisePowGradKernel : public ElemwiseGradKernel<T> {
public:
6 changes: 3 additions & 3 deletions paddle/fluid/operators/svd_helper.h
@@ -84,7 +84,7 @@ void BatchSvd(const T* X, T* U, T* VH, T* S, int rows, int cols, int batches,

template <typename T>
struct PowFunctor {
PowFunctor(const T* input, T* output, int64_t numel, float exp)
PowFunctor(const T* input, T* output, int64_t numel, T exp)
: input_(input), output_(output), numel_(numel), exp_(exp) {}

HOSTDEVICE void operator()(int64_t idx) const {
@@ -93,7 +93,7 @@ struct PowFunctor {
const T* input_;
T* output_;
int64_t numel_;
float exp_;
T exp_;
};

template <typename T>
@@ -210,7 +210,7 @@ struct DeviceIndependenceTensorOperations {
const framework::ExecutionContext& context)
: context(context) {}

framework::Tensor Pow(const framework::Tensor& x, float exp) {
framework::Tensor Pow(const framework::Tensor& x, T exp) {
framework::Tensor out;
auto for_range = GetForRange(x.numel());
int numel = x.numel();
9 changes: 7 additions & 2 deletions paddle/scripts/paddle_build.bat
@@ -78,6 +78,7 @@ if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37
if not defined BUILD_DIR set BUILD_DIR=build
set task_name=%1
set UPLOAD_TP_FILE=OFF
set WITH_TPCACHE=OFF

rem ------initialize the python environment------
set PYTHON_EXECUTABLE=%PYTHON_ROOT%\python.exe
@@ -325,10 +326,14 @@ echo ========================================
rem set vs language to english to block showIncludes, this need vs has installed English language package.
set VSLANG=1033
rem Configure the environment for 64-bit builds. 'DISTUTILS_USE_SDK' indicates that the user has selected the compiler.
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
echo %task_name%|findstr wincheck_inference >nul && (
call "D:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
) || (
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
)
set DISTUTILS_USE_SDK=1
rem Windows 10 Kit bin dir
set PATH=C:\Program Files (x86)\Windows Kits\10\bin\10.0.17763.0\x64;%PATH%
::set PATH=C:\Program Files (x86)\Windows Kits\10\bin\10.0.17763.0\x64;%PATH%
rem Use 64-bit ToolSet to compile
set PreferredToolArchitecture=x64

1 change: 1 addition & 0 deletions paddle/utils/small_vector.h
@@ -31,6 +31,7 @@
#include <limits>
#include <memory>
#include <new>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
@@ -156,11 +156,11 @@ def setUp(self):
# dout = 1
self.grad_res = np.asarray([1, 1, 1])
# dx = dout * y * pow(x, y-1)
self.grad_x = self.grad_res * self.y * (self.x
**(self.y - 1)).astype("int")
self.grad_x = (np.rint(self.grad_res * self.y * self.x
Contributor
It looks like the OP's results will change, which carries some incompatibility risk. If many explicit type conversions are being added, make sure they behave the same as the previous implicit conversions.

Contributor Author @betterpig (Jan 7, 2022)

The unit-test change corresponds to the second change to PowGradDY; see the previous reply for details.
astype("int") also truncates toward zero, so the value has to be rounded to the nearest integer first and then converted to int. [image]

**(self.y - 1))).astype("int")
# dy = dout * log(x) * pow(x, y)
self.grad_y = (self.grad_res * np.log(self.x) *
(self.x**self.y)).astype("int")
self.grad_y = (np.rint(self.grad_res * np.log(self.x) *
(self.x**self.y))).astype("int")
print(self.grad_res, self.grad_x, self.grad_y)

def test_grad(self):
@@ -176,6 +176,7 @@ def test_grad(self):
y.stop_gradient = False
res = x**y
res.backward()
print(res.gradient(), x.gradient(), y.gradient())
self.assertTrue(np.array_equal(res.gradient(), self.grad_res))
self.assertTrue(np.array_equal(x.gradient(), self.grad_x))
self.assertTrue(np.array_equal(y.gradient(), self.grad_y))