7 changes: 5 additions & 2 deletions paddle/phi/kernels/funcs/fused_gemm_epilogue_xpu.h
@@ -143,8 +143,11 @@ void ComputeFusedGemmEpilogueBackwardXPU(const phi::XPUContext& dev_ctx,
     XPUType* dbias_ptr;
     auto* dbias_tmp_ptr = dev_ctx.template Alloc<T>(dbias);
     dbias_ptr = reinterpret_cast<XPUType*>(dbias_tmp_ptr);
-    r = xpu::reduce_sum(
-        xpu_ctx, dout_fc_ptr, dbias_ptr, {info_forward.m, info_forward.n}, {0});
+    r = xpu::reduce_sum(xpu_ctx,
+                        dout_fc_ptr,
+                        dbias_ptr,
+                        {(int64_t)info_forward.m, (int64_t)info_forward.n},
+                        {0LL});
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
   }
 }
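Note: the pattern repeated throughout this change is widening the XDNN shape and axis arguments from int to int64_t, so flattened element counts past INT32_MAX no longer overflow. A minimal standalone sketch of the failure mode this avoids (plain C++; reduce_sum_sketch is a hypothetical stand-in, not the real xpu::reduce_sum signature):

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for an XDNN-style reduce_sum after the widening:
// shapes and reduce axes are passed as int64_t vectors.
int reduce_sum_sketch(const std::vector<int64_t>& shape,
                      const std::vector<int64_t>& reduce_dims) {
  (void)reduce_dims;  // axis list unused in this sketch
  int64_t numel = 1;
  for (int64_t d : shape) numel *= d;  // 64-bit product: no overflow here
  std::cout << "numel = " << numel << "\n";
  return 0;  // XDNN-style success code
}

int main() {
  // m * n = 2^32, which overflows a 32-bit int but fits easily in int64_t.
  int64_t m = 1LL << 20;  // rows of dout
  int64_t n = 1LL << 12;  // columns of dout
  return reduce_sum_sketch({m, n}, {0});  // reduce over rows, as for dbias
}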
11 changes: 7 additions & 4 deletions paddle/phi/kernels/fusion/xpu/fused_feedforward_grad_kernel.cc
@@ -190,8 +190,11 @@ void FFNGrad(const phi::XPUContext& dev_ctx,
                      dropout_param2,
                      bsz_seq * d_model);
   // linear_grad2
-  r = xpu::reduce_sum(
-      xpu_ctx, d_dropout2_out_ptr, d_linear2_bias_ptr, {bsz_seq, d_model}, {0});
+  r = xpu::reduce_sum(xpu_ctx,
+                      d_dropout2_out_ptr,
+                      d_linear2_bias_ptr,
+                      {(int64_t)bsz_seq, (int64_t)d_model},
+                      {0LL});
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
 
   phi::XpuFcInfo linear2_fc_info;
@@ -285,8 +288,8 @@ void FFNGrad(const phi::XPUContext& dev_ctx,
   r = xpu::reduce_sum(xpu_ctx,
                       d_act_out_ptr,
                       d_linear1_bias_ptr,
-                      {bsz_seq, dim_feedforward},
-                      {0});
+                      {(int64_t)bsz_seq, (int64_t)dim_feedforward},
+                      {0LL});
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
 
   phi::XpuFcInfo linear1_fc_info;
33 changes: 17 additions & 16 deletions paddle/phi/kernels/legacy/xpu/compare_kernel.cc
@@ -23,24 +23,25 @@
 namespace phi {
 
 template <typename T, typename XPUType, typename Context>
-void XPUCompareRawKernelImpl(const Context& dev_ctx,
-                             const DenseTensor& x,
-                             const DenseTensor& y,
-                             DenseTensor* out,
-                             std::function<int(xpu::Context*,
-                                               const XPUType*,
-                                               const XPUType*,
-                                               bool*,
-                                               const std::vector<int>&,
-                                               const std::vector<int>&)> func) {
-  auto x_shape = common::vectorize<int>(x.dims());
-  auto y_shape = common::vectorize<int>(y.dims());
+void XPUCompareRawKernelImpl(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    const DenseTensor& y,
+    DenseTensor* out,
+    std::function<int(xpu::Context*,
+                      const XPUType*,
+                      const XPUType*,
+                      bool*,
+                      const std::vector<int64_t>&,
+                      const std::vector<int64_t>&)> func) {
+  auto x_shape = common::vectorize<int64_t>(x.dims());
+  auto y_shape = common::vectorize<int64_t>(y.dims());
 
   if (x.dims().size() == 0) {
-    x_shape = std::vector<int>({1});
+    x_shape = std::vector<int64_t>({1});
   }
   if (y.dims().size() == 0) {
-    y_shape = std::vector<int>({1});
+    y_shape = std::vector<int64_t>({1});
   }
 
   auto x_data = reinterpret_cast<const XPUType*>(x.data<T>());
@@ -64,8 +65,8 @@ void XPUCompareRawKernelImpl(const Context& dev_ctx,
                      const XPUType* x,                                    \
                      const XPUType* y,                                    \
                      bool* z,                                             \
-                     const std::vector<int>& xshape,                      \
-                     const std::vector<int>& yshape) {                    \
+                     const std::vector<int64_t>& xshape,                  \
+                     const std::vector<int64_t>& yshape) {                \
      return functor(ctx, x, y, z, xshape, yshape);                        \
    };                                                                     \
    XPUCompareRawKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, f);   \
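Note: both compare_kernel.cc files (this legacy one and its twin at the end of the diff) funnel every comparison functor through a std::function whose shape parameters are now int64_t vectors. A condensed sketch of that dispatch shape, with illustrative names (RunCompare and CompareFunc are not Paddle APIs):

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// One std::function type covers every comparison operator; only the
// element type would vary across the kernel instantiations.
using CompareFunc = std::function<int(const float*, const float*, bool*,
                                      const std::vector<int64_t>&,
                                      const std::vector<int64_t>&)>;

int RunCompare(const float* x, const float* y, bool* z,
               const std::vector<int64_t>& x_shape,
               const std::vector<int64_t>& y_shape, CompareFunc func) {
  return func(x, y, z, x_shape, y_shape);  // the kernels broadcast here
}

int main() {
  float x = 2.0f, y = 3.0f;
  bool z = false;
  // A lambda binds to CompareFunc exactly as the macro-generated lambdas
  // bind to the widened std::function parameter in the kernel.
  CompareFunc less_than = [](const float* a, const float* b, bool* out,
                             const std::vector<int64_t>&,
                             const std::vector<int64_t>&) {
    *out = *a < *b;  // scalar case; the real kernels compare element-wise
    return 0;
  };
  RunCompare(&x, &y, &z, {1}, {1}, less_than);
  std::cout << std::boolalpha << z << "\n";  // prints: true
}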
4 changes: 2 additions & 2 deletions paddle/phi/kernels/legacy/xpu/elementwise_add_kernel.cc
@@ -40,8 +40,8 @@ void AddRawKernel(const Context& dev_ctx,
               const XPUType* x,
               const XPUType* y,
               XPUType* z,
-              const std::vector<int>& xshape,
-              const std::vector<int>& yshape) {
+              const std::vector<int64_t>& xshape,
+              const std::vector<int64_t>& yshape) {
     return xpu::broadcast_add<XPUType>(ctx, x, y, z, xshape, yshape);
   };
 
4 changes: 2 additions & 2 deletions paddle/phi/kernels/legacy/xpu/elementwise_divide_kernel.cc
@@ -35,8 +35,8 @@ void DivideRawKernel(const Context& dev_ctx,
               const XPUType* x,
               const XPUType* y,
               XPUType* z,
-              const std::vector<int>& xshape,
-              const std::vector<int>& yshape) {
+              const std::vector<int64_t>& xshape,
+              const std::vector<int64_t>& yshape) {
     return xpu::broadcast_div<XPUType>(ctx, x, y, z, xshape, yshape);
   };
 
20 changes: 10 additions & 10 deletions paddle/phi/kernels/legacy/xpu/elementwise_kernel.cc
@@ -30,8 +30,8 @@ void MaximumRawKernel(const Context& dev_ctx,
               const XPUType* x,
               const XPUType* y,
               XPUType* z,
-              const std::vector<int>& xshape,
-              const std::vector<int>& yshape) {
+              const std::vector<int64_t>& xshape,
+              const std::vector<int64_t>& yshape) {
     return xpu::broadcast_max<XPUType>(ctx, x, y, z, xshape, yshape);
   };
 
@@ -49,8 +49,8 @@ void MinimumRawKernel(const Context& dev_ctx,
               const XPUType* x,
               const XPUType* y,
               XPUType* z,
-              const std::vector<int>& xshape,
-              const std::vector<int>& yshape) {
+              const std::vector<int64_t>& xshape,
+              const std::vector<int64_t>& yshape) {
     return xpu::broadcast_min<XPUType>(ctx, x, y, z, xshape, yshape);
   };
 
@@ -68,8 +68,8 @@ void RemainderRawKernel(const Context& dev_ctx,
               const XPUType* x,
               const XPUType* y,
               XPUType* z,
-              const std::vector<int>& xshape,
-              const std::vector<int>& yshape) {
+              const std::vector<int64_t>& xshape,
+              const std::vector<int64_t>& yshape) {
     return xpu::broadcast_mod<XPUType>(ctx, x, y, z, xshape, yshape);
   };
 
@@ -87,8 +87,8 @@ void FloorDivideRawKernel(const Context& dev_ctx,
               const XPUType* x,
               const XPUType* y,
               XPUType* z,
-              const std::vector<int>& xshape,
-              const std::vector<int>& yshape) {
+              const std::vector<int64_t>& xshape,
+              const std::vector<int64_t>& yshape) {
     return xpu::broadcast_floordiv<XPUType>(ctx, x, y, z, xshape, yshape);
   };
 
@@ -106,8 +106,8 @@ void ElementwisePowRawKernel(const Context& dev_ctx,
               const XPUType* x,
               const XPUType* y,
               XPUType* z,
-              const std::vector<int>& xshape,
-              const std::vector<int>& yshape) {
+              const std::vector<int64_t>& xshape,
+              const std::vector<int64_t>& yshape) {
     return xpu::broadcast_pow<XPUType>(ctx, x, y, z, xshape, yshape);
   };
 
4 changes: 2 additions & 2 deletions paddle/phi/kernels/legacy/xpu/elementwise_multiply_kernel.cc
@@ -35,8 +35,8 @@ void MultiplyRawKernel(const Context& dev_ctx,
               const XPUType* x,
               const XPUType* y,
               XPUType* z,
-              const std::vector<int>& xshape,
-              const std::vector<int>& yshape) {
+              const std::vector<int64_t>& xshape,
+              const std::vector<int64_t>& yshape) {
     return xpu::broadcast_mul<XPUType>(ctx, x, y, z, xshape, yshape);
   };
 
4 changes: 2 additions & 2 deletions paddle/phi/kernels/legacy/xpu/elementwise_subtract_kernel.cc
@@ -30,8 +30,8 @@ void SubtractRawKernel(const Context& dev_ctx,
               const XPUType* x,
               const XPUType* y,
               XPUType* z,
-              const std::vector<int>& xshape,
-              const std::vector<int>& yshape) {
+              const std::vector<int64_t>& xshape,
+              const std::vector<int64_t>& yshape) {
     return xpu::broadcast_sub<XPUType>(ctx, x, y, z, xshape, yshape);
   };
 
4 changes: 2 additions & 2 deletions paddle/phi/kernels/legacy/xpu/reduce_max_kernel.cc
@@ -33,8 +33,8 @@ void MaxRawKernel(const Context& dev_ctx,
   auto f = [](xpu::Context* ctx,
               const T* x,
               T* y,
-              const std::vector<int>& xdims,
-              const std::vector<int>& reduce_dims) {
+              const std::vector<int64_t>& xdims,
+              const std::vector<int64_t>& reduce_dims) {
     return xpu::reduce_max<XPUType>(ctx,
                                     reinterpret_cast<const XPUType*>(x),
                                     reinterpret_cast<XPUType*>(y),
4 changes: 2 additions & 2 deletions paddle/phi/kernels/xpu/activation_grad_kernel.cc
@@ -180,11 +180,11 @@ struct XPULogGradFunctor : public funcs::BaseActivationFunctor<T> {
         dev_ctx.x_context(), tmp, x->numel(), static_cast<T>(1.0));
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
 
-    auto x_dims = common::vectorize<int>(x->dims());
+    auto x_dims = common::vectorize<int64_t>(x->dims());
 
     // use [1] to replace [], because xpu not support []
     if (x_dims.size() == 0) {
-      x_dims = std::vector<int>({1});
+      x_dims = std::vector<int64_t>({1});
    }
     // dx.device(d) = dout * (static_cast<T>(1) / x);
     r = xpu::broadcast_div(dev_ctx.x_context(),
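Note: the comment in this hunk explains the shape fixup: the XDNN broadcast routines reject rank-0 shapes, so an empty dims vector is promoted to {1}. A small sketch of that promotion, assuming common::vectorize yields an empty vector for a 0-D tensor (PromoteScalarShape is an illustrative name):

#include <cstdint>
#include <iostream>
#include <vector>

// Promote a rank-0 (scalar) shape to {1}: the scalar still has exactly
// one element, but the broadcast routines need at least one dimension.
std::vector<int64_t> PromoteScalarShape(std::vector<int64_t> dims) {
  if (dims.empty()) {
    dims = {1};
  }
  return dims;
}

int main() {
  auto shape = PromoteScalarShape({});                   // 0-D input
  std::cout << shape.size() << " " << shape[0] << "\n";  // prints: 1 1
}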
12 changes: 8 additions & 4 deletions paddle/phi/kernels/xpu/addmm_grad_kernel.cc
@@ -111,8 +111,10 @@ void AddmmGradKernel(const Context& dev_ctx,
         xpu_ctx,
         c_1,
         reinterpret_cast<XPUType*>(x_grad->data<T>()),
-        {info_forward.bs, info_forward.m, info_forward.k},
-        {0});
+        {(int64_t)info_forward.bs,
+         (int64_t)info_forward.m,
+         (int64_t)info_forward.k},
+        {0LL});
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
   }
 }
@@ -123,8 +125,10 @@ void AddmmGradKernel(const Context& dev_ctx,
         xpu_ctx,
         c_2,
         reinterpret_cast<XPUType*>(y_grad->data<T>()),
-        {info_forward.bs, info_forward.k, info_forward.n},
-        {0});
+        {(int64_t)info_forward.bs,
+         (int64_t)info_forward.k,
+         (int64_t)info_forward.n},
+        {0LL});
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
   }
 }
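Note: both hunks collapse a batched gradient back onto an un-batched operand: when the forward pass broadcast an input over the batch dimension, the backward must reduce_sum the {bs, m, k} gradient over axis 0. A plain-C++ sketch of that reduction, with loops standing in for xpu::reduce_sum (ReduceSumAxis0 is an illustrative name):

#include <cstdint>
#include <iostream>
#include <vector>

// Sum a {bs, m, k} gradient over axis 0, yielding the {m, k} gradient of
// an operand that was broadcast across the batch in the forward pass.
void ReduceSumAxis0(const std::vector<float>& grad, std::vector<float>* out,
                    int64_t bs, int64_t m, int64_t k) {
  out->assign(m * k, 0.0f);
  for (int64_t b = 0; b < bs; ++b) {
    for (int64_t i = 0; i < m * k; ++i) {
      (*out)[i] += grad[b * m * k + i];
    }
  }
}

int main() {
  const int64_t bs = 3, m = 2, k = 2;
  std::vector<float> grad(bs * m * k, 1.0f);  // each batch contributes 1
  std::vector<float> x_grad;
  ReduceSumAxis0(grad, &x_grad, bs, m, k);
  std::cout << x_grad[0] << "\n";  // prints: 3
}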
6 changes: 3 additions & 3 deletions paddle/phi/kernels/xpu/affine_channel_grad_kernel.cc
@@ -54,9 +54,9 @@ void AffineChannelGradXPUKernel(const Context& dev_ctx,
   T* dscale_d = dscale ? dev_ctx.template Alloc<T>(dscale) : nullptr;
   T* dbias_d = dbias ? dev_ctx.template Alloc<T>(dbias) : nullptr;
 
-  std::vector<int> x_shape;
-  std::vector<int> b_shape;
-  std::vector<int> rdims;
+  std::vector<int64_t> x_shape;
+  std::vector<int64_t> b_shape;
+  std::vector<int64_t> rdims;
   if (layout == phi::DataLayout::kNCHW) {
     x_shape.push_back(N);
     x_shape.push_back(C);
4 changes: 2 additions & 2 deletions paddle/phi/kernels/xpu/affine_channel_kernel.cc
@@ -48,8 +48,8 @@ void AffineChannelXPUKernel(const Context& dev_ctx,
 
   auto* x_d = x->data<T>();
   auto* y_d = y->data<T>();
-  std::vector<int> x_shape;
-  std::vector<int> b_shape;
+  std::vector<int64_t> x_shape;
+  std::vector<int64_t> b_shape;
   if (layout == phi::DataLayout::kNCHW) {
     x_shape.push_back(N);
     x_shape.push_back(C);
8 changes: 4 additions & 4 deletions paddle/phi/kernels/xpu/batch_norm_grad_kernel.cc
@@ -36,8 +36,8 @@ static int CalculateInvBNY(xpu::Context *ctx,
       y,
       common::errors::InvalidArgument(
           "X and Y should be inplaced in inplace mode"));
-  std::vector<int> tensor_shape_vec({N, C, M});
-  std::vector<int> array_shape_vec({1, C, 1});
+  std::vector<int64_t> tensor_shape_vec({N, C, M});
+  std::vector<int64_t> array_shape_vec({1, C, 1});
   // y - bias
   int r1 =
       xpu::broadcast_sub<T>(ctx, bias, y, x, array_shape_vec, tensor_shape_vec);
@@ -62,8 +62,8 @@ static int CalculateInvVar(xpu::Context *ctx,
                            T *epsilon_data,
                            T *inv_var) {
   int r1 = constant(ctx, epsilon_data, 1, epsilon);
-  std::vector<int> tensor_shape_vec({C});
-  std::vector<int> array_shape_vec({1});
+  std::vector<int64_t> tensor_shape_vec({C});
+  std::vector<int64_t> array_shape_vec({1});
   int r2 = xpu::broadcast_add<T>(
       ctx, epsilon_data, var, inv_var, array_shape_vec, tensor_shape_vec);
   int r3 = xpu::rsqrt<T>(ctx, inv_var, inv_var, C);
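Note: CalculateInvBNY leans on broadcasting a per-channel {1, C, 1} array against an {N, C, M} tensor; only the index type of the shape vectors changes here. A self-contained sketch of that broadcast subtract, with nested loops standing in for xpu::broadcast_sub (BroadcastSubSketch is an illustrative name):

#include <cstdint>
#include <iostream>
#include <vector>

// out[n][c][m] = bias[c] - y[n][c][m]: shape {1, C, 1} broadcast against
// {N, C, M}, mirroring the array/tensor shape vectors in the kernel.
void BroadcastSubSketch(const std::vector<float>& bias,
                        const std::vector<float>& y, std::vector<float>* out,
                        int64_t N, int64_t C, int64_t M) {
  out->resize(N * C * M);
  for (int64_t n = 0; n < N; ++n) {
    for (int64_t c = 0; c < C; ++c) {
      for (int64_t m = 0; m < M; ++m) {
        (*out)[(n * C + c) * M + m] = bias[c] - y[(n * C + c) * M + m];
      }
    }
  }
}

int main() {
  const int64_t N = 2, C = 3, M = 4;
  std::vector<float> bias = {10.0f, 20.0f, 30.0f};
  std::vector<float> y(N * C * M, 1.0f);
  std::vector<float> out;
  BroadcastSubSketch(bias, y, &out, N, C, M);
  std::cout << out[0] << " " << out[M] << "\n";  // prints: 9 19
}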
12 changes: 6 additions & 6 deletions paddle/phi/kernels/xpu/c_softmax_with_cross_entropy_kernel.cc
@@ -184,8 +184,8 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::XPUContext, T> {
     auto f = [](xpu::Context* ctx,
                 const T* x,
                 T* y,
-                const std::vector<int>& xdims,
-                const std::vector<int>& reduce_dims) {
+                const std::vector<int64_t>& xdims,
+                const std::vector<int64_t>& reduce_dims) {
       return xpu::reduce_max<XPUType>(ctx,
                                       reinterpret_cast<const XPUType*>(x),
                                       reinterpret_cast<XPUType*>(y),
@@ -210,8 +210,8 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::XPUContext, T> {
                 const XPUType* x,
                 const XPUType* y,
                 XPUType* z,
-                const std::vector<int>& xshape,
-                const std::vector<int>& yshape) {
+                const std::vector<int64_t>& xshape,
+                const std::vector<int64_t>& yshape) {
       return xpu::broadcast_sub<XPUType>(ctx, x, y, z, xshape, yshape);
     };
     phi::XPUElementwise<T, XPUType>(
@@ -277,8 +277,8 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::XPUContext, T> {
     auto f = [](xpu::Context* ctx,
                 const T* x,
                 T* y,
-                const std::vector<int>& xdims,
-                const std::vector<int>& reduce_dims) {
+                const std::vector<int64_t>& xdims,
+                const std::vector<int64_t>& reduce_dims) {
       return xpu::reduce_sum<XPUType>(ctx,
                                       reinterpret_cast<const XPUType*>(x),
                                       reinterpret_cast<XPUType*>(y),
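Note: the three hunks above trace the standard numerically stable softmax pipeline: reduce the row max, broadcast-subtract it, then reduce_sum the exponentials. A compact sketch of why the max subtraction matters, in plain C++ over an {n, d} row-major matrix (SoftmaxRows is an illustrative name, not the kernel's code path):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

// Stable softmax over the last axis: subtracting the row max before exp()
// keeps every exponent <= 0, so exp() cannot overflow for large logits.
std::vector<float> SoftmaxRows(const std::vector<float>& x, int64_t n,
                               int64_t d) {
  std::vector<float> out(n * d);
  for (int64_t i = 0; i < n; ++i) {
    float row_max = x[i * d];
    for (int64_t j = 1; j < d; ++j) row_max = std::max(row_max, x[i * d + j]);
    float sum = 0.0f;
    for (int64_t j = 0; j < d; ++j) {
      out[i * d + j] = std::exp(x[i * d + j] - row_max);
      sum += out[i * d + j];
    }
    for (int64_t j = 0; j < d; ++j) out[i * d + j] /= sum;
  }
  return out;
}

int main() {
  // Naive exp(1000.0f) is inf; the max-subtracted form stays finite.
  auto p = SoftmaxRows({1000.0f, 1001.0f}, 1, 2);
  std::cout << p[0] << " " << p[1] << "\n";  // ~0.269 0.731
}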
33 changes: 17 additions & 16 deletions paddle/phi/kernels/xpu/compare_kernel.cc
@@ -23,24 +23,25 @@
 namespace phi {
 
 template <typename T, typename XPUType, typename Context>
-void XPUCompareKernelImpl(const Context& dev_ctx,
-                          const DenseTensor& x,
-                          const DenseTensor& y,
-                          DenseTensor* out,
-                          std::function<int(xpu::Context*,
-                                            const XPUType*,
-                                            const XPUType*,
-                                            bool*,
-                                            const std::vector<int>&,
-                                            const std::vector<int>&)> func) {
-  auto x_shape = common::vectorize<int>(x.dims());
-  auto y_shape = common::vectorize<int>(y.dims());
+void XPUCompareKernelImpl(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    const DenseTensor& y,
+    DenseTensor* out,
+    std::function<int(xpu::Context*,
+                      const XPUType*,
+                      const XPUType*,
+                      bool*,
+                      const std::vector<int64_t>&,
+                      const std::vector<int64_t>&)> func) {
+  auto x_shape = common::vectorize<int64_t>(x.dims());
+  auto y_shape = common::vectorize<int64_t>(y.dims());
 
   if (x.dims().size() == 0) {
-    x_shape = std::vector<int>({1});
+    x_shape = std::vector<int64_t>({1});
   }
   if (y.dims().size() == 0) {
-    y_shape = std::vector<int>({1});
+    y_shape = std::vector<int64_t>({1});
   }
 
   auto x_data = reinterpret_cast<const XPUType*>(x.data<T>());
@@ -63,8 +64,8 @@ void XPUCompareKernelImpl(const Context& dev_ctx,
                      const XPUType* x,                                    \
                      const XPUType* y,                                    \
                      bool* z,                                             \
-                     const std::vector<int>& xshape,                      \
-                     const std::vector<int>& yshape) {                    \
+                     const std::vector<int64_t>& xshape,                  \
+                     const std::vector<int64_t>& yshape) {                \
      return functor(ctx, x, y, z, xshape, yshape);                        \
    };                                                                     \
    XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, f);      \