[cherry-pick][LITE][XPU] Add xpu softsign kernel (#4860) #4897

Merged · 3 commits · Dec 5, 2020
lite/backends/xpu/xpu_header_sitter.h (3 additions, 0 deletions)
@@ -17,6 +17,9 @@
#pragma GCC system_header
#include <xpu/api.h>
#include <xpu/golden.h>
+#include <xpu/refactor/fusion.h>
+#include <xpu/refactor/math.h>
+#include <xpu/refactor/nn.h>
#include <xpu/runtime.h>

#if defined(LITE_WITH_XTCL)
lite/kernels/xpu/CMakeLists.txt (7 additions, 1 deletion)
@@ -8,6 +8,7 @@ if(LITE_WITH_XTCL)
else()
# basic
add_kernel(conv_compute_xpu XPU basic SRCS conv_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(conv2d_transpose_compute_xpu XPU basic SRCS conv2d_transpose_compute.cc DEPS ${lite_kernel_deps})
add_kernel(io_copy_compute_xpu XPU basic SRCS io_copy_compute.cc DEPS ${lite_kernel_deps} target_wrapper_xpu)
add_kernel(batch_norm_compute_xpu XPU basic SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps})
add_kernel(activation_compute_xpu XPU basic SRCS activation_compute.cc DEPS ${lite_kernel_deps})
@@ -27,6 +28,9 @@ else()
add_kernel(reshape_compute_xpu XPU basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(reduce_mean_compute_xpu XPU basic SRCS reduce_mean_compute.cc DEPS ${lite_kernel_deps})
add_kernel(reduce_sum_compute_xpu XPU basic SRCS reduce_sum_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(transpose_compute_xpu XPU basic SRCS transpose_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(density_prior_box_compute XPU basic SRCS density_prior_box_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(prior_box_compute_xpu XPU basic SRCS prior_box_compute.cc DEPS ${lite_kernel_deps})

# extra
add_kernel(lookup_table_compute_xpu XPU extra SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps})
@@ -38,7 +42,9 @@ else()
add_kernel(match_matrix_tensor_compute_xpu XPU extra SRCS match_matrix_tensor_compute.cc DEPS ${lite_kernel_deps})
add_kernel(var_conv_2d_compute_xpu XPU extra SRCS var_conv_2d_compute.cc DEPS ${lite_kernel_deps})
add_kernel(search_grnn_compute_xpu XPU extra SRCS search_grnn_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(sequence_unpad_compute_xpu XPU extra SRCS sequence_unpad_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(sequence_unpad_compute_xpu XPU extra SRCS sequence_unpad_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(lrn_compute_xpu XPU extra SRCS lrn_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(topk_compute_xpu XPU extra SRCS topk_compute.cc DEPS ${lite_kernel_deps})
add_kernel(unstack_compute_xpu XPU extra SRCS unstack_compute.cc DEPS ${lite_kernel_deps})

# extra(fused kernel)
lite/kernels/xpu/activation_compute.cc (149 additions, 60 deletions)
@@ -25,103 +25,100 @@ void ReluCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();

-int r = xdnn::activation_forward(
-ctx.GetRawContext(), /* context */
-xdnn::Activation_t::RELU, /* type */
-param.X->numel(), /* len */
-param.X->data<float>(), /* x */
-param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
+int r = xdnn::relu(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel());
CHECK_EQ(r, 0);
}
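
The same migration repeats through the rest of this file: the generic activation_forward entry point is replaced by a per-op function, with the element count moved from the third argument to the last. A side-by-side sketch of the two shapes, inferred from the call sites in this diff rather than taken from the xdnn headers (so treat the exact signatures as an assumption):

// Signatures inferred from the call sites in this diff; an assumption
// about the xdnn API, not its documented interface.
namespace xdnn {
// Old style: one generic entry point, dispatched on an Activation_t tag.
int activation_forward(Context* ctx, Activation_t type, int len,
                       const float* x, float* y);
// New style (from the refactor headers): one function per op, length last.
int relu(Context* ctx, const float* x, float* y, int len);
}  // namespace xdnn

Note that reciprocal, pow, and sign below stay on the generic path, presumably because the refactor headers do not yet expose per-op entry points for them.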

+void Relu6Compute::Run() {
+auto& param = this->Param<param_t>();
+auto& ctx = this->ctx_->As<XPUContext>();
+
+int r = xdnn::relu6(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel());
+CHECK_EQ(r, 0);
+}

void TanhCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();

-int r = xdnn::activation_forward(
-ctx.GetRawContext(), /* context */
-xdnn::Activation_t::TANH, /* type */
-param.X->numel(), /* len */
-param.X->data<float>(), /* x */
-param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
+int r = xdnn::tanh(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel());
CHECK_EQ(r, 0);
}

void SigmoidCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();

-int r = xdnn::activation_forward(
-ctx.GetRawContext(), /* context */
-xdnn::Activation_t::SIGMOID, /* type */
-param.X->numel(), /* len */
-param.X->data<float>(), /* x */
-param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
+int r = xdnn::sigmoid(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel());
CHECK_EQ(r, 0);
}

void AbsCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();

-int r = xdnn::activation_forward(
-ctx.GetRawContext(), /* context */
-xdnn::Activation_t::ABS, /* type */
-param.X->numel(), /* len */
-param.X->data<float>(), /* x */
-param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
+int r = xdnn::abs(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel());
CHECK_EQ(r, 0);
}

void ExpCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();

-int r = xdnn::activation_forward(
-ctx.GetRawContext(), /* context */
-xdnn::Activation_t::EXP, /* type */
-param.X->numel(), /* len */
-param.X->data<float>(), /* x */
-param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
+int r = xdnn::exp(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel());
CHECK_EQ(r, 0);
}

void SquareCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();

-int r = xdnn::activation_forward(
-ctx.GetRawContext(), /* context */
-xdnn::Activation_t::SQUARE, /* type */
-param.X->numel(), /* len */
-param.X->data<float>(), /* x */
-param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
+int r = xdnn::square(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel());
CHECK_EQ(r, 0);
}

void ReciprocalCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();

-int r = xdnn::activation_forward(
-ctx.GetRawContext(), /* context */
-xdnn::Activation_t::RECIPROCAL, /* type */
-param.X->numel(), /* len */
-param.X->data<float>(), /* x */
-param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
+int r =
+xdnn::activation_forward(ctx.GetRawContext(),
+xdnn::Activation_t::RECIPROCAL,
+param.X->numel(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)));
CHECK_EQ(r, 0);
}

void SqrtCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();

-int r = xdnn::activation_forward(
-ctx.GetRawContext(), /* context */
-xdnn::Activation_t::SQRT, /* type */
-param.X->numel(), /* len */
-param.X->data<float>(), /* x */
-param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
+int r = xdnn::sqrt(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel());
CHECK_EQ(r, 0);
}

@@ -132,25 +129,71 @@ void PowCompute::Run() {
xdnn::Activation_t act_type(xdnn::Activation_t::ACT_POW);
act_type.pow_factor = param.factor;

-int r = xdnn::activation_forward(
-ctx.GetRawContext(), /* context */
-act_type, /* type */
-param.X->numel(), /* len */
-param.X->data<float>(), /* x */
-param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
+int r =
+xdnn::activation_forward(ctx.GetRawContext(),
+act_type,
+param.X->numel(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)));
CHECK_EQ(r, 0);
}
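
pow is the one op here whose extra parameter cannot be passed through activation_forward's argument list, so the exponent rides on the Activation_t tag instead. A minimal usage sketch, assuming the tag type behaves as these call sites suggest (ctx, len, x, and y stand in for a raw context, element count, and device buffers):

// Assumption from the call sites above: the ACT_POW variant of
// Activation_t carries the exponent in its pow_factor field.
xdnn::Activation_t act(xdnn::Activation_t::ACT_POW);
act.pow_factor = 2.0f;  // computes y = x^2 elementwise
int r = xdnn::activation_forward(ctx, act, len, x, y);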

void SignCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();

-int r = xdnn::activation_forward(
-ctx.GetRawContext(), /* context */
-xdnn::Activation_t::SIGN, /* type */
-param.X->numel(), /* len */
-param.X->data<float>(), /* x */
-param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
+int r =
+xdnn::activation_forward(ctx.GetRawContext(),
+xdnn::Activation_t::SIGN,
+param.X->numel(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)));
CHECK_EQ(r, 0);
}

+void HardSwishCompute::Run() {
+auto& param = this->Param<param_t>();
+auto& ctx = this->ctx_->As<XPUContext>();
+
+int r = xdnn::hard_swish(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel());
+CHECK_EQ(r, 0);
+}

+void HardSigmoidCompute::Run() {
+auto& param = this->Param<param_t>();
+auto& ctx = this->ctx_->As<XPUContext>();
+
+int r = xdnn::hard_sigmoid(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel(),
+param.hard_sigmoid_slope);
+CHECK_EQ(r, 0);
+}

+void LeakyReluCompute::Run() {
+auto& param = this->Param<param_t>();
+auto& ctx = this->ctx_->As<XPUContext>();
+
+int r = xdnn::leaky_relu(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel(),
+param.Leaky_relu_alpha);
+CHECK_EQ(r, 0);
+}

+void SoftsignCompute::Run() {
+auto& param = this->Param<param_t>();
+auto& ctx = this->ctx_->As<XPUContext>();
+
+int r = xdnn::softsign(ctx.GetRawContext(),
+param.X->data<float>(),
+param.Out->mutable_data<float>(TARGET(kXPU)),
+param.X->numel());
+CHECK_EQ(r, 0);
+}
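
For reference, the elementwise math behind the five activations this PR wires up, written as plain CPU loops. This documents the standard definitions only, not the xdnn kernels; the hard_swish constants and the hard_sigmoid offset of 0.5f are Paddle's defaults and are assumptions here, since the xdnn calls above pass only the slope/alpha:

#include <algorithm>
#include <cmath>
#include <cstddef>

// Plain-CPU reference semantics (standard definitions, not xdnn code).
void relu6_ref(const float* x, float* y, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) y[i] = std::min(std::max(x[i], 0.0f), 6.0f);
}
void hard_swish_ref(const float* x, float* y, std::size_t n) {
  // Assumes Paddle's defaults: offset = 3, threshold = 6, scale = 6.
  for (std::size_t i = 0; i < n; ++i)
    y[i] = x[i] * std::min(std::max(x[i] + 3.0f, 0.0f), 6.0f) / 6.0f;
}
void hard_sigmoid_ref(const float* x, float* y, std::size_t n, float slope) {
  // Assumes Paddle's default offset of 0.5.
  for (std::size_t i = 0; i < n; ++i)
    y[i] = std::min(std::max(slope * x[i] + 0.5f, 0.0f), 1.0f);
}
void leaky_relu_ref(const float* x, float* y, std::size_t n, float alpha) {
  for (std::size_t i = 0; i < n; ++i) y[i] = x[i] > 0.0f ? x[i] : alpha * x[i];
}
void softsign_ref(const float* x, float* y, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) y[i] = x[i] / (1.0f + std::fabs(x[i]));
}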

@@ -165,6 +208,12 @@ REGISTER_LITE_KERNEL(
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
.Finalize();

+REGISTER_LITE_KERNEL(
+relu6, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::Relu6Compute, def)
+.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
+.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
+.Finalize();

REGISTER_LITE_KERNEL(
tanh, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::TanhCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
@@ -226,3 +275,43 @@ REGISTER_LITE_KERNEL(reciprocal,
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
.Finalize();

+REGISTER_LITE_KERNEL(hard_sigmoid,
+kXPU,
+kFloat,
+kNCHW,
+paddle::lite::kernels::xpu::HardSigmoidCompute,
+def)
+.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
+.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
+.Finalize();
+
+REGISTER_LITE_KERNEL(hard_swish,
+kXPU,
+kFloat,
+kNCHW,
+paddle::lite::kernels::xpu::HardSwishCompute,
+def)
+.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
+.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
+.Finalize();
+
+REGISTER_LITE_KERNEL(leaky_relu,
+kXPU,
+kFloat,
+kNCHW,
+paddle::lite::kernels::xpu::LeakyReluCompute,
+def)
+.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
+.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
+.Finalize();
+
+REGISTER_LITE_KERNEL(softsign,
+kXPU,
+kFloat,
+kNCHW,
+paddle::lite::kernels::xpu::SoftsignCompute,
+def)
+.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
+.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
+.Finalize();
lite/kernels/xpu/activation_compute.h (45 additions, 0 deletions)
@@ -29,6 +29,15 @@ class ReluCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
virtual ~ReluCompute() = default;
};

+class Relu6Compute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
+ public:
+using param_t = operators::ActivationParam;
+
+virtual void Run();
+
+virtual ~Relu6Compute() = default;
+};

class TanhCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationParam;
@@ -110,6 +119,42 @@ class SignCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
virtual ~SignCompute() = default;
};

+class HardSwishCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
+ public:
+using param_t = operators::ActivationParam;
+
+virtual void Run();
+
+virtual ~HardSwishCompute() = default;
+};
+
+class HardSigmoidCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
+ public:
+using param_t = operators::ActivationParam;
+
+virtual void Run();
+
+virtual ~HardSigmoidCompute() = default;
+};
+
+class LeakyReluCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
+ public:
+using param_t = operators::ActivationParam;
+
+virtual void Run();
+
+virtual ~LeakyReluCompute() = default;
+};
+
+class SoftsignCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
+ public:
+using param_t = operators::ActivationParam;
+
+virtual void Run();
+
+virtual ~SoftsignCompute() = default;
+};

} // namespace xpu
} // namespace kernels
} // namespace lite
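
Seen end to end, every new activation touches the same four places: the refactor header exposure in xpu_header_sitter.h, an add_kernel entry in CMakeLists.txt, a Compute class with its Run body, and a REGISTER_LITE_KERNEL binding. A skeletal sketch of that recipe for a hypothetical future op ("mish" and xdnn::mish are illustrative placeholders, not functions known to exist in xdnn):

// Hypothetical sketch of the pattern above; MishCompute and xdnn::mish
// are placeholders for illustration only.
class MishCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationParam;

  virtual void Run() {
    auto& param = this->Param<param_t>();
    auto& ctx = this->ctx_->As<XPUContext>();
    int r = xdnn::mish(ctx.GetRawContext(),
                       param.X->data<float>(),
                       param.Out->mutable_data<float>(TARGET(kXPU)),
                       param.X->numel());
    CHECK_EQ(r, 0);
  }

  virtual ~MishCompute() = default;
};

// And the matching registration in activation_compute.cc:
// REGISTER_LITE_KERNEL(mish, kXPU, kFloat, kNCHW,
//                      paddle::lite::kernels::xpu::MishCompute, def)
//     .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
//     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
//     .Finalize();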