Skip to content

Commit 345545e

Browse files
authored
[cherry-pick][LITE][XPU] Add xpu softsign kernel, activation kernel, and new kernels (#4860) (#4897)
1 parent 9b9a318 commit 345545e

20 files changed

+1025
-131
lines changed

lite/backends/xpu/xpu_header_sitter.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
#pragma GCC system_header
1818
#include <xpu/api.h>
1919
#include <xpu/golden.h>
20+
#include <xpu/refactor/fusion.h>
21+
#include <xpu/refactor/math.h>
22+
#include <xpu/refactor/nn.h>
2023
#include <xpu/runtime.h>
2124

2225
#if defined(LITE_WITH_XTCL)

lite/kernels/xpu/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ if(LITE_WITH_XTCL)
88
else()
99
# basic
1010
add_kernel(conv_compute_xpu XPU basic SRCS conv_compute.cc DEPS ${lite_kernel_deps})
11+
add_kernel(conv2d_transpose_compute_xpu XPU basic SRCS conv2d_transpose_compute.cc DEPS ${lite_kernel_deps})
1112
add_kernel(io_copy_compute_xpu XPU basic SRCS io_copy_compute.cc DEPS ${lite_kernel_deps} target_wrapper_xpu)
1213
add_kernel(batch_norm_compute_xpu XPU basic SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps})
1314
add_kernel(activation_compute_xpu XPU basic SRCS activation_compute.cc DEPS ${lite_kernel_deps})
@@ -27,6 +28,9 @@ else()
2728
add_kernel(reshape_compute_xpu XPU basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps})
2829
add_kernel(reduce_mean_compute_xpu XPU basic SRCS reduce_mean_compute.cc DEPS ${lite_kernel_deps})
2930
add_kernel(reduce_sum_compute_xpu XPU basic SRCS reduce_sum_compute.cc DEPS ${lite_kernel_deps})
31+
add_kernel(transpose_compute_xpu XPU basic SRCS transpose_compute.cc DEPS ${lite_kernel_deps})
32+
add_kernel(density_prior_box_compute XPU basic SRCS density_prior_box_compute.cc DEPS ${lite_kernel_deps})
33+
add_kernel(prior_box_compute_xpu XPU basic SRCS prior_box_compute.cc DEPS ${lite_kernel_deps})
3034

3135
# extra
3236
add_kernel(lookup_table_compute_xpu XPU extra SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps})
@@ -38,7 +42,9 @@ else()
3842
add_kernel(match_matrix_tensor_compute_xpu XPU extra SRCS match_matrix_tensor_compute.cc DEPS ${lite_kernel_deps})
3943
add_kernel(var_conv_2d_compute_xpu XPU extra SRCS var_conv_2d_compute.cc DEPS ${lite_kernel_deps})
4044
add_kernel(search_grnn_compute_xpu XPU extra SRCS search_grnn_compute.cc DEPS ${lite_kernel_deps})
41-
add_kernel(sequence_unpad_compute_xpu XPU extra SRCS sequence_unpad_compute.cc DEPS ${lite_kernel_deps})
45+
add_kernel(sequence_unpad_compute_xpu XPU extra SRCS sequence_unpad_compute.cc DEPS ${lite_kernel_deps})
46+
add_kernel(lrn_compute_xpu XPU extra SRCS lrn_compute.cc DEPS ${lite_kernel_deps})
47+
add_kernel(topk_compute_xpu XPU extra SRCS topk_compute.cc DEPS ${lite_kernel_deps})
4248
add_kernel(unstack_compute_xpu XPU extra SRCS unstack_compute.cc DEPS ${lite_kernel_deps})
4349

4450
# extra(fused kernel)

lite/kernels/xpu/activation_compute.cc

Lines changed: 149 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -25,103 +25,100 @@ void ReluCompute::Run() {
2525
auto& param = this->Param<param_t>();
2626
auto& ctx = this->ctx_->As<XPUContext>();
2727

28-
int r = xdnn::activation_forward(
29-
ctx.GetRawContext(), /* context */
30-
xdnn::Activation_t::RELU, /* type */
31-
param.X->numel(), /* len */
32-
param.X->data<float>(), /* x */
33-
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
28+
int r = xdnn::relu(ctx.GetRawContext(),
29+
param.X->data<float>(),
30+
param.Out->mutable_data<float>(TARGET(kXPU)),
31+
param.X->numel());
32+
CHECK_EQ(r, 0);
33+
}
34+
35+
// Runs the XPU relu6 kernel for this op instance.
// Fetches the op parameters and the XPU context, then hands X's float data,
// Out's mutable float buffer (requested for TARGET(kXPU)) and X's element
// count to xdnn::relu6.
void Relu6Compute::Run() {
36+
auto& param = this->Param<param_t>();
37+
auto& ctx = this->ctx_->As<XPUContext>();
38+
39+
// Argument order is (context, input, output, length).
int r = xdnn::relu6(ctx.GetRawContext(),
40+
param.X->data<float>(),
41+
param.Out->mutable_data<float>(TARGET(kXPU)),
42+
param.X->numel());
3443
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
3544
}
3645

3746
// Runs the XPU tanh kernel for this op instance.
// Diff note: the "N-" lines below are the removed generic
// xdnn::activation_forward(ctx, type, len, x, y) call; the "N+" lines replace
// it with the dedicated xdnn::tanh(ctx, x, y, len) entry point. The length
// argument moved from before the data pointers to after them.
void TanhCompute::Run() {
3847
auto& param = this->Param<param_t>();
3948
auto& ctx = this->ctx_->As<XPUContext>();
4049

41-
int r = xdnn::activation_forward(
42-
ctx.GetRawContext(), /* context */
43-
xdnn::Activation_t::TANH, /* type */
44-
param.X->numel(), /* len */
45-
param.X->data<float>(), /* x */
46-
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
50+
int r = xdnn::tanh(ctx.GetRawContext(),
51+
param.X->data<float>(),
52+
param.Out->mutable_data<float>(TARGET(kXPU)),
53+
param.X->numel());
4754
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
4855
}
4956

5057
// Runs the XPU sigmoid kernel for this op instance.
// Diff note: the "N-" lines below are the removed generic
// xdnn::activation_forward(ctx, type, len, x, y) call; the "N+" lines replace
// it with the dedicated xdnn::sigmoid(ctx, x, y, len) entry point. The length
// argument moved from before the data pointers to after them.
void SigmoidCompute::Run() {
5058
auto& param = this->Param<param_t>();
5159
auto& ctx = this->ctx_->As<XPUContext>();
5260

54-
int r = xdnn::activation_forward(
55-
ctx.GetRawContext(), /* context */
56-
xdnn::Activation_t::SIGMOID, /* type */
57-
param.X->numel(), /* len */
58-
param.X->data<float>(), /* x */
59-
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
61+
int r = xdnn::sigmoid(ctx.GetRawContext(),
62+
param.X->data<float>(),
63+
param.Out->mutable_data<float>(TARGET(kXPU)),
64+
param.X->numel());
6065
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
6166
}
6267

6368
// Runs the XPU abs kernel for this op instance.
// Diff note: the "N-" lines below are the removed generic
// xdnn::activation_forward(ctx, type, len, x, y) call; the "N+" lines replace
// it with the dedicated xdnn::abs(ctx, x, y, len) entry point. The length
// argument moved from before the data pointers to after them.
void AbsCompute::Run() {
6469
auto& param = this->Param<param_t>();
6570
auto& ctx = this->ctx_->As<XPUContext>();
6671

67-
int r = xdnn::activation_forward(
68-
ctx.GetRawContext(), /* context */
69-
xdnn::Activation_t::ABS, /* type */
70-
param.X->numel(), /* len */
71-
param.X->data<float>(), /* x */
72-
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
72+
int r = xdnn::abs(ctx.GetRawContext(),
73+
param.X->data<float>(),
74+
param.Out->mutable_data<float>(TARGET(kXPU)),
75+
param.X->numel());
7376
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
7477
}
7578

7679
// Runs the XPU exp kernel for this op instance.
// Diff note: the "N-" lines below are the removed generic
// xdnn::activation_forward(ctx, type, len, x, y) call; the "N+" lines replace
// it with the dedicated xdnn::exp(ctx, x, y, len) entry point. The length
// argument moved from before the data pointers to after them.
void ExpCompute::Run() {
7780
auto& param = this->Param<param_t>();
7881
auto& ctx = this->ctx_->As<XPUContext>();
7982

80-
int r = xdnn::activation_forward(
81-
ctx.GetRawContext(), /* context */
82-
xdnn::Activation_t::EXP, /* type */
83-
param.X->numel(), /* len */
84-
param.X->data<float>(), /* x */
85-
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
83+
int r = xdnn::exp(ctx.GetRawContext(),
84+
param.X->data<float>(),
85+
param.Out->mutable_data<float>(TARGET(kXPU)),
86+
param.X->numel());
8687
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
8788
}
8889

8990
// Runs the XPU square kernel for this op instance.
// Diff note: the "N-" lines below are the removed generic
// xdnn::activation_forward(ctx, type, len, x, y) call; the "N+" lines replace
// it with the dedicated xdnn::square(ctx, x, y, len) entry point. The length
// argument moved from before the data pointers to after them.
void SquareCompute::Run() {
9091
auto& param = this->Param<param_t>();
9192
auto& ctx = this->ctx_->As<XPUContext>();
9293

93-
int r = xdnn::activation_forward(
94-
ctx.GetRawContext(), /* context */
95-
xdnn::Activation_t::SQUARE, /* type */
96-
param.X->numel(), /* len */
97-
param.X->data<float>(), /* x */
98-
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
94+
int r = xdnn::square(ctx.GetRawContext(),
95+
param.X->data<float>(),
96+
param.Out->mutable_data<float>(TARGET(kXPU)),
97+
param.X->numel());
9998
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
10099
}
101100

102101
// Runs the XPU reciprocal kernel for this op instance.
// Diff note: unlike the other activations touched in this change, the "N+"
// lines keep the generic xdnn::activation_forward call with
// Activation_t::RECIPROCAL; only the line wrapping changed and the inline
// argument-role comments were dropped. Argument order stays
// (context, type, len, x, y).
void ReciprocalCompute::Run() {
103102
auto& param = this->Param<param_t>();
104103
auto& ctx = this->ctx_->As<XPUContext>();
105104

106-
int r = xdnn::activation_forward(
107-
ctx.GetRawContext(), /* context */
108-
xdnn::Activation_t::RECIPROCAL, /* type */
109-
param.X->numel(), /* len */
110-
param.X->data<float>(), /* x */
111-
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
105+
int r =
106+
xdnn::activation_forward(ctx.GetRawContext(),
107+
xdnn::Activation_t::RECIPROCAL,
108+
param.X->numel(),
109+
param.X->data<float>(),
110+
param.Out->mutable_data<float>(TARGET(kXPU)));
112111
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
113112
}
114113

115114
// Runs the XPU sqrt kernel for this op instance.
// Diff note: the "N-" lines below are the removed generic
// xdnn::activation_forward(ctx, type, len, x, y) call; the "N+" lines replace
// it with the dedicated xdnn::sqrt(ctx, x, y, len) entry point. The length
// argument moved from before the data pointers to after them.
void SqrtCompute::Run() {
116115
auto& param = this->Param<param_t>();
117116
auto& ctx = this->ctx_->As<XPUContext>();
118117

119-
int r = xdnn::activation_forward(
120-
ctx.GetRawContext(), /* context */
121-
xdnn::Activation_t::SQRT, /* type */
122-
param.X->numel(), /* len */
123-
param.X->data<float>(), /* x */
124-
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
118+
int r = xdnn::sqrt(ctx.GetRawContext(),
119+
param.X->data<float>(),
120+
param.Out->mutable_data<float>(TARGET(kXPU)),
121+
param.X->numel());
125122
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
126123
}
127124

@@ -132,25 +129,71 @@ void PowCompute::Run() {
132129
xdnn::Activation_t act_type(xdnn::Activation_t::ACT_POW);
133130
act_type.pow_factor = param.factor;
134131

135-
int r = xdnn::activation_forward(
136-
ctx.GetRawContext(), /* context */
137-
act_type, /* type */
138-
param.X->numel(), /* len */
139-
param.X->data<float>(), /* x */
140-
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
132+
int r =
133+
xdnn::activation_forward(ctx.GetRawContext(),
134+
act_type,
135+
param.X->numel(),
136+
param.X->data<float>(),
137+
param.Out->mutable_data<float>(TARGET(kXPU)));
141138
CHECK_EQ(r, 0);
142139
}
143140

144141
// Runs the XPU sign kernel for this op instance.
// Diff note: the "N+" lines keep the generic xdnn::activation_forward call
// with Activation_t::SIGN; only the line wrapping changed and the inline
// argument-role comments were dropped. Argument order stays
// (context, type, len, x, y).
void SignCompute::Run() {
145142
auto& param = this->Param<param_t>();
146143
auto& ctx = this->ctx_->As<XPUContext>();
147144

148-
int r = xdnn::activation_forward(
149-
ctx.GetRawContext(), /* context */
150-
xdnn::Activation_t::SIGN, /* type */
151-
param.X->numel(), /* len */
152-
param.X->data<float>(), /* x */
153-
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
145+
int r =
146+
xdnn::activation_forward(ctx.GetRawContext(),
147+
xdnn::Activation_t::SIGN,
148+
param.X->numel(),
149+
param.X->data<float>(),
150+
param.Out->mutable_data<float>(TARGET(kXPU)));
151+
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
152+
}
153+
154+
// Runs the XPU hard_swish kernel for this op instance.
// Fetches the op parameters and the XPU context, then hands X's float data,
// Out's mutable float buffer (requested for TARGET(kXPU)) and X's element
// count to xdnn::hard_swish.
// NOTE(review): no threshold/scale/offset attributes are forwarded here, so
// presumably xdnn::hard_swish uses fixed constants — confirm they match the
// values carried by the op's parameters.
void HardSwishCompute::Run() {
155+
auto& param = this->Param<param_t>();
156+
auto& ctx = this->ctx_->As<XPUContext>();
157+
158+
// Argument order is (context, input, output, length).
int r = xdnn::hard_swish(ctx.GetRawContext(),
159+
param.X->data<float>(),
160+
param.Out->mutable_data<float>(TARGET(kXPU)),
161+
param.X->numel());
162+
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
163+
}
164+
165+
// Runs the XPU hard_sigmoid kernel for this op instance.
// Passes X's float data, Out's mutable float buffer (requested for
// TARGET(kXPU)), X's element count and param.hard_sigmoid_slope to
// xdnn::hard_sigmoid.
// NOTE(review): only the slope is forwarded. If the op also carries an
// offset attribute, presumably xdnn::hard_sigmoid assumes a fixed offset —
// confirm it matches the op definition.
void HardSigmoidCompute::Run() {
166+
auto& param = this->Param<param_t>();
167+
auto& ctx = this->ctx_->As<XPUContext>();
168+
169+
// Argument order is (context, input, output, length, slope).
int r = xdnn::hard_sigmoid(ctx.GetRawContext(),
170+
param.X->data<float>(),
171+
param.Out->mutable_data<float>(TARGET(kXPU)),
172+
param.X->numel(),
173+
param.hard_sigmoid_slope);
174+
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
175+
}
176+
177+
// Runs the XPU leaky_relu kernel for this op instance.
// Passes X's float data, Out's mutable float buffer (requested for
// TARGET(kXPU)), X's element count and param.Leaky_relu_alpha to
// xdnn::leaky_relu.
void LeakyReluCompute::Run() {
178+
auto& param = this->Param<param_t>();
179+
auto& ctx = this->ctx_->As<XPUContext>();
180+
181+
// Argument order is (context, input, output, length, alpha).
int r = xdnn::leaky_relu(ctx.GetRawContext(),
182+
param.X->data<float>(),
183+
param.Out->mutable_data<float>(TARGET(kXPU)),
184+
param.X->numel(),
185+
param.Leaky_relu_alpha);
186+
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
187+
}
188+
189+
// Runs the XPU softsign kernel for this op instance.
// Fetches the op parameters and the XPU context, then hands X's float data,
// Out's mutable float buffer (requested for TARGET(kXPU)) and X's element
// count to xdnn::softsign.
void SoftsignCompute::Run() {
190+
auto& param = this->Param<param_t>();
191+
auto& ctx = this->ctx_->As<XPUContext>();
192+
193+
// Argument order is (context, input, output, length).
int r = xdnn::softsign(ctx.GetRawContext(),
194+
param.X->data<float>(),
195+
param.Out->mutable_data<float>(TARGET(kXPU)),
196+
param.X->numel());
154197
// The xdnn call reports status through its int return; 0 is required here.
CHECK_EQ(r, 0);
155198
}
156199

@@ -165,6 +208,12 @@ REGISTER_LITE_KERNEL(
165208
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
166209
.Finalize();
167210

211+
// Registers Relu6Compute as the "relu6" kernel for kXPU / kFloat / kNCHW
// under the alias "def". Input "X" and output "Out" are both bound as
// tensors on TARGET(kXPU).
REGISTER_LITE_KERNEL(
212+
relu6, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::Relu6Compute, def)
213+
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
214+
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
215+
.Finalize();
216+
168217
REGISTER_LITE_KERNEL(
169218
tanh, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::TanhCompute, def)
170219
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
@@ -226,3 +275,43 @@ REGISTER_LITE_KERNEL(reciprocal,
226275
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
227276
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
228277
.Finalize();
278+
279+
// Registers HardSigmoidCompute as the "hard_sigmoid" kernel for
// kXPU / kFloat / kNCHW under the alias "def". Input "X" and output "Out"
// are both bound as tensors on TARGET(kXPU).
REGISTER_LITE_KERNEL(hard_sigmoid,
280+
kXPU,
281+
kFloat,
282+
kNCHW,
283+
paddle::lite::kernels::xpu::HardSigmoidCompute,
284+
def)
285+
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
286+
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
287+
.Finalize();
288+
289+
// Registers HardSwishCompute as the "hard_swish" kernel for
// kXPU / kFloat / kNCHW under the alias "def". Input "X" and output "Out"
// are both bound as tensors on TARGET(kXPU).
REGISTER_LITE_KERNEL(hard_swish,
290+
kXPU,
291+
kFloat,
292+
kNCHW,
293+
paddle::lite::kernels::xpu::HardSwishCompute,
294+
def)
295+
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
296+
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
297+
.Finalize();
298+
299+
// Registers LeakyReluCompute as the "leaky_relu" kernel for
// kXPU / kFloat / kNCHW under the alias "def". Input "X" and output "Out"
// are both bound as tensors on TARGET(kXPU).
REGISTER_LITE_KERNEL(leaky_relu,
300+
kXPU,
301+
kFloat,
302+
kNCHW,
303+
paddle::lite::kernels::xpu::LeakyReluCompute,
304+
def)
305+
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
306+
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
307+
.Finalize();
308+
309+
// Registers SoftsignCompute as the "softsign" kernel for
// kXPU / kFloat / kNCHW under the alias "def". Input "X" and output "Out"
// are both bound as tensors on TARGET(kXPU).
REGISTER_LITE_KERNEL(softsign,
310+
kXPU,
311+
kFloat,
312+
kNCHW,
313+
paddle::lite::kernels::xpu::SoftsignCompute,
314+
def)
315+
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
316+
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
317+
.Finalize();

lite/kernels/xpu/activation_compute.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@ class ReluCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
2929
virtual ~ReluCompute() = default;
3030
};
3131

32+
// Kernel class for the relu6 op, specialised for TARGET(kXPU) and
// PRECISION(kFloat). It declares only Run() and a defaulted virtual
// destructor; parameters are read as operators::ActivationParam.
// NOTE(review): Run() presumably overrides a virtual declared by KernelLite;
// if so, `override` would make that explicit — confirm against the base.
class Relu6Compute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
33+
public:
34+
// Parameter struct type used by this kernel.
using param_t = operators::ActivationParam;
35+
36+
// Executes the kernel; the body is not part of this class declaration.
virtual void Run();
37+
38+
virtual ~Relu6Compute() = default;
39+
};
40+
3241
class TanhCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
3342
public:
3443
using param_t = operators::ActivationParam;
@@ -110,6 +119,42 @@ class SignCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
110119
virtual ~SignCompute() = default;
111120
};
112121

122+
// Kernel class for the hard_swish op, specialised for TARGET(kXPU) and
// PRECISION(kFloat). It declares only Run() and a defaulted virtual
// destructor; parameters are read as operators::ActivationParam.
// NOTE(review): Run() presumably overrides a virtual declared by KernelLite;
// if so, `override` would make that explicit — confirm against the base.
class HardSwishCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
123+
public:
124+
// Parameter struct type used by this kernel.
using param_t = operators::ActivationParam;
125+
126+
// Executes the kernel; the body is not part of this class declaration.
virtual void Run();
127+
128+
virtual ~HardSwishCompute() = default;
129+
};
130+
131+
// Kernel class for the hard_sigmoid op, specialised for TARGET(kXPU) and
// PRECISION(kFloat). It declares only Run() and a defaulted virtual
// destructor; parameters are read as operators::ActivationParam.
// NOTE(review): Run() presumably overrides a virtual declared by KernelLite;
// if so, `override` would make that explicit — confirm against the base.
class HardSigmoidCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
132+
public:
133+
// Parameter struct type used by this kernel.
using param_t = operators::ActivationParam;
134+
135+
// Executes the kernel; the body is not part of this class declaration.
virtual void Run();
136+
137+
virtual ~HardSigmoidCompute() = default;
138+
};
139+
140+
// Kernel class for the leaky_relu op, specialised for TARGET(kXPU) and
// PRECISION(kFloat). It declares only Run() and a defaulted virtual
// destructor; parameters are read as operators::ActivationParam.
// NOTE(review): Run() presumably overrides a virtual declared by KernelLite;
// if so, `override` would make that explicit — confirm against the base.
class LeakyReluCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
141+
public:
142+
// Parameter struct type used by this kernel.
using param_t = operators::ActivationParam;
143+
144+
// Executes the kernel; the body is not part of this class declaration.
virtual void Run();
145+
146+
virtual ~LeakyReluCompute() = default;
147+
};
148+
149+
// Kernel class for the softsign op, specialised for TARGET(kXPU) and
// PRECISION(kFloat). It declares only Run() and a defaulted virtual
// destructor; parameters are read as operators::ActivationParam.
// NOTE(review): Run() presumably overrides a virtual declared by KernelLite;
// if so, `override` would make that explicit — confirm against the base.
class SoftsignCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
150+
public:
151+
// Parameter struct type used by this kernel.
using param_t = operators::ActivationParam;
152+
153+
// Executes the kernel; the body is not part of this class declaration.
virtual void Run();
154+
155+
virtual ~SoftsignCompute() = default;
156+
};
157+
113158
} // namespace xpu
114159
} // namespace kernels
115160
} // namespace lite

0 commit comments

Comments
 (0)