Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,4 @@ void ReshapeCalcOfflinePass::RemoveReshapePattern(

REGISTER_MIR_PASS(reshape_calc_offline_pass,
paddle::lite::mir::ReshapeCalcOfflinePass)
.BindTargets({TARGET(kNNAdapter), TARGET(kXPU)});
.BindTargets({TARGET(kNNAdapter)});
3 changes: 3 additions & 0 deletions lite/kernels/xpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ add_kernel(grid_sampler_compute_xpu XPU basic SRCS grid_sampler_compute.cc)
add_kernel(fill_zeros_like_compute_xpu XPU basic SRCS fill_zeros_like_compute.cc)
add_kernel(reduce_compute_xpu XPU basic SRCS reduce_compute.cc)
add_kernel(expand_v2_compute_xpu XPU basic SRCS expand_v2_compute.cc)
add_kernel(range_compute_xpu XPU extra SRCS range_compute.cc)
add_kernel(where_compute_xpu XPU extra SRCS where_compute.cc)
add_kernel(gather_nd_compute_xpu XPU extra SRCS gather_nd_compute.cc)

# extra
add_kernel(lookup_table_compute_xpu XPU extra SRCS lookup_table_compute.cc)
Expand Down
17 changes: 17 additions & 0 deletions lite/kernels/xpu/activation_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,17 @@ void PReluCompute::Run() {
CHECK_EQ(r, 0);
}

// Element-wise floor on XPU: Out[i] = floor(X[i]) for float32 tensors.
void FloorCompute::Run() {
  auto& param = this->template Param<param_t>();
  auto& ctx = this->ctx_->template As<XPUContext>();

  const float* x_data = param.X->data<float>();
  float* out_data = param.Out->mutable_data<float>(TARGET(kXPU));
  int ret =
      xdnn::floor(ctx.GetRawContext(), x_data, out_data, param.X->numel());
  // xdnn returns 0 on success; any other value is a device-side failure.
  CHECK_EQ(ret, 0);
}

} // namespace xpu
} // namespace kernels
} // namespace lite
Expand Down Expand Up @@ -445,3 +456,9 @@ REGISTER_LITE_KERNEL(
.BindInput("Alpha", {LiteType::GetTensorTy(TARGET(kXPU))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
.Finalize();

REGISTER_LITE_KERNEL(
floor, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::FloorCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
.Finalize();
9 changes: 9 additions & 0 deletions lite/kernels/xpu/activation_compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,15 @@ class PReluCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
virtual ~PReluCompute() = default;
};

// XPU kernel for the element-wise `floor` activation op (float32 in/out).
// Registered under kXPU/kFloat/kNCHW with kernel alias `def`.
class FloorCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  // Floor shares the generic activation parameter struct (X in, Out out).
  using param_t = operators::ActivationParam;

  void Run() override;

  virtual ~FloorCompute() = default;
};

} // namespace xpu
} // namespace kernels
} // namespace lite
Expand Down
76 changes: 75 additions & 1 deletion lite/kernels/xpu/compare_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,18 @@ struct GreaterThanFunctor {
}
};

// Functor plugged into CompareCompute that dispatches element-wise `x >= y`
// to xdnn::broadcast_greater_equal (broadcasting per the given x/y shapes).
// Writes boolean results into `z`; returns the xdnn status code (0 = ok).
template <typename T>
struct GreaterEqualFunctor {
  inline int operator()(xdnn::Context* ctx,
                        const T* x,
                        const T* y,
                        bool* z,
                        const std::vector<int>& xshape,
                        const std::vector<int>& yshape) const {
    return xdnn::broadcast_greater_equal<T>(ctx, x, y, z, xshape, yshape);
  }
};

template <PrecisionType PType, class T, class Functor>
void CompareCompute<PType, T, Functor>::Run() {
auto& param = this->template Param<operators::CompareParam>();
Expand All @@ -76,7 +88,6 @@ void CompareCompute<PType, T, Functor>::Run() {
int axis = (param.axis == -1 ? abs(static_cast<int>(x_dims.size()) -
static_cast<int>(y_dims.size()))
: param.axis);

// constrains:
// 1. X size should be larger than Y
CHECK_GE(x_size, y_size) << "Input X cannot be smaller than Y";
Expand Down Expand Up @@ -298,3 +309,66 @@ REGISTER_LITE_KERNEL(
DATALAYOUT(kAny))})
.BindPaddleOpVersion("greater_than", 1)
.Finalize();

// --- greater_equal kernel registrations ------------------------------------
// All variants register under kernel precision kFloat / layout kAny; the
// tensor precisions bound on X/Y select the element type actually handled
// (float, int32, int64). The output is always a bool tensor.
using greater_equal_float = paddle::lite::kernels::xpu::CompareCompute<
    PRECISION(kFloat),
    float,
    paddle::lite::kernels::xpu::GreaterEqualFunctor<float>>;
REGISTER_LITE_KERNEL(
    greater_equal, kXPU, kFloat, kAny, greater_equal_float, def)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kFloat),
                                      DATALAYOUT(kAny))})
    .BindInput("Y",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kFloat),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kBool),
                                       DATALAYOUT(kAny))})
    .BindPaddleOpVersion("greater_equal", 1)
    .Finalize();

// int32 inputs -> bool output (kernel alias `int32`).
using greater_equal_int32 = paddle::lite::kernels::xpu::CompareCompute<
    PRECISION(kFloat),
    int,
    paddle::lite::kernels::xpu::GreaterEqualFunctor<int>>;
REGISTER_LITE_KERNEL(
    greater_equal, kXPU, kFloat, kAny, greater_equal_int32, int32)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kInt32),
                                      DATALAYOUT(kAny))})
    .BindInput("Y",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kInt32),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kBool),
                                       DATALAYOUT(kAny))})
    .BindPaddleOpVersion("greater_equal", 1)
    .Finalize();

// int64 inputs -> bool output (kernel alias `int64`).
using greater_equal_int64 = paddle::lite::kernels::xpu::CompareCompute<
    PRECISION(kFloat),
    int64_t,
    paddle::lite::kernels::xpu::GreaterEqualFunctor<int64_t>>;
REGISTER_LITE_KERNEL(
    greater_equal, kXPU, kFloat, kAny, greater_equal_int64, int64)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kInt64),
                                      DATALAYOUT(kAny))})
    .BindInput("Y",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kInt64),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kBool),
                                       DATALAYOUT(kAny))})
    .BindPaddleOpVersion("greater_equal", 1)
    .Finalize();
132 changes: 132 additions & 0 deletions lite/kernels/xpu/gather_nd_compute.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/xpu/gather_nd_compute.h"
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {

// Gathers slices of `x` at the coordinates stored in `index` (gather_nd
// semantics) into `out`, dispatching the work to xdnn::gather_nd.
template <typename DataType, typename IndexType, PrecisionType PType>
void GatherNdCompute<DataType, IndexType, PType>::Run() {
  auto& param = this->template Param<param_t>();
  auto& ctx = this->ctx_->template As<XPUContext>();

  auto* input = param.x;
  auto* indices = param.index;
  auto* output = param.out;
  // Nothing to gather: just mark the (empty) output as XPU-resident.
  if (output->numel() == 0) {
    output->set_target(TARGET(kXPU));
    return;
  }

  // Narrow both shapes from int64 dims to the int vectors xdnn expects.
  const auto& in_dims = input->dims().data();
  std::vector<int> in_shape(in_dims.begin(), in_dims.end());
  const auto& idx_dims = indices->dims().data();
  std::vector<int> idx_shape(idx_dims.begin(), idx_dims.end());
  // The x shape is passed as a VectorParam; the device pointer is left null.
  auto x_shape_param = xdnn::VectorParam<int>{
      in_shape.data(), static_cast<int>(in_shape.size()), nullptr};

  int ret = xdnn::gather_nd<DataType, IndexType>(
      ctx.GetRawContext(),
      input->template data<DataType>(),
      indices->template data<IndexType>(),
      output->template mutable_data<DataType>(TARGET(kXPU)),
      x_shape_param,
      idx_shape);
  CHECK_EQ(ret, 0);
}

} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle

// Type aliases for the gather_nd kernel variants:
// GatherNdCompute<DataType (X/Out element type), IndexType, kernel precision>.
using GatherXPUInt32Int32 =
    paddle::lite::kernels::xpu::GatherNdCompute<int32_t,
                                                int32_t,
                                                PRECISION(kInt32)>;

using GatherXPUInt32Int64 =
    paddle::lite::kernels::xpu::GatherNdCompute<int32_t,
                                                int64_t,
                                                PRECISION(kInt32)>;

using GatherXPUFloatInt32 =
    paddle::lite::kernels::xpu::GatherNdCompute<float,
                                                int32_t,
                                                PRECISION(kFloat)>;

using GatherXPUFloatInt64 =
    paddle::lite::kernels::xpu::GatherNdCompute<float,
                                                int64_t,
                                                PRECISION(kFloat)>;

using GatherXPUInt64Int32 =
    paddle::lite::kernels::xpu::GatherNdCompute<int64_t,
                                                int32_t,
                                                PRECISION(kInt64)>;

using GatherXPUInt64Int64 =
    paddle::lite::kernels::xpu::GatherNdCompute<int64_t,
                                                int64_t,
                                                PRECISION(kInt64)>;

// Registrations below bind the Out precision explicitly for every variant.
// Relying on GetTensorTy's default (kFloat) would mis-declare the output of
// the int32 variants and was inconsistent with the float/int64 ones.
REGISTER_LITE_KERNEL(gather_nd, kXPU, kFloat, kNCHW, GatherXPUFloatInt32, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFloat))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFloat))})
    .Finalize();

REGISTER_LITE_KERNEL(
    gather_nd, kXPU, kFloat, kNCHW, GatherXPUFloatInt64, gather_FP32_INT64)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFloat))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFloat))})
    .Finalize();

REGISTER_LITE_KERNEL(
    gather_nd, kXPU, kInt32, kNCHW, GatherXPUInt32Int32, gather_INT32_INT32)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .Finalize();

REGISTER_LITE_KERNEL(
    gather_nd, kXPU, kInt32, kNCHW, GatherXPUInt32Int64, gather_INT32_INT64)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .Finalize();

REGISTER_LITE_KERNEL(
    gather_nd, kXPU, kInt64, kNCHW, GatherXPUInt64Int32, gather_INT64_INT32)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .Finalize();

REGISTER_LITE_KERNEL(
    gather_nd, kXPU, kInt64, kNCHW, GatherXPUInt64Int64, gather_INT64_INT64)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .Finalize();
37 changes: 37 additions & 0 deletions lite/kernels/xpu/gather_nd_compute.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "lite/core/kernel.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {

// XPU kernel for the `gather_nd` op: gathers slices of the input tensor at
// the coordinates given by an integer index tensor.
//   DataType:  element type of X/Out (float, int32_t, int64_t)
//   IndexType: element type of Index (int32_t or int64_t)
//   PType:     precision the kernel is registered under
template <typename DataType, typename IndexType, PrecisionType PType>
class GatherNdCompute : public KernelLite<TARGET(kXPU), PType> {
 public:
  using param_t = operators::GatherNdParam;

  // `override` (instead of re-declaring `virtual`) matches the other XPU
  // kernels and lets the compiler check the signature against the base class.
  void Run() override;

  virtual ~GatherNdCompute() = default;
};

} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
Loading