Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,4 @@ void ReshapeCalcOfflinePass::RemoveReshapePattern(

REGISTER_MIR_PASS(reshape_calc_offline_pass,
paddle::lite::mir::ReshapeCalcOfflinePass)
.BindTargets({TARGET(kNNAdapter), TARGET(kXPU)});
.BindTargets({TARGET(kNNAdapter)});
3 changes: 3 additions & 0 deletions lite/kernels/xpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ add_kernel(grid_sampler_compute_xpu XPU basic SRCS grid_sampler_compute.cc)
add_kernel(fill_zeros_like_compute_xpu XPU basic SRCS fill_zeros_like_compute.cc)
add_kernel(reduce_compute_xpu XPU basic SRCS reduce_compute.cc)
add_kernel(expand_v2_compute_xpu XPU basic SRCS expand_v2_compute.cc)
add_kernel(range_compute_xpu XPU extra SRCS range_compute.cc)
add_kernel(where_compute_xpu XPU extra SRCS where_compute.cc)
add_kernel(gather_nd_compute_xpu XPU extra SRCS gather_nd_compute.cc)

# extra
add_kernel(lookup_table_compute_xpu XPU extra SRCS lookup_table_compute.cc)
Expand Down
17 changes: 17 additions & 0 deletions lite/kernels/xpu/activation_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,17 @@ void PReluCompute::Run() {
CHECK_EQ(r, 0);
}

// Element-wise floor on XPU: Out[i] = floor(X[i]) for float32 tensors.
void FloorCompute::Run() {
  auto& param = this->template Param<param_t>();
  auto& ctx = this->ctx_->template As<XPUContext>();

  const float* x_data = param.X->data<float>();
  float* out_data = param.Out->mutable_data<float>(TARGET(kXPU));
  int ret =
      xdnn::floor(ctx.GetRawContext(), x_data, out_data, param.X->numel());
  // xdnn returns 0 on success; any other value is a device-side failure.
  CHECK_EQ(ret, 0);
}

} // namespace xpu
} // namespace kernels
} // namespace lite
Expand Down Expand Up @@ -445,3 +456,9 @@ REGISTER_LITE_KERNEL(
.BindInput("Alpha", {LiteType::GetTensorTy(TARGET(kXPU))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
.Finalize();

REGISTER_LITE_KERNEL(
floor, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::FloorCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
.Finalize();
9 changes: 9 additions & 0 deletions lite/kernels/xpu/activation_compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,15 @@ class PReluCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
virtual ~PReluCompute() = default;
};

// XPU kernel for the element-wise `floor` activation op (float32 in/out).
// Registered under kXPU/kFloat/kNCHW with kernel alias `def`.
class FloorCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  // Floor shares the generic activation parameter struct (X in, Out out).
  using param_t = operators::ActivationParam;

  void Run() override;

  virtual ~FloorCompute() = default;
};

} // namespace xpu
} // namespace kernels
} // namespace lite
Expand Down
76 changes: 75 additions & 1 deletion lite/kernels/xpu/compare_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,18 @@ struct GreaterThanFunctor {
}
};

// Functor plugged into CompareCompute that dispatches element-wise `x >= y`
// to xdnn::broadcast_greater_equal (broadcasting per the given x/y shapes).
// Writes boolean results into `z`; returns the xdnn status code (0 = ok).
template <typename T>
struct GreaterEqualFunctor {
  inline int operator()(xdnn::Context* ctx,
                        const T* x,
                        const T* y,
                        bool* z,
                        const std::vector<int>& xshape,
                        const std::vector<int>& yshape) const {
    return xdnn::broadcast_greater_equal<T>(ctx, x, y, z, xshape, yshape);
  }
};

template <PrecisionType PType, class T, class Functor>
void CompareCompute<PType, T, Functor>::Run() {
auto& param = this->template Param<operators::CompareParam>();
Expand All @@ -76,7 +88,6 @@ void CompareCompute<PType, T, Functor>::Run() {
int axis = (param.axis == -1 ? abs(static_cast<int>(x_dims.size()) -
static_cast<int>(y_dims.size()))
: param.axis);

// constrains:
// 1. X size should be larger than Y
CHECK_GE(x_size, y_size) << "Input X cannot be smaller than Y";
Expand Down Expand Up @@ -298,3 +309,66 @@ REGISTER_LITE_KERNEL(
DATALAYOUT(kAny))})
.BindPaddleOpVersion("greater_than", 1)
.Finalize();

// --- greater_equal kernel registrations ------------------------------------
// All variants register under kernel precision kFloat / layout kAny; the
// tensor precisions bound on X/Y select the element type actually handled
// (float, int32, int64). The output is always a bool tensor.
using greater_equal_float = paddle::lite::kernels::xpu::CompareCompute<
    PRECISION(kFloat),
    float,
    paddle::lite::kernels::xpu::GreaterEqualFunctor<float>>;
REGISTER_LITE_KERNEL(
    greater_equal, kXPU, kFloat, kAny, greater_equal_float, def)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kFloat),
                                      DATALAYOUT(kAny))})
    .BindInput("Y",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kFloat),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kBool),
                                       DATALAYOUT(kAny))})
    .BindPaddleOpVersion("greater_equal", 1)
    .Finalize();

// int32 inputs -> bool output (kernel alias `int32`).
using greater_equal_int32 = paddle::lite::kernels::xpu::CompareCompute<
    PRECISION(kFloat),
    int,
    paddle::lite::kernels::xpu::GreaterEqualFunctor<int>>;
REGISTER_LITE_KERNEL(
    greater_equal, kXPU, kFloat, kAny, greater_equal_int32, int32)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kInt32),
                                      DATALAYOUT(kAny))})
    .BindInput("Y",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kInt32),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kBool),
                                       DATALAYOUT(kAny))})
    .BindPaddleOpVersion("greater_equal", 1)
    .Finalize();

// int64 inputs -> bool output (kernel alias `int64`).
using greater_equal_int64 = paddle::lite::kernels::xpu::CompareCompute<
    PRECISION(kFloat),
    int64_t,
    paddle::lite::kernels::xpu::GreaterEqualFunctor<int64_t>>;
REGISTER_LITE_KERNEL(
    greater_equal, kXPU, kFloat, kAny, greater_equal_int64, int64)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kInt64),
                                      DATALAYOUT(kAny))})
    .BindInput("Y",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kInt64),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kBool),
                                       DATALAYOUT(kAny))})
    .BindPaddleOpVersion("greater_equal", 1)
    .Finalize();
132 changes: 132 additions & 0 deletions lite/kernels/xpu/gather_nd_compute.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/xpu/gather_nd_compute.h"
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {

// Gathers slices of `x` at the coordinates stored in `index` (gather_nd
// semantics) into `out`, dispatching the work to xdnn::gather_nd.
template <typename DataType, typename IndexType, PrecisionType PType>
void GatherNdCompute<DataType, IndexType, PType>::Run() {
  auto& param = this->template Param<param_t>();
  auto& ctx = this->ctx_->template As<XPUContext>();

  auto* input = param.x;
  auto* indices = param.index;
  auto* output = param.out;
  // Nothing to gather: just mark the (empty) output as XPU-resident.
  if (output->numel() == 0) {
    output->set_target(TARGET(kXPU));
    return;
  }

  // Narrow both shapes from int64 dims to the int vectors xdnn expects.
  const auto& in_dims = input->dims().data();
  std::vector<int> in_shape(in_dims.begin(), in_dims.end());
  const auto& idx_dims = indices->dims().data();
  std::vector<int> idx_shape(idx_dims.begin(), idx_dims.end());
  // The x shape is passed as a VectorParam; the device pointer is left null.
  auto x_shape_param = xdnn::VectorParam<int>{
      in_shape.data(), static_cast<int>(in_shape.size()), nullptr};

  int ret = xdnn::gather_nd<DataType, IndexType>(
      ctx.GetRawContext(),
      input->template data<DataType>(),
      indices->template data<IndexType>(),
      output->template mutable_data<DataType>(TARGET(kXPU)),
      x_shape_param,
      idx_shape);
  CHECK_EQ(ret, 0);
}

} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle

// Type aliases for the gather_nd kernel variants:
// GatherNdCompute<DataType (X/Out element type), IndexType, kernel precision>.
using GatherXPUInt32Int32 =
    paddle::lite::kernels::xpu::GatherNdCompute<int32_t,
                                                int32_t,
                                                PRECISION(kInt32)>;

using GatherXPUInt32Int64 =
    paddle::lite::kernels::xpu::GatherNdCompute<int32_t,
                                                int64_t,
                                                PRECISION(kInt32)>;

using GatherXPUFloatInt32 =
    paddle::lite::kernels::xpu::GatherNdCompute<float,
                                                int32_t,
                                                PRECISION(kFloat)>;

using GatherXPUFloatInt64 =
    paddle::lite::kernels::xpu::GatherNdCompute<float,
                                                int64_t,
                                                PRECISION(kFloat)>;

using GatherXPUInt64Int32 =
    paddle::lite::kernels::xpu::GatherNdCompute<int64_t,
                                                int32_t,
                                                PRECISION(kInt64)>;

using GatherXPUInt64Int64 =
    paddle::lite::kernels::xpu::GatherNdCompute<int64_t,
                                                int64_t,
                                                PRECISION(kInt64)>;

// Registrations below bind the Out precision explicitly for every variant.
// Relying on GetTensorTy's default (kFloat) would mis-declare the output of
// the int32 variants and was inconsistent with the float/int64 ones.
REGISTER_LITE_KERNEL(gather_nd, kXPU, kFloat, kNCHW, GatherXPUFloatInt32, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFloat))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFloat))})
    .Finalize();

REGISTER_LITE_KERNEL(
    gather_nd, kXPU, kFloat, kNCHW, GatherXPUFloatInt64, gather_FP32_INT64)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFloat))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFloat))})
    .Finalize();

REGISTER_LITE_KERNEL(
    gather_nd, kXPU, kInt32, kNCHW, GatherXPUInt32Int32, gather_INT32_INT32)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .Finalize();

REGISTER_LITE_KERNEL(
    gather_nd, kXPU, kInt32, kNCHW, GatherXPUInt32Int64, gather_INT32_INT64)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .Finalize();

REGISTER_LITE_KERNEL(
    gather_nd, kXPU, kInt64, kNCHW, GatherXPUInt64Int32, gather_INT64_INT32)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .Finalize();

REGISTER_LITE_KERNEL(
    gather_nd, kXPU, kInt64, kNCHW, GatherXPUInt64Int64, gather_INT64_INT64)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindInput("Index",
               {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .Finalize();
37 changes: 37 additions & 0 deletions lite/kernels/xpu/gather_nd_compute.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "lite/core/kernel.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {

// XPU kernel for the `gather_nd` op: gathers slices of the input tensor at
// the coordinates given by an integer index tensor.
//   DataType:  element type of X/Out (float, int32_t, int64_t)
//   IndexType: element type of Index (int32_t or int64_t)
//   PType:     precision the kernel is registered under
template <typename DataType, typename IndexType, PrecisionType PType>
class GatherNdCompute : public KernelLite<TARGET(kXPU), PType> {
 public:
  using param_t = operators::GatherNdParam;

  // `override` (instead of re-declaring `virtual`) matches the other XPU
  // kernels and lets the compiler check the signature against the base class.
  void Run() override;

  virtual ~GatherNdCompute() = default;
};

} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
Loading