150 changes: 150 additions & 0 deletions paddle/fluid/operators/dense_triplet_loss_op.cc
@@ -0,0 +1,150 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/dense_triplet_loss_op.h"

namespace paddle {
namespace operators {

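// Records the start index of each run of equal adjacent labels, plus a
// trailing sentinel equal to numel(); e.g. labels [0, 0, 1, 1, 1, 2] give
// offsets [0, 2, 5, 6]. This assumes samples with the same label are
// stored contiguously in the batch.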
template <>
std::vector<int> GetOffsets<platform::CPUDeviceContext>(const Tensor* t) {
std::vector<int> offsets;
offsets.push_back(0);
const int64_t* data = t->data<int64_t>();
int64_t current_value = data[0];
for (int i = 1; i < t->numel(); ++i) {
if (data[i] != current_value) {
offsets.push_back(i);
}
current_value = data[i];
}
offsets.push_back(t->numel());
return offsets;
}

class DenseTripletLossOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Logits",
"(Tensor, default: Tensor<float>), A 2-D tensor with shape [N x "
"K]. N is the batch_size, "
"and K is the feature length in each sample.");
AddInput("Label",
"(Tensor) The ground truth which is a 2-D tensor. "
"Label is a Tensor<int64> with shape [N x 1]. ");
Contributor: Does this require that Label is sorted by index?
AddOutput("Loss",
"(Tensor, default: Tensor<float>), A 2-D tensor. The triplet "
"loss with shape [batch_size x 1].");
AddOutput("LogitsGrad",
"(Tensor, default: Tensor<float>), A temporary "
"output Tensor to store the gradients of triplet loss, which is "
"computed with loss together in one call. It is a 2-D Tensor of "
"the shape [batch_size, feature_len].")
.AsIntermediate();
AddAttr<float>("margin",
"(float), The minimum margin between two samples.");

Contributor: min -> minimum
AddComment(R"DOC(

)DOC");
}
};

class DenseTripletLossOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Logits"),
"Input(Logits) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("Loss"), "Output(Loss) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("LogitsGrad"),
"Output(LogitsGrad) should be not null.");
auto labels_dims = ctx->GetInputDim("Label");
auto logits_dims = ctx->GetInputDim("Logits");
PADDLE_ENFORCE_EQ(
logits_dims.size(), 2UL,
"The input of dense_triplet_loss should be a 2-D tensor.");
PADDLE_ENFORCE_EQ(labels_dims.size(), 2UL,
"The labels should be a 2-D tensor.");
PADDLE_ENFORCE_EQ(labels_dims[1], 1UL,
"The 2nd dimension of "
"Input(Label) should be 1.");
ctx->SetOutputDim("Loss", {logits_dims[0], 1});
ctx->SetOutputDim("LogitsGrad", logits_dims);
ctx->ShareLoD("Logits", /*->*/ "Loss");
}

protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("Logits")->type()),
ctx.device_context());
}
};

class DenseTripletLossGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Loss")),
"Input(Loss@Grad) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("LogitsGrad"),
"Input(LogitsGrad) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Logits")),
"Output(Logits@Grad) should be not null.");

auto labels_dims = ctx->GetInputDim("Label");
PADDLE_ENFORCE_EQ(labels_dims.size(), 2UL,
"The labels should be a 2-D tensor.");

PADDLE_ENFORCE_EQ(labels_dims[1], 1UL,
"The 2nd dimension of Input(Label) should be 1.");

ctx->SetOutputDim(framework::GradVarName("Logits"),
ctx->GetInputDim("LogitsGrad"));
}

protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(
ctx.Input<Tensor>(framework::GradVarName("Loss"))->type()),
ctx.device_context());
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

REGISTER_OPERATOR(dense_triplet_loss, ops::DenseTripletLossOp,
ops::DenseTripletLossOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);

REGISTER_OPERATOR(dense_triplet_loss_grad, ops::DenseTripletLossGradOp);

REGISTER_OP_CPU_KERNEL(
dense_triplet_loss,
ops::DenseTripletLossKernel<paddle::platform::CPUDeviceContext, float>,
ops::DenseTripletLossKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
dense_triplet_loss_grad,
ops::DenseTripletLossGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::DenseTripletLossGradKernel<paddle::platform::CPUDeviceContext,
double>);
52 changes: 52 additions & 0 deletions paddle/fluid/operators/dense_triplet_loss_op.cu
@@ -0,0 +1,52 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#define EIGEN_USE_GPU

#include "paddle/fluid/operators/dense_triplet_loss_op.h"

namespace paddle {
namespace operators {

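// CUDA specialization: labels live on the device, so they are first copied
// to host memory and then scanned with the same run-length logic as the
// CPU path.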
template <>
std::vector<int> GetOffsets<platform::CUDADeviceContext>(const Tensor* t) {
framework::Tensor t_cpu;
framework::TensorCopy(*t, platform::CPUPlace(), &t_cpu);
std::vector<int> offsets;
offsets.push_back(0);
const int64_t* data = t_cpu.data<int64_t>();
int64_t current_value = data[0];
for (int i = 1; i < t->numel(); ++i) {
if (data[i] != current_value) {
offsets.push_back(i);
}
current_value = data[i];
}
offsets.push_back(t->numel());
return offsets;
}

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
dense_triplet_loss,
ops::DenseTripletLossKernel<paddle::platform::CUDADeviceContext, float>,
ops::DenseTripletLossKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
dense_triplet_loss_grad,
ops::DenseTripletLossGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::DenseTripletLossGradKernel<paddle::platform::CUDADeviceContext,
double>);
183 changes: 183 additions & 0 deletions paddle/fluid/operators/dense_triplet_loss_op.h
@@ -0,0 +1,183 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/blas.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;

template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
Contributor: EigenVector is not used in the following code.

using DIM1 = Eigen::array<int, 1>;
using DIM2 = Eigen::array<int, 2>;
Contributor: DIM1 -> Array1? DIM2 -> Array2?

template <typename DeviceContext>
std::vector<int> GetOffsets(const Tensor* t);

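// Shifted hinge used for the triplet margin: returns max(0, x + margin).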
template <typename T>
class ReluFunctor {
public:
explicit ReluFunctor(T margin) : margin_(margin) {}
HOSTDEVICE T operator()(const T& x) const {
const T shifted = x + margin_;
return shifted > 0 ? shifted : T(0);
}

private:
T margin_;
};

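// Step function used as the gradient of the hinge: 0 for negative inputs,
// 1 otherwise.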
template <typename T>
class ReluGradFunctor {
public:
HOSTDEVICE T operator()(const T& x) const { return x < 0 ? T(0) : T(1); }
};

template <typename DeviceContext, typename T>
class DenseTripletLossKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
const Tensor* logits = context.Input<Tensor>("Logits");
const Tensor* labels = context.Input<Tensor>("Label");
T margin = static_cast<T>(context.Attr<float>("margin"));
Tensor* loss = context.Output<Tensor>("Loss");
Tensor* d_logits = context.Output<Tensor>("LogitsGrad");
loss->mutable_data<T>(context.GetPlace());
d_logits->mutable_data<T>(context.GetPlace());
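// Touch the input data pointer to verify Logits is initialized with type T.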
logits->data<T>();
auto x_dims = logits->dims();
int batch_size = x_dims[0];
int feature_len = x_dims[1];

// step 1: get distance matrix
Tensor distances;
distances.mutable_data<T>({batch_size, batch_size}, context.GetPlace());
Tensor d_distances;
d_distances.mutable_data<T>({batch_size, batch_size}, context.GetPlace());
Tensor tmp;
tmp.mutable_data<T>({batch_size, batch_size}, context.GetPlace());
math::SetConstant<DeviceContext, T> set_zero;
auto& dev_ctx = context.template device_context<DeviceContext>();
set_zero(dev_ctx, &d_distances, static_cast<T>(0));

auto logits_t = EigenMatrix<T>::From(*logits);
auto d_logits_t = EigenMatrix<T>::From(*d_logits);
auto loss_t = EigenMatrix<T>::From(*loss);
auto distances_t = EigenMatrix<T>::From(distances);
auto d_distances_t = EigenMatrix<T>::From(d_distances);
auto tmp_t = EigenMatrix<T>::From(tmp);

auto blas = math::GetBlas<DeviceContext, T>(context);
auto x_mat = math::CreateMatrixDescriptor(x_dims, 0, false);
auto x_mat_trans = math::CreateMatrixDescriptor(x_dims, 0, true);
blas.MatMul(*logits, x_mat, *logits, x_mat_trans, T(1), &distances, T(0));
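// distances now holds the Gram matrix X * X^T of pairwise inner products.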
Contributor: Since the input is a 2-D tensor, there is no need to use math::CreateMatrixDescriptor; refer to the MatMul interface:
https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/math/blas_impl.h#L176

auto a = logits_t.square()
.sum(DIM1({1}))
.reshape(DIM2({batch_size, 1}))
.broadcast(DIM2({1, batch_size}));
auto b = a.shuffle(DIM2({1, 0}));
distances_t.device(place) = a + b - distances_t * T(2.0);
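// Here distances(i, j) = ||x_i||^2 + ||x_j||^2 - 2 * x_i . x_j, the
// squared Euclidean distance between samples i and j.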

// step 2: get loss in each line of distance matrix
ReluGradFunctor<T> relu_grad;
ReluFunctor<T> relu(margin);
auto offsets = GetOffsets<DeviceContext>(labels);
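// Each [begin, end) range below covers the samples that share one label;
// every sample j in the range acts in turn as the anchor of its group.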
for (size_t i = 0; i < offsets.size() - 1; ++i) {
int begin = offsets[i];
int end = offsets[i + 1];
int pos_num = end - begin;
for (int j = begin; j < end; ++j) {
// get loss in current line
auto p_dis = distances_t.slice(DIM2({j, begin}), DIM2({1, pos_num}))
.reshape(DIM2{1, pos_num});
auto n_dis = distances_t.chip(j, 0).reshape(DIM2({1, batch_size}));
auto n_p_sub =
n_dis.broadcast(DIM2({pos_num, 1})) -
p_dis.reshape(DIM2{pos_num, 1}).broadcast(DIM2({1, batch_size}));
auto p_p_sub =
p_dis.broadcast(DIM2({pos_num, 1})) -
p_dis.shuffle(DIM2({1, 0})).broadcast(DIM2({1, pos_num}));

loss_t.chip(j, 0).device(place) =
n_p_sub.unaryExpr(relu).sum() - p_p_sub.unaryExpr(relu).sum();
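// n_p_sub runs over every column of the distance row, so its hinge sum
// also counts positive-positive pairs; subtracting the p_p_sub hinge sum
// removes those terms, leaving only (positive, negative) pairs.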
// get gradient of distance matric in current line
d_distances_t.chip(j, 0).device(place) =
n_p_sub.unaryExpr(relu_grad).sum(DIM1({0})).reshape(
DIM2({1, batch_size}));

d_distances_t.slice(DIM2({j, begin}), DIM2({1, pos_num}))
.device(place) =
p_p_sub.unaryExpr(relu_grad).sum(DIM1({1})).reshape(
DIM2({1, pos_num})) -
n_p_sub.unaryExpr(relu_grad).sum(DIM1({1})).reshape(
DIM2({1, pos_num}));
}
}

// get gradient of logits
tmp_t.device(place) = d_distances_t + d_distances_t.shuffle(DIM2({1, 0}));
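// With G = d_distances and d(i, j) = ||x_i - x_j||^2, the chain rule gives
// dL/dX = -2 * (G + G^T) * X + 2 * diag(rowsum(G + G^T)) * X; the MatMul
// below computes the first term and sub_grad the second.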
auto dis_mat =
math::CreateMatrixDescriptor({batch_size, batch_size}, 0, false);
blas.MatMul(tmp, dis_mat, *logits, x_mat, T(-2), d_logits, T(0));

auto sub_grad = tmp_t.sum(DIM1{1})
.reshape(DIM2({batch_size, 1}))
.broadcast(DIM2({1, feature_len})) *
logits_t * T(2.0);
auto result = d_logits_t + sub_grad;
d_logits_t.device(place) = result;
}
};
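For reference, one reading of the loop above, with d(i, j) the squared Euclidean distance, m the margin, P(a) the set of samples sharing anchor a's label, and B the whole batch:

\[
L_a = \sum_{p \in P(a)} \sum_{s \in B} \big[ d(a, s) - d(a, p) + m \big]_+ \;-\; \sum_{p \in P(a)} \sum_{q \in P(a)} \big[ d(a, q) - d(a, p) + m \big]_+
\]

The second term cancels the contributions where s is itself a positive, so the loss effectively ranges over (positive, negative) pairs only.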

template <typename DeviceContext, typename T>
class DenseTripletLossGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
const Tensor* d_loss =
context.Input<Tensor>(framework::GradVarName("Loss"));
d_loss->data<T>();
const Tensor* d_logits = context.Input<Tensor>("LogitsGrad");
Tensor* d_in = context.Output<Tensor>(framework::GradVarName("Logits"));
d_in->mutable_data<T>(context.GetPlace());
auto d_in_dims = d_in->dims();
int batch_size = d_in_dims[0];
int feature_len = d_in_dims[1];
auto d_logits_t = EigenMatrix<T>::From(*d_logits);
auto d_loss_t = EigenMatrix<T>::From(*d_loss, {batch_size, 1});
auto d_in_t = EigenMatrix<T>::From(*d_in);
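// Backward pass: scale the cached per-feature gradient row-wise by the
// incoming loss gradient (one scalar per sample).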
d_in_t.device(place) =
d_logits_t * d_loss_t.broadcast(DIM2({1, feature_len}));
}
};

} // namespace operators
} // namespace paddle