Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
23b0388
add sub sequence operator code and unittest
Nov 9, 2017
f23d6cc
update the sub_sequence_op tp sequence_slice_op code.
Nov 14, 2017
b24afd8
update the sub_sequence_op to sequence_slice_op code.
Nov 14, 2017
8e7c8bb
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Nov 14, 2017
29c2582
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Nov 14, 2017
9a18e78
update sequence slice op, fix some error
Nov 14, 2017
4c426f1
update sequence_slice_op.h, change some code
Nov 15, 2017
40a6c48
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Nov 16, 2017
3dcf0da
change offset and length's rank to 2, dim[0] for batch size
Nov 16, 2017
1d95173
change offset and length's rank to 2, dim[0] for batch size
Nov 16, 2017
7bb2680
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Nov 16, 2017
4579bd4
Merge branch 'sub_sequence_op' of https://github.com/wanghaox/Paddle …
Nov 16, 2017
352c5a9
update some code
Nov 16, 2017
069dcc2
Merge branch 'sub_sequence_op' of https://github.com/wanghaox/Paddle …
Nov 16, 2017
49a5942
fix some typos
Nov 16, 2017
294570f
Merge branch 'sub_sequence_op' of https://github.com/wanghaox/Paddle …
Nov 16, 2017
a0b7a07
fix some comments
Nov 16, 2017
06dc89e
Merge branch 'sub_sequence_op' of https://github.com/wanghaox/Paddle …
Nov 16, 2017
3603546
fix some codes
Nov 17, 2017
39bce49
fix some comments
Nov 20, 2017
794117b
fix some comments
Nov 20, 2017
f7c1562
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Nov 20, 2017
d68f861
fix some comments
Nov 20, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 132 additions & 0 deletions paddle/operators/sequence_slice_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/sequence_slice_op.h"

namespace paddle {
namespace operators {

class SequenceSliceOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SequenceSliceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Offset"),
"Input(Offset) of SequenceSliceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Length"),
"Input(Length) of SequenceSliceOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SequenceSliceOp should not be null.");
auto input_dims = ctx->GetInputDim("X");

auto offset_dim = ctx->GetInputDim("Offset");
auto length_dim = ctx->GetInputDim("Length");

PADDLE_ENFORCE_EQ(
offset_dim.size(), 2UL,
"Only support one level sequence now, The rank of offset must be 2.");
PADDLE_ENFORCE_EQ(
length_dim.size(), 2UL,
"Only support one level sequence now, The rank of Length must be 2.");

// Initialize the output's dims to maximum,
// and re-set to real dims by the value of Offset and Length at kernel
ctx->SetOutputDim("Out", input_dims);
}

protected:
framework::OpKernelType GetKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
ctx.device_context());
}
};

class SequenceSliceGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"The gradient of Out should not be null.");
PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")),
"The gradient of X should not be null.");
ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X"));
}

protected:
framework::OpKernelType GetKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
ctx.device_context());
}
};

class SequenceSliceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SequenceSliceOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(LoDTensor), "
"the input of SequenceSliceOp.");
AddInput("Offset",
"(Tensor), "
"a vector<int> to describe the offset of every input sequence for "
"sub sequence item.");
AddInput("Length",
"(Tensor), "
"a vector<int> to describe the length of every input sequence for "
"sub sequence item.");
AddOutput("Out",
"(LoDTensor), the output of SequenceSliceOp.");
AddComment(R"DOC(
Sequence slice operator

The operator crops a subsequence from given sequence with given start offset and subsequence length.
It only supports sequence (LoD Tensor with level number is 1).
- Case:
X = [[a1, a2;
b1, b2;
c1, c2]
[d1, d2;
e1, e2]]
LoD(X) = {{0, 3, 5}}; Dims(X) = (5, 2)
Offset = [[0], [1]]; Length = [[2], [1]]

Out = [[a1, a2;
b1, b2]
[e1, e2]]
LoD(Out) = {{0, 2, 3}}; Dims(Out) = (3, 2)
NOTE: The first dimension size of input, the size of offset and Length, should be equal. The offset start from 0.
)DOC");
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP(sequence_slice, ops::SequenceSliceOp, ops::SequenceSliceOpMaker,
sequence_slice_grad, ops::SequenceSliceGradOp);
REGISTER_OP_CPU_KERNEL(
sequence_slice,
ops::SequenceSliceOpKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
sequence_slice_grad,
ops::SequenceSliceGradOpKernel<paddle::platform::CPUPlace, float>);
23 changes: 23 additions & 0 deletions paddle/operators/sequence_slice_op.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/sequence_slice_op.h"

namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
sequence_slice,
ops::SequenceSliceOpKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
sequence_slice_grad,
ops::SequenceSliceGradOpKernel<paddle::platform::GPUPlace, float>);
173 changes: 173 additions & 0 deletions paddle/operators/sequence_slice_op.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/strided_memcpy.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using LoD = framework::LoD;

template <typename T>
inline LoD SequenceSliceLoD(const T& in, const int64_t* offset_data,
const int64_t* length_data) {
auto out_lod = in.lod();
size_t lod_offset = 0;

auto n = in.lod()[0].size() - 1;
out_lod[0][0] = 0;
for (size_t i = 0; i < n; ++i) {
lod_offset += length_data[i];
out_lod[0][i+1] = lod_offset;
}
return out_lod;
}

template <typename Place, typename T>
class SequenceSliceOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<LoDTensor>("X");
auto* offset = ctx.Input<Tensor>("Offset");
auto* length = ctx.Input<Tensor>("Length");
auto* out = ctx.Output<LoDTensor>("Out");

auto lod = in->lod();
auto n = lod[0].size() - 1;

PADDLE_ENFORCE_EQ(lod.size(), 1UL,
"Only support one level sequence now.");
PADDLE_ENFORCE_EQ(
n, static_cast<size_t>(length->dims()[0]),
"The size of input-sequence and length-array should be the same")
PADDLE_ENFORCE_EQ(
n, static_cast<size_t>(offset->dims()[0]),
"The size of input-sequence and offset-array should be the same")

const int64_t* offset_data = offset->data<int64_t>();
const int64_t* length_data = length->data<int64_t>();
framework::Tensor offset_cpu;
framework::Tensor length_cpu;

if (platform::is_gpu_place(ctx.GetPlace())) {
offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
offset_data = offset_cpu.data<int64_t>();

length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
length_data = length_cpu.data<int64_t>();
}

for (size_t i = 0; i < n; ++i) {
PADDLE_ENFORCE_LT(0, offset_data[i],
"The offset[%d] must greater than zero.", i)
PADDLE_ENFORCE_LT(0, length_data[i],
"The length[%d] must greater than zero.", i)
PADDLE_ENFORCE_LT(
lod[0][i] + offset_data[i] + length_data[i],
lod[0][i + 1],
"The target tensor's length overflow.")
}

out->mutable_data<T>(ctx.GetPlace());
auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
auto out_dims = in->dims();
out_dims[0] = out_lod[0][out_lod[0].size() - 1];
out->Resize(out_dims);
out->set_lod(out_lod);

auto in_stride = framework::stride(in->dims());
auto out_stride = framework::stride(out->dims());

size_t out_offset = 0;
for (size_t i = 0; i < n; ++i) {
Tensor in_t =
in->Slice(static_cast<int>(lod[0][i] + offset_data[i]),
static_cast<int>(lod[0][i] + offset_data[i] +
length_data[i]));

StridedMemcpy<T>(ctx.device_context(), in_t.data<T>(),
in_stride, in_t.dims(), out_stride,
out->data<T>() + out_offset);
out_offset += length_data[i] * in_stride[0];
}
}
};

template <typename Place, typename T>
class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<LoDTensor>("X");
auto* offset = ctx.Input<Tensor>("Offset");
auto* length = ctx.Input<Tensor>("Length");
auto* out_grad =
ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
auto* x_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));

const int64_t* offset_data = offset->data<int64_t>();
const int64_t* length_data = length->data<int64_t>();
framework::Tensor offset_cpu;
framework::Tensor length_cpu;

if (platform::is_gpu_place(ctx.GetPlace())) {
offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
offset_data = offset_cpu.data<int64_t>();

length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
length_data = length_cpu.data<int64_t>();
}

auto lod = in->lod();
auto out_lod = out_grad->lod();

if (x_grad) {
x_grad->mutable_data<T>(ctx.GetPlace());
x_grad->set_lod(in->lod());
math::SetConstant<Place, T> set_zero;
set_zero(ctx.device_context(), x_grad, static_cast<T>(0));

auto out_grad_stride = framework::stride(out_grad->dims());

for (size_t i = 0; i < out_lod[0].size() - 1; ++i) {
Tensor out_grad_t =
out_grad->Slice(static_cast<int>(out_lod[0][i]),
static_cast<int>(out_lod[0][i + 1]));
auto out_grad_stride = framework::stride(out_grad_t.dims());

auto x_grad_stride = framework::stride(x_grad->dims());

Tensor x_grad_t = x_grad->Slice(
static_cast<int>(lod[0][i] + offset_data[i]),
static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));

StridedMemcpy<T>(ctx.device_context(), out_grad_t.data<T>(),
out_grad_stride, out_grad_t.dims(), x_grad_stride,
x_grad_t.data<T>());
}
}
}
};

} // namespace operators
} // namespace paddle
45 changes: 45 additions & 0 deletions python/paddle/v2/fluid/tests/test_sequence_slice_op.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import unittest
import numpy as np
import sys
from op_test import OpTest

class TestSequenceSliceOp(OpTest):
def set_data(self):
self.init_test_case()
# only supprot one level LoD
x = np.random.random(self.x_dim).astype('float32')
lod = self.x_lod
offset = np.array(self.offset).astype("int64")
length = np.array(self.length).astype("int64")

self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length}
outs = [] #np.zeros((100, 3, 2)).astype('float32')
out_lod = [[0]]
out_lod_offset = 0
for i in range(len(offset)):
sub_x = x[lod[0][i] + offset[i, 0]: lod[0]
[i] + offset[i, 0] + length[i, 0], :]
out_lod_offset = out_lod_offset + len(sub_x)
outs.append(sub_x)
out_lod[0].append(out_lod_offset)
outs = np.concatenate(outs, axis=0)
self.outputs = {'Out': (outs, out_lod)}

def init_test_case(self):
self.x_dim = (100, 3, 2)
self.x_lod = [[0, 20, 40, 60, 80, 100]]
self.offset = [[1], [2], [3], [4], [5]]
self.length = [[10], [8], [6], [4], [2]]

def setUp(self):
self.op_type = "sequence_slice"
self.set_data()

def test_check_output(self):
self.check_output()

def test_check_grad(self):
self.check_grad(['X'], 'Out')

if __name__ == '__main__':
unittest.main()