Add broadcasting support (e.g. matrix-vector) for cos sim operator. #3918
Changes from 1 commit
```diff
@@ -25,33 +25,51 @@ class CosSimOp : public framework::OperatorWithKernel {

 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
+   // notnull check
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) must not be null.");
-   PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("X")->dims(),
-                     ctx.Input<Tensor>("Y")->dims(),
-                     "Dimensions of Input(X) and Input(Y) must be the same.");
-
-   auto dims = ctx.Input<Tensor>("X")->dims();
-   ctx.Output<Tensor>("Out")->Resize({dims[0], 1});
-   ctx.Output<Tensor>("XNorm")->Resize({dims[0], 1});
-   ctx.Output<Tensor>("YNorm")->Resize({dims[0], 1});
+
+   // shape check
+   auto x_dims = ctx.Input<Tensor>("X")->dims();
+   auto y_dims = ctx.Input<Tensor>("Y")->dims();
+   PADDLE_ENFORCE_EQ(framework::arity(x_dims), framework::arity(y_dims),
+                     "Ranks of Input(X) and Input(Y) must be equal.");
+   PADDLE_ENFORCE_GE(framework::arity(x_dims), 2,
+                     "Rank of Input(X) must not be less than 2.");
+   PADDLE_ENFORCE_EQ(
+       framework::slice_ddim(x_dims, 1, framework::arity(x_dims)),
+       framework::slice_ddim(y_dims, 1, framework::arity(y_dims)),
+       "All dimensions except 1st of Input(X) and Input(Y) must be equal.");
+   PADDLE_ENFORCE(x_dims[0] == y_dims[0] || y_dims[0] == 1,
+                  "1st dimension of Input(Y) must be equal to Input(X) or "
+                  "just 1 (which will be broadcasted to match Input(X)).");
+
+   // resize tensor
+   ctx.Output<Tensor>("Out")->Resize({x_dims[0], 1});
```
Review comment on the `Resize({x_dims[0], 1})` line above: If the second dimension is just 1, why do we need it? We can squeeze it to one dimension.

Reply: Yes. That's a question we've discussed with @QiJune: should the output tensor be of rank 1 or of rank 2? I prefer rank 2, since when the output is used for further processing, it is very clear that it is a batch of scalar samples rather than one single vector sample. In a word, it reduces ambiguity. We can discuss this more.
ctx.Output<Tensor>("XNorm")->Resize({x_dims[0], 1}); | ||
ctx.Output<Tensor>("YNorm")->Resize({y_dims[0], 1}); | ||
} | ||
}; | ||
|
||
class CosSimOpMaker : public framework::OpProtoAndCheckerMaker { | ||
public: | ||
CosSimOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) | ||
: OpProtoAndCheckerMaker(proto, op_checker) { | ||
AddInput("X", "The first input of cos_sim op."); | ||
AddInput("Y", "The second input of cos_sim op."); | ||
AddInput("X", "The 1st input of cos_sim op."); | ||
AddInput("Y", "The 2nd input of cos_sim op."); | ||
AddOutput("Out", "The output of cos_sim op."); | ||
AddOutput("XNorm", "Row norm of the first input.").AsIntermediate(); | ||
|
||
AddOutput("YNorm", "Row norm of the second input.").AsIntermediate(); | ||
|
||
AddComment(R"DOC( | ||
Cosine Similarity Operator. | ||
|
||
The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)) | ||
The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)). | ||
|
||
Input(X) and Input(Y) must have the same shape, except that the 1st dimension | ||
of Input(Y) could be just 1 (different from Input(X)), which will be | ||
broadcasted to match the shape of Input(X) before computing their cosine | ||
similarity. | ||
)DOC"); | ||
} | ||
}; | ||
|
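To make the broadcasting rule concrete: with a hypothetical X of shape [32, 10], a Y of shape [32, 10] or [1, 10] passes the checks above, while one of shape [8, 10] is rejected. The following minimal sketch illustrates the operator's semantics in the matrix-vector case using Eigen's dense module directly (shapes, values, and variable names are illustrative, not taken from the PR; the op itself uses the framework's `EigenMatrix` wrapper):

```cpp
#include <Eigen/Dense>
#include <iostream>

// Illustrative only: row-wise cosine similarity of a batch X against a
// single row Y, i.e. the broadcasting case of the operator.
int main() {
  Eigen::MatrixXf X(3, 4);  // hypothetical batch: 3 samples, 4 features
  X.setRandom();
  Eigen::RowVectorXf Y(4);  // one row, conceptually broadcast to 3 rows
  Y.setRandom();

  Eigen::VectorXf x_norm = X.rowwise().norm();  // the op's XNorm, shape [3, 1]
  float y_norm = Y.norm();                      // the op's YNorm, shape [1, 1]

  // Out[i] = X.row(i) . Y / (|X.row(i)| * |Y|), i.e. the DOC equation
  // applied per row after broadcasting Y.
  Eigen::VectorXf out =
      ((X * Y.transpose()).array() / (x_norm.array() * y_norm)).matrix();

  std::cout << out << std::endl;  // one similarity value per sample
  return 0;
}
```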
```diff
@@ -62,32 +80,47 @@ class CosSimOpGrad : public framework::OperatorWithKernel {

 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
+   // notnull check
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) must not be null.");
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("XNorm"),
                            "Input(XNorm) must not be null.");
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("YNorm"),
                            "Input(YNorm) must not be null.");
+   PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Out"),
+                           "Input(Out) must not be null.");
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                            "Input(Out@GRAD) must not be null.");

+   // shape check
    auto x_dims = ctx.Input<Tensor>("X")->dims();
    auto y_dims = ctx.Input<Tensor>("Y")->dims();
+   PADDLE_ENFORCE_EQ(framework::arity(x_dims), framework::arity(y_dims),
+                     "Ranks of Input(X) and Input(Y) must be equal.");
+   PADDLE_ENFORCE_GE(framework::arity(x_dims), 2,
+                     "Rank of Input(X) must not be less than 2.");
+   PADDLE_ENFORCE_EQ(
+       framework::slice_ddim(x_dims, 1, framework::arity(x_dims)),
+       framework::slice_ddim(y_dims, 1, framework::arity(y_dims)),
+       "All dimensions except 1st of Input(X) and Input(Y) must be equal.");
+   PADDLE_ENFORCE(x_dims[0] == y_dims[0] || y_dims[0] == 1,
+                  "1st dimension of Input(Y) must be equal to Input(X) or "
+                  "just 1 (which will be broadcasted to match Input(X)).");
    auto xnorm_dims = ctx.Input<Tensor>("XNorm")->dims();
+   PADDLE_ENFORCE_EQ(xnorm_dims, framework::make_ddim({x_dims[0], 1}),
+                     "Shape of Input(XNorm) must be [X.Dim(0), 1].");
    auto ynorm_dims = ctx.Input<Tensor>("YNorm")->dims();
-   auto out_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
-   PADDLE_ENFORCE_EQ(x_dims, y_dims,
-                     "Dimensions of Input(X) and Input(Y) must be the same.");
-   PADDLE_ENFORCE_EQ(xnorm_dims[0], x_dims[0],
-                     "1st dimension of XNorm must equal that of Input(X).");
-   PADDLE_ENFORCE_EQ(xnorm_dims[1], 1, "2nd dimension of XNorm must be one.");
-   PADDLE_ENFORCE_EQ(ynorm_dims[0], y_dims[0],
-                     "1st dimension of YNorm must equal that of Input(Y).");
-   PADDLE_ENFORCE_EQ(ynorm_dims[1], 1, "2nd dimension of YNorm must be one.");
-   PADDLE_ENFORCE_EQ(out_dims[0], x_dims[0],
-                     "1st dimension of Out@GRAD must equal that of Input(X).");
-   PADDLE_ENFORCE_EQ(out_dims[1], 1, "2nd dimension of Out@GRAD must be one.");
+   PADDLE_ENFORCE_EQ(ynorm_dims, framework::make_ddim({y_dims[0], 1}),
+                     "Shape of Input(YNorm) must be [Y.Dim(0), 1].");
+   auto out_dims = ctx.Input<Tensor>("Out")->dims();
+   PADDLE_ENFORCE_EQ(out_dims, framework::make_ddim({x_dims[0], 1}),
+                     "Shape of Input(Out) must be [X.Dim(0), 1].");
+   auto out_grad_dims =
+       ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
+   PADDLE_ENFORCE_EQ(out_grad_dims, framework::make_ddim({x_dims[0], 1}),
+                     "Shape of Input(Out@Grad) must be [X.Dim(0), 1].");

+   // resize tensor
    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto *y_grad = ctx.Output<Tensor>(framework::GradVarName("Y"));
    if (x_grad) x_grad->Resize(x_dims);
```
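Concretely (hypothetical shapes): for X = [16, 32] with a broadcast Y = [1, 32], these checks expect XNorm = [16, 1], YNorm = [1, 1], Out = [16, 1], and Out@GRAD = [16, 1]; X@GRAD and Y@GRAD, when requested, are resized to the shapes of X and Y respectively.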
```diff
@@ -28,74 +28,114 @@ template <typename Place, typename T>
class CosSimKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
-   auto* input_x = context.Input<Tensor>("X");
-   auto* input_y = context.Input<Tensor>("Y");
-   auto* output_z = context.Output<Tensor>("Out");
-   auto* output_x_norm = context.Output<Tensor>("XNorm");
-   auto* output_y_norm = context.Output<Tensor>("YNorm");
-
-   output_z->mutable_data<T>(context.GetPlace());
-   output_x_norm->mutable_data<T>(context.GetPlace());
-   output_y_norm->mutable_data<T>(context.GetPlace());
-
-   auto dims = input_x->dims();
-   int size = static_cast<int>(framework::product(dims));
-   auto new_dims = framework::make_ddim({dims[0], size / dims[0]});
-   auto x = EigenMatrix<T>::From(*input_x, new_dims);
-   auto y = EigenMatrix<T>::From(*input_y, new_dims);
-   auto z = EigenMatrix<T>::From(*output_z);
-   auto x_norm = EigenMatrix<T>::From(*output_x_norm);
-   auto y_norm = EigenMatrix<T>::From(*output_y_norm);
+   // get Tensor
+   auto* in_x = context.Input<Tensor>("X");
+   auto* in_y = context.Input<Tensor>("Y");
+   auto* out_z = context.Output<Tensor>("Out");
+   auto* out_x_norm = context.Output<Tensor>("XNorm");
+   auto* out_y_norm = context.Output<Tensor>("YNorm");
+   out_z->mutable_data<T>(context.GetPlace());
+   out_x_norm->mutable_data<T>(context.GetPlace());
+   out_y_norm->mutable_data<T>(context.GetPlace());
+
+   // convert Tensor to Eigen Tensor
+   int rows_x = in_x->dims()[0];
+   int rows_y = in_y->dims()[0];
+   int cols = framework::product(in_x->dims()) / rows_x;
+   auto x = EigenMatrix<T>::From(*in_x, framework::make_ddim({rows_x, cols}));
+   auto y = EigenMatrix<T>::From(*in_y, framework::make_ddim({rows_y, cols}));
+   auto z = EigenMatrix<T>::From(*out_z);
+   auto x_norm = EigenMatrix<T>::From(*out_x_norm);
+   auto y_norm = EigenMatrix<T>::From(*out_y_norm);

+   // compute
    auto place = context.GetEigenDevice<Place>();
-   auto xy = (x * y).sum(Eigen::array<int, 1>({1}));
    x_norm.device(place) = x.square().sum(Eigen::array<int, 1>({1})).sqrt();
    y_norm.device(place) = y.square().sum(Eigen::array<int, 1>({1})).sqrt();
-   z.device(place) = xy / x_norm / y_norm;
+   if (rows_x == rows_y) {
+     auto xy = (x * y).sum(Eigen::array<int, 1>({1}));
+     z.device(place) = xy / x_norm / y_norm;
+   } else {
+     Eigen::DSizes<int, 2> bcast(rows_x, 1);
```
```diff
+     auto xy = (x * y.broadcast(bcast)).sum(Eigen::array<int, 1>({1}));
+     z.device(place) = xy / x_norm / y_norm.broadcast(bcast);
+   }
  }
};
```
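In the `else` branch above, `y` has a single row and Eigen's Tensor `broadcast` tiles it `rows_x` times along the first dimension before the elementwise product. A minimal standalone sketch of that mechanism (sizes are hypothetical; this uses Eigen's unsupported Tensor module directly, outside the Paddle framework):

```cpp
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  // One row of 4 values, like y when rows_y == 1.
  Eigen::Tensor<float, 2> y(1, 4);
  y.setValues({{1.0f, 2.0f, 3.0f, 4.0f}});

  // Tile the row 3 times along dim 0, like y.broadcast(bcast) in the
  // kernel with bcast = Eigen::DSizes<int, 2>(rows_x, 1) and rows_x == 3.
  Eigen::DSizes<int, 2> bcast(3, 1);
  Eigen::Tensor<float, 2> tiled = y.broadcast(bcast);

  std::cout << "tiled shape: " << tiled.dimension(0) << " x "
            << tiled.dimension(1) << std::endl;  // prints: tiled shape: 3 x 4
  return 0;
}
```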
```diff
template <typename Place, typename T>
class CosSimGradKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
-   auto* input_x = context.Input<Tensor>("X");
-   auto* input_y = context.Input<Tensor>("Y");
-   auto* input_z = context.Input<Tensor>("Out");
-   auto* input_x_norm = context.Input<Tensor>("XNorm");
-   auto* input_y_norm = context.Input<Tensor>("YNorm");
-   auto* output_grad_x = context.Output<Tensor>(framework::GradVarName("X"));
-   auto* output_grad_y = context.Output<Tensor>(framework::GradVarName("Y"));
-   auto* input_grad_z = context.Input<Tensor>(framework::GradVarName("Out"));
-
-   auto dims = input_x->dims();
-   int size = static_cast<int>(framework::product(dims));
-   auto new_dims = framework::make_ddim({dims[0], size / dims[0]});
-   auto x = EigenMatrix<T>::From(*input_x, new_dims);
-   auto y = EigenMatrix<T>::From(*input_y, new_dims);
-   auto z = EigenMatrix<T>::From(*input_z);
-   auto x_norm = EigenMatrix<T>::From(*input_x_norm);
-   auto y_norm = EigenMatrix<T>::From(*input_y_norm);
-   auto dz = EigenMatrix<T>::From(*input_grad_z);
+   // get Tensor
+   auto* in_x = context.Input<Tensor>("X");
+   auto* in_y = context.Input<Tensor>("Y");
+   auto* in_z = context.Input<Tensor>("Out");
+   auto* in_x_norm = context.Input<Tensor>("XNorm");
+   auto* in_y_norm = context.Input<Tensor>("YNorm");
+   auto* out_grad_x = context.Output<Tensor>(framework::GradVarName("X"));
+   auto* out_grad_y = context.Output<Tensor>(framework::GradVarName("Y"));
+   auto* in_grad_z = context.Input<Tensor>(framework::GradVarName("Out"));
+
+   // convert Tensor to Eigen Tensor
+   int rows_x = in_x->dims()[0];
+   int rows_y = in_y->dims()[0];
+   int cols = framework::product(in_x->dims()) / rows_x;
+   auto x = EigenMatrix<T>::From(*in_x, framework::make_ddim({rows_x, cols}));
+   auto y = EigenMatrix<T>::From(*in_y, framework::make_ddim({rows_y, cols}));
+   auto z = EigenMatrix<T>::From(*in_z);
+   auto x_norm = EigenMatrix<T>::From(*in_x_norm);
+   auto y_norm = EigenMatrix<T>::From(*in_y_norm);
+   auto dz = EigenMatrix<T>::From(*in_grad_z);

-   Eigen::DSizes<int, 2> bcast(1, new_dims[1]);
+   // compute gradient
+   Eigen::DSizes<int, 2> bcast(1, cols);
    auto z_bcast = z.broadcast(bcast);
    auto dz_bcast = dz.broadcast(bcast);
-   auto place = context.GetEigenDevice<Place>();
    auto x_snorm_bcast = x_norm.square().eval().broadcast(bcast);
-   auto y_snorm_bcast = y_norm.square().eval().broadcast(bcast);
-   auto norm_prod_bcast = (x_norm * y_norm).eval().broadcast(bcast);
-   if (output_grad_x) {
-     output_grad_x->mutable_data<T>(context.GetPlace());
-     auto dx = EigenMatrix<T>::From(*output_grad_x, new_dims);
-     dx.device(place) =
-         dz_bcast * (y / norm_prod_bcast - z_bcast * x / x_snorm_bcast);
-   }
-   if (output_grad_y) {
-     output_grad_y->mutable_data<T>(context.GetPlace());
-     auto dy = EigenMatrix<T>::From(*output_grad_y, new_dims);
-     dy.device(place) =
-         dz_bcast * (x / norm_prod_bcast - z_bcast * y / y_snorm_bcast);
-   }
+   auto place = context.GetEigenDevice<Place>();
+   if (rows_x == rows_y) {
+     auto y_snorm_bcast = y_norm.square().eval().broadcast(bcast);
+     auto norm_prod_bcast = (x_norm * y_norm).eval().broadcast(bcast);
+     // compute dx
+     if (out_grad_x) {
+       out_grad_x->mutable_data<T>(context.GetPlace());
+       auto dx = EigenMatrix<T>::From(*out_grad_x,
+                                      framework::make_ddim({rows_x, cols}));
+       auto grad = y / norm_prod_bcast - z_bcast * x / x_snorm_bcast;
+       dx.device(place) = dz_bcast * grad;
+     }
+     // compute dy
+     if (out_grad_y) {
+       out_grad_y->mutable_data<T>(context.GetPlace());
+       auto dy = EigenMatrix<T>::From(*out_grad_y,
+                                      framework::make_ddim({rows_y, cols}));
+       auto grad = x / norm_prod_bcast - z_bcast * y / y_snorm_bcast;
+       dy.device(place) = dz_bcast * grad;
+     }
+   } else {
+     Eigen::DSizes<int, 2> bcast_row(rows_x, 1);
+     auto y_bcast = y.broadcast(bcast_row);
+     auto y_snorm_bcast =
+         y_norm.square().eval().broadcast(bcast_row).eval().broadcast(bcast);
+     auto norm_prod_bcast =
+         (x_norm * y_norm.broadcast(bcast_row)).eval().broadcast(bcast);
+     // compute dx
+     if (out_grad_x) {
+       out_grad_x->mutable_data<T>(context.GetPlace());
+       auto dx = EigenMatrix<T>::From(
+           *out_grad_x, framework::make_ddim({rows_x, cols}));
+       auto grad = y_bcast / norm_prod_bcast - z_bcast * x / x_snorm_bcast;
+       dx.device(place) = dz_bcast * grad;
+     }
+     // compute dy
+     if (out_grad_y) {
+       out_grad_y->mutable_data<T>(context.GetPlace());
+       auto dy = EigenMatrix<T>::From(
+           *out_grad_y, framework::make_ddim({rows_y, cols}));
+       auto grad = x / norm_prod_bcast - z_bcast * y_bcast / y_snorm_bcast;
+       dy.device(place) = (dz_bcast * grad).sum(Eigen::array<int, 1>({0}));
+     }
+   }
  }
};
```
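For reference, the expressions in both branches follow from differentiating the per-row cosine similarity; a sketch of the standard derivation (not quoted from the PR):

```latex
z = \frac{x^\top y}{\lVert x \rVert\,\lVert y \rVert},
\qquad
\frac{\partial z}{\partial x}
  = \frac{y}{\lVert x \rVert\,\lVert y \rVert} - z\,\frac{x}{\lVert x \rVert^{2}},
\qquad
\frac{\partial z}{\partial y}
  = \frac{x}{\lVert x \rVert\,\lVert y \rVert} - z\,\frac{y}{\lVert y \rVert^{2}}.
```

When Y is a single broadcast row shared by every sample, all rows of X contribute gradient to that one row, so Y@GRAD accumulates over the batch; that is why the broadcast branch ends with a reduction along dimension 0, `.sum(Eigen::array<int, 1>({0}))`.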
Review comment: The `DDim` has a member function `size()`, which returns the arity of itself.

Reply: Done.
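That is, the repeated `framework::arity(x_dims)` calls can be written more compactly; a sketch of the simplified rank checks (assuming `DDim::size()` returns the rank, as the comment above states):

```cpp
// Hypothetical simplification of the rank checks, using DDim::size()
// in place of framework::arity():
PADDLE_ENFORCE_EQ(x_dims.size(), y_dims.size(),
                  "Ranks of Input(X) and Input(Y) must be equal.");
PADDLE_ENFORCE_GE(x_dims.size(), 2,
                  "Rank of Input(X) must not be less than 2.");
```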