@@ -152,18 +152,13 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::XPUContext, T> {
dev_ctx.template Alloc(loss, logits->dtype());

const auto& logits_dims = logits->dims();

const int axis = logits_dims.size() - 1;
const int64_t N = phi::funcs::SizeToAxis(axis, logits_dims);
const int64_t D = phi::funcs::SizeFromAxis(axis, logits_dims);

phi::DenseTensor logits_2d, softmax_2d;
-    framework::TensorCopy(
-        *logits, ctx.GetPlace(), ctx.device_context(), &logits_2d);
-    framework::TensorCopy(
-        *softmax, ctx.GetPlace(), ctx.device_context(), &softmax_2d);
-    logits_2d.Resize({N, D});
-    softmax_2d.Resize({N, D});
+    logits_2d.ShareDataWith(*logits).Resize({N, D});
+    softmax_2d.ShareDataWith(*softmax).Resize({N, D});
Contributor:

Could the same change also be applied to CSoftmaxWithCrossEntropyFunctor below (line 389 ~ line 395)?

Contributor:

As I recall, the Paddle team mentioned that ShareDataWith is risky because both tensors access the same memory address. Paddle's cast op used to use ShareDataWith when the input and output dtypes matched, and model training would occasionally show strange behavior.
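To make the hazard concrete, here is a minimal standalone sketch. The Tensor struct and CastSameDtype helper are mocks invented for illustration, not Paddle's phi::DenseTensor or its cast kernel: when a "cast" shares storage instead of copying, any later in-place write to the result silently mutates the original input.

#include <cassert>
#include <memory>
#include <vector>

// Mock tensor (NOT Paddle's phi::DenseTensor): just a shared buffer.
struct Tensor {
  std::shared_ptr<std::vector<float>> buf;
};

// Hypothetical same-dtype "cast" that skips the copy by sharing storage,
// mimicking the old Paddle cast behavior described in the comment above.
Tensor CastSameDtype(const Tensor& in) {
  Tensor out;
  out.buf = in.buf;  // ShareDataWith-style aliasing: no new allocation
  return out;
}

int main() {
  Tensor x;
  x.buf = std::make_shared<std::vector<float>>(1, 3.f);
  Tensor y = CastSameDtype(x);
  (*y.buf)[0] = 0.f;           // an in-place update of the "cast result"...
  assert((*x.buf)[0] == 0.f);  // ...silently clobbers the original input
  return 0;
}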

Contributor (Author):

This operator is a special case: ShareDataWith is applied to two temporary tensors. logits_2d is never modified, and softmax_2d was already being written back to the original address after modification, so the logic matches the previous behavior and there is no problem.
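A matching sketch of why the pattern in this PR is safe (same mock Tensor as above, not the real phi::DenseTensor; shape handling omitted): softmax_2d is a short-lived alias over softmax's buffer, so the kernel's writes land directly where the old trailing memory_utils::Copy used to put them, which is why that copy could be deleted as well.

#include <cstdio>
#include <memory>
#include <vector>

// Mock tensor (NOT phi::DenseTensor); Resize and dtype handling omitted.
struct Tensor {
  std::shared_ptr<std::vector<float>> buf;
  Tensor& ShareDataWith(const Tensor& other) {  // alias, no copy
    buf = other.buf;
    return *this;
  }
  float* data() { return buf->data(); }
};

int main() {
  Tensor softmax;
  softmax.buf = std::make_shared<std::vector<float>>(4, 0.f);

  // Temporary 2-D "view" as in the patch: it lives only inside the functor.
  Tensor softmax_2d;
  softmax_2d.ShareDataWith(softmax);

  // The kernel writes its results through softmax_2d...
  for (int i = 0; i < 4; ++i) softmax_2d.data()[i] = 0.25f;

  // ...and they are already in softmax's buffer, so the old trailing
  // memory_utils::Copy(softmax->data() <- softmax_2d.data()) would merely
  // copy N * D elements onto themselves.
  std::printf("%.2f\n", softmax.data()[0]);  // prints 0.25
  return 0;
}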


int ret = -1;
// step 1, obtain logit_max
@@ -324,12 +319,6 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::XPUContext, T> {
// Zero out the loss for entries whose label equals ignore_index
FixLossAccordingToIgnoreIndex<T>(
ctx, labels, &predicted_logits, loss, N, ignore_index);
-
-    phi::memory_utils::Copy(ctx.GetPlace(),
-                            softmax->data(),
-                            ctx.GetPlace(),
-                            softmax_2d.data(),
-                            N * D * sizeof(T));
}
};

@@ -398,12 +387,8 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::XPUContext, T> {
const int64_t D = phi::funcs::SizeFromAxis(axis, logits_dims);

phi::DenseTensor logits_2d, softmax_2d;
-    framework::TensorCopy(
-        *logits, ctx.GetPlace(), ctx.device_context(), &logits_2d);
-    framework::TensorCopy(
-        *softmax, ctx.GetPlace(), ctx.device_context(), &softmax_2d);
-    logits_2d.Resize({N, D});
-    softmax_2d.Resize({N, D});
+    logits_2d.ShareDataWith(*logits).Resize({N, D});
+    softmax_2d.ShareDataWith(*softmax).Resize({N, D});

int ret = -1;
// step 1, obtain logit_max
@@ -593,12 +578,6 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::XPUContext, T> {
// Zero out the loss for entries whose label equals ignore_index
FixLossAccordingToIgnoreIndex<T>(
ctx, labels, &predicted_logits, loss, N, ignore_index);
-
-    phi::memory_utils::Copy(ctx.GetPlace(),
-                            softmax->data(),
-                            ctx.GetPlace(),
-                            softmax_2d.data(),
-                            N * D * sizeof(T));
}
};
