Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 25 additions & 21 deletions paddle/fluid/operators/data_norm_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,22 @@ class DataNormKernel<T, phi::GPUContext> : public framework::OpKernel<T> {
"The dims of Input(X) should be greater than 0."));

const T *batch_size_in =
ctx.Input<phi::DenseTensor>("BatchSize")->data<T>();
const T *batch_sum_in = ctx.Input<phi::DenseTensor>("BatchSum")->data<T>();
ctx.Input<phi::DenseTensor>("BatchSize")->template data<T>();
const T *batch_sum_in =
ctx.Input<phi::DenseTensor>("BatchSum")->template data<T>();
const T *batch_square_sum_in =
ctx.Input<phi::DenseTensor>("BatchSquareSum")->data<T>();
auto *x_data = x->data<T>();
ctx.Input<phi::DenseTensor>("BatchSquareSum")->template data<T>();
auto *x_data = x->template data<T>();

// alloc memory
T *y_data =
ctx.Output<phi::DenseTensor>("Y")->mutable_data<T>(ctx.GetPlace());
T *y_data = ctx.Output<phi::DenseTensor>("Y")->template mutable_data<T>(
ctx.GetPlace());
T *mean_out_data =
ctx.Output<phi::DenseTensor>("Means")->mutable_data<T>(ctx.GetPlace());
ctx.Output<phi::DenseTensor>("Means")->template mutable_data<T>(
ctx.GetPlace());
T *scale_out_data =
ctx.Output<phi::DenseTensor>("Scales")->mutable_data<T>(ctx.GetPlace());
ctx.Output<phi::DenseTensor>("Scales")->template mutable_data<T>(
ctx.GetPlace());

auto stream = ctx.template device_context<phi::GPUContext>().stream();

Expand Down Expand Up @@ -185,31 +188,32 @@ class DataNormGradKernel<T, phi::GPUContext> : public framework::OpKernel<T> {
}
T *d_batch_size =
ctx.Output<phi::DenseTensor>(framework::GradVarName("BatchSize"))
->mutable_data<T>(ctx.GetPlace());
->template mutable_data<T>(ctx.GetPlace());
T *d_batch_sum =
ctx.Output<phi::DenseTensor>(framework::GradVarName("BatchSum"))
->mutable_data<T>(ctx.GetPlace());
->template mutable_data<T>(ctx.GetPlace());
T *d_batch_square_sum =
ctx.Output<phi::DenseTensor>(framework::GradVarName("BatchSquareSum"))
->mutable_data<T>(ctx.GetPlace());
->template mutable_data<T>(ctx.GetPlace());

auto stream = ctx.template device_context<phi::GPUContext>().stream();
if (d_x != nullptr) {
KernelDataNormBP<<<GET_BLOCKS(C * N),
PADDLE_CUDA_NUM_THREADS,
0,
stream>>>(N,
C,
d_y->data<T>(),
scales->data<T>(),
d_x->mutable_data<T>(ctx.GetPlace()));
stream>>>(
N,
C,
d_y->template data<T>(),
scales->template data<T>(),
d_x->template mutable_data<T>(ctx.GetPlace()));
}

KernelDataNormBPStat<<<GET_BLOCKS(C), PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
N,
C,
x->data<T>(),
means->data<T>(),
x->template data<T>(),
means->template data<T>(),
epsilon,
d_batch_size,
d_batch_sum,
Expand Down Expand Up @@ -306,11 +310,11 @@ class DataNormGradKernel<T, phi::GPUContext> : public framework::OpKernel<T> {
}

T *batch_size_data = ctx.Output<phi::DenseTensor>("BatchSize")
->mutable_data<T>(ctx.GetPlace());
->template mutable_data<T>(ctx.GetPlace());
T *batch_sum_data = ctx.Output<phi::DenseTensor>("BatchSum")
->mutable_data<T>(ctx.GetPlace());
->template mutable_data<T>(ctx.GetPlace());
T *batch_square_sum_data = ctx.Output<phi::DenseTensor>("BatchSquareSum")
->mutable_data<T>(ctx.GetPlace());
->template mutable_data<T>(ctx.GetPlace());
KernelUpdateParam<<<GET_BLOCKS(C), PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
C,
d_batch_size,
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/isfinite_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class OverflowKernel : public framework::OpKernel<T> {
virtual void Compute(const framework::ExecutionContext& ctx) const {
auto* x = ctx.InputVar("X");
auto* out = ctx.Output<phi::DenseTensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
out->template mutable_data<T>(ctx.GetPlace());
Functor functor;
if (x->IsType<phi::DenseTensor>()) {
auto* in = ctx.Input<phi::DenseTensor>("X");
Expand Down
8 changes: 4 additions & 4 deletions paddle/fluid/operators/pull_gpups_sparse_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ static void PullGpuPSSparseFunctor(const framework::ExecutionContext &ctx) {
for (size_t i = 0; i < slot_size; i++) {
const auto *slot = inputs[i];
const uint64_t *single_slot_keys =
reinterpret_cast<const uint64_t *>(slot->data<int64_t>());
reinterpret_cast<const uint64_t *>(slot->template data<int64_t>());
all_keys[i] = single_slot_keys;
slot_lengths[i] = slot->numel();
auto *output = outputs[i]->mutable_data<T>(ctx.GetPlace());
auto *output = outputs[i]->template mutable_data<T>(ctx.GetPlace());
// double type is not fully supported now
all_values[i] = reinterpret_cast<float *>(output);
}
Expand Down Expand Up @@ -68,7 +68,7 @@ static void PushGpuPSSparseFunctor(const framework::ExecutionContext &ctx) {
for (size_t i = 0; i < slot_size; i++) {
const auto *slot = inputs[i];
const uint64_t *single_slot_keys =
reinterpret_cast<const uint64_t *>(slot->data<int64_t>());
reinterpret_cast<const uint64_t *>(slot->template data<int64_t>());
all_keys[i] = single_slot_keys;
slot_lengths[i] = slot->numel();
int cur_batch_size =
Expand All @@ -82,7 +82,7 @@ static void PushGpuPSSparseFunctor(const framework::ExecutionContext &ctx) {
"The batch size of all input slots should be same, "
"please check"));
}
const float *grad_value = d_output[i]->data<float>();
const float *grad_value = d_output[i]->template data<float>();
all_grad_values[i] = grad_value;
}
#ifdef PADDLE_WITH_HETERPS
Expand Down
2 changes: 1 addition & 1 deletion test/cpp/fluid/framework/data_device_transform_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class TestKernel : public OpKernel<float> {
std::cout << "input place:" << input->place() << std::endl;
auto* output = ctx.Output<phi::DenseTensor>("output");
output->Resize(input->dims());
output->mutable_data<T>(ctx.GetPlace());
output->template mutable_data<T>(ctx.GetPlace());

phi::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
*input,
Expand Down
Loading