PaddlePaddle
diff --git a/‎paddle/phi/backends/gpu/gpu_primitives.h‎
Lines changed: 610 additions & 0 deletions b/‎paddle/phi/backends/gpu/gpu_primitives.h‎
Lines changed: 610 additions & 0 deletions
diff --git a/‎paddle/phi/kernels/funcs/detail/gru_gpu_kernel.h‎
Lines changed: 1 addition & 1 deletion b/‎paddle/phi/kernels/funcs/detail/gru_gpu_kernel.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎paddle/phi/kernels/funcs/detail/lstm_gpu_kernel.h‎
Lines changed: 4 additions & 7 deletions b/‎paddle/phi/kernels/funcs/detail/lstm_gpu_kernel.h‎
Lines changed: 4 additions & 7 deletions
diff --git a/‎paddle/phi/kernels/funcs/gather.cu.h‎
Lines changed: 2 additions & 2 deletions b/‎paddle/phi/kernels/funcs/gather.cu.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎paddle/phi/kernels/funcs/pooling.cu‎
Lines changed: 4 additions & 5 deletions b/‎paddle/phi/kernels/funcs/pooling.cu‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎paddle/phi/kernels/funcs/scatter.cu.h‎
Lines changed: 3 additions & 3 deletions b/‎paddle/phi/kernels/funcs/scatter.cu.h‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎paddle/phi/kernels/funcs/segment_pooling.cu‎
Lines changed: 10 additions & 10 deletions b/‎paddle/phi/kernels/funcs/segment_pooling.cu‎
Lines changed: 10 additions & 10 deletions
diff --git a/‎paddle/phi/kernels/funcs/selected_rows_functor.cu‎
Lines changed: 6 additions & 6 deletions b/‎paddle/phi/kernels/funcs/selected_rows_functor.cu‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎paddle/phi/kernels/gpu/accuracy_kernel.cu‎
Lines changed: 2 additions & 2 deletions b/‎paddle/phi/kernels/gpu/accuracy_kernel.cu‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎paddle/phi/kernels/gpu/adagrad_kernel.cu‎
Lines changed: 5 additions & 5 deletions b/‎paddle/phi/kernels/gpu/adagrad_kernel.cu‎
Lines changed: 5 additions & 5 deletions
@@ -15,8 +15,8 @@ limitations under the License. */
 #pragma once
 #include <type_traits>
 
-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/kernels/funcs/detail/activation_functions.h"
 #include "paddle/phi/kernels/funcs/gru_compute.h"
 
 
@@ -15,8 +15,8 @@ limitations under the License. */
 #pragma once
 #include <type_traits>
 
-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/kernels/funcs/detail/activation_functions.h"
 #include "paddle/phi/kernels/funcs/lstm_compute.h"
 
@@ -202,15 +202,12 @@ __global__ void KeLstmBackward(Op op,
   if (is_batch) {
     if (value.prev_state_value) {
       if (grad.check_ig_grad)
-        paddle::platform::CudaAtomicAdd(grad.check_ig_grad + frame_idx,
-                                        r_checkIGrad);
+        phi::CudaAtomicAdd(grad.check_ig_grad + frame_idx, r_checkIGrad);
       if (grad.check_fg_grad)
-        paddle::platform::CudaAtomicAdd(grad.check_fg_grad + frame_idx,
-                                        r_checkFGrad);
+        phi::CudaAtomicAdd(grad.check_fg_grad + frame_idx, r_checkFGrad);
     }
     if (grad.check_og_grad)
-      paddle::platform::CudaAtomicAdd(grad.check_og_grad + frame_idx,
-                                      r_checkOGrad);
+      phi::CudaAtomicAdd(grad.check_og_grad + frame_idx, r_checkOGrad);
   } else {
     if (value.prev_state_value) {
       if (grad.check_ig_grad) grad.check_ig_grad[frame_idx] += r_checkIGrad;
 
@@ -18,8 +18,8 @@ limitations under the License. */
 
 #include "paddle/fluid/memory/memcpy.h"
 // TODO(paddle-dev): move gpu_primitives.h to phi
-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
+#include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -217,7 +217,7 @@ __global__ void GatherGradGPUKernel(const T* input,
     int64_t out_index =
         inner_dim_index * (outer_dim_size * out_index_dim_size) +
         index[index_dim_index] * outer_dim_size + out_dim_index;
-    paddle::platform::CudaAtomicAdd(out + out_index, *(input + idx));
+    phi::CudaAtomicAdd(out + out_index, *(input + idx));
   }
 }
 
 
@@ -15,8 +15,8 @@ limitations under the License. */
 #include <algorithm>
 #include <vector>
 
-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
+#include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/kernels/funcs/pooling.h"
 #include "paddle/phi/kernels/funcs/reduce_function.h"
 #include "paddle/phi/kernels/primitive/datamover_primitives.h"
@@ -428,8 +428,7 @@ __global__ void KernelMaxPool2DGrad(const int nthreads,
 
     if (maxIndex != -1) {
       // atomic add
-      paddle::platform::CudaAtomicAdd(input_grad + maxIndex,
-                                      output_grad[index]);
+      phi::CudaAtomicAdd(input_grad + maxIndex, output_grad[index]);
     }
   }
 }
@@ -1330,7 +1329,7 @@ __global__ void KernelMaxPool3DGrad(const int nthreads,
     }
     if (maxIdx != -1) {
       // atomic add
-      paddle::platform::CudaAtomicAdd(input_grad + maxIdx, output_grad[index]);
+      phi::CudaAtomicAdd(input_grad + maxIdx, output_grad[index]);
     }
   }
 }
@@ -2359,7 +2358,7 @@ __global__ void KernelMaxPool3DWithIdxGrad(
           w_offset;
       int max_index = mask[output_index];
       if (max_index != -1) {
-        paddle::platform::CudaAtomicAdd(
+        phi::CudaAtomicAdd(
             &input_grad[nc_offset * input_depth * input_height * input_width +
                         max_index],
             output_grad[output_index]);
 
@@ -16,8 +16,8 @@ limitations under the License. */
 #include <unordered_set>
 #include <vector>
 
-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
+#include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -70,7 +70,7 @@ __global__ void ScatterCUDAKernel(const T* params,
     if (overwrite) {
       *(output + out_i) = *(params + i);
     } else {
-      paddle::platform::CudaAtomicAdd(output + out_i, *(params + i));
+      phi::CudaAtomicAdd(output + out_i, *(params + i));
     }
   }
 }
@@ -104,7 +104,7 @@ __global__ void ScatterNdCUDAKernel(const T* update,
       temp *= output_dims[j];
     }
     int64_t output_i = gather_i + slice_i;
-    paddle::platform::CudaAtomicAdd(output + output_i, *(update + i));
+    phi::CudaAtomicAdd(output + output_i, *(update + i));
   }
 }
 
 
@@ -14,9 +14,9 @@ limitations under the License. */
 
 #include <algorithm>
 
-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
+#include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/kernels/funcs/gather.cu.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/funcs/segment_pooling.h"
@@ -60,7 +60,7 @@ __global__ void SegmentSumIdsKernel(const Index* segment_ids,
         }
         if (j > 0) {
           if (last_segment_id == first_segment_id) {
-            paddle::platform::CudaAtomicAdd(summed_ids + last_segment_id, sum);
+            phi::CudaAtomicAdd(summed_ids + last_segment_id, sum);
           } else {
             *(summed_ids + last_segment_id) = sum;
           }
@@ -70,7 +70,7 @@ __global__ void SegmentSumIdsKernel(const Index* segment_ids,
       sum += T(1);
       last_segment_id = current_segment_id;
     }
-    paddle::platform::CudaAtomicAdd(summed_ids + last_segment_id, sum);
+    phi::CudaAtomicAdd(summed_ids + last_segment_id, sum);
   }
 }
 
@@ -111,8 +111,8 @@ __global__ void SegmentMeanKernel(const Index* segment_ids,
               last_segment_id * inner_dim_size + segment_offset;
 
           if (last_segment_id == first_segment_id) {
-            paddle::platform::CudaAtomicAdd(
-                output + output_index, sum / *(summed_ids + last_segment_id));
+            phi::CudaAtomicAdd(output + output_index,
+                               sum / *(summed_ids + last_segment_id));
           } else {
             *(output + output_index) = sum / *(summed_ids + last_segment_id);
           }
@@ -123,8 +123,8 @@ __global__ void SegmentMeanKernel(const Index* segment_ids,
       last_segment_id = current_segment_id;
     }
     Index output_index = last_segment_id * inner_dim_size + segment_offset;
-    paddle::platform::CudaAtomicAdd(output + output_index,
-                                    sum / *(summed_ids + last_segment_id));
+    phi::CudaAtomicAdd(output + output_index,
+                       sum / *(summed_ids + last_segment_id));
   }
 }
 
@@ -215,7 +215,7 @@ class MaxPool {
   DEVICE inline T initial() { return static_cast<T>(-FLT_MAX); }
   DEVICE inline void compute(const T& x, T* y) { *y = *y > x ? *y : x; }
   DEVICE inline T atomic(T* address, const T val) {
-    return paddle::platform::CudaAtomicMax(address, val);
+    return phi::CudaAtomicMax(address, val);
   }
 };
 
@@ -225,7 +225,7 @@ class MinPool {
   DEVICE inline T initial() { return static_cast<T>(FLT_MAX); }
   DEVICE inline void compute(const T& x, T* y) { *y = *y < x ? *y : x; }
   DEVICE inline T atomic(T* address, const T val) {
-    return paddle::platform::CudaAtomicMin(address, val);
+    return phi::CudaAtomicMin(address, val);
   }
 };
 
@@ -235,7 +235,7 @@ class SumPool {
   DEVICE inline T initial() { return static_cast<T>(0); }
   DEVICE inline void compute(const T& x, T* y) { *y = *y + x; }
   DEVICE inline T atomic(T* address, const T val) {
-    return paddle::platform::CudaAtomicAdd(address, val);
+    return phi::CudaAtomicAdd(address, val);
   }
 };
 
 
@@ -15,7 +15,7 @@ limitations under the License. */
 #include <set>
 #include <vector>
 
-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
+#include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/bfloat16.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -127,7 +127,7 @@ __global__ void SelectedRowsAddTensorKernel(const T* selected_rows,
     // Since index in rows of SelectedRows can be duplicate, we can not use
     // tensor_out[index] += selected_rows[index]; Instead, we have to use
     // AtomicAdd to avoid concurrent write error.
-    paddle::platform::CudaAtomicAdd(tensor_out + index, selected_rows[index]);
+    phi::CudaAtomicAdd(tensor_out + index, selected_rows[index]);
   }
 }
 }  // namespace
@@ -279,7 +279,7 @@ __global__ void SelectedRowsAddToTensorKernel(const T* selected_rows,
   for (int index = tid; index < row_numel; index += block_size) {
     // Since index in rows of SelectedRows can be duplicate, we have to use
     // Atomic Operation to avoid concurrent write error.
-    paddle::platform::CudaAtomicAdd(tensor_out + index, selected_rows[index]);
+    phi::CudaAtomicAdd(tensor_out + index, selected_rows[index]);
   }
 }
 }  // namespace
@@ -360,7 +360,7 @@ __global__ void MergeAddKernel(const T* input,
   input += ty * row_numel;
   out += out_idx * row_numel;
   for (int index = tid; index < row_numel; index += block_size) {
-    paddle::platform::CudaAtomicAdd(out + index, input[index]);
+    phi::CudaAtomicAdd(out + index, input[index]);
   }
 }
 
@@ -623,9 +623,9 @@ struct UpdateToTensor<phi::GPUContext, T> {
     auto* in1_data = in1_value.template data<T>();
     auto* in2_data = input2->data<T>();
 
-    dim3 threads(paddle::platform::PADDLE_CUDA_NUM_THREADS, 1);
+    dim3 threads(phi::PADDLE_CUDA_NUM_THREADS, 1);
     dim3 grid(in1_rows.size(), 1);
-    UpdateToTensorKernel<T, paddle::platform::PADDLE_CUDA_NUM_THREADS>
+    UpdateToTensorKernel<T, phi::PADDLE_CUDA_NUM_THREADS>
         <<<grid, threads, 0, context.stream()>>>(
             in1_data, in1_rows.cuda_data(), op, in2_data, in1_row_numel);
   }
 
@@ -17,14 +17,14 @@
 #include <thrust/execution_policy.h>
 #include <thrust/reduce.h>
 
-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/kernel_registry.h"
 
 namespace phi {
-using paddle::platform::PADDLE_CUDA_NUM_THREADS;
+using phi::PADDLE_CUDA_NUM_THREADS;
 
 template <int BlockSize>
 __global__ void AccuracyCudaKernel(const int N,
 
@@ -14,8 +14,8 @@
 
 #include "paddle/phi/kernels/adagrad_kernel.h"
 
-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/funcs/selected_rows_functor.h"
@@ -47,7 +47,7 @@ __global__ void MergeGradKernel(const T* grad,
   grad += ty * row_numel;
   grad_merge += grad_merge_idx * row_numel;
   for (int index = tid; index < row_numel; index += block_size) {
-    paddle::platform::CudaAtomicAdd(grad_merge + index, grad[index]);
+    phi::CudaAtomicAdd(grad_merge + index, grad[index]);
   }
 }
 
@@ -69,9 +69,9 @@ __global__ void SparseAdagradFunctorKernel(const T* grad,
   for (int index = tid; index < row_numel; index += block_size) {
     // Since index in rows of SelectedRows can be duplicate, we have to use
     // Atomic Operation to avoid concurrent write error.
-    paddle::platform::CudaAtomicAdd(param + index,
-                                    -1.0 * learning_rate[0] * grad[index] /
-                                        (sqrt(moment[index]) + epsilon));
+    phi::CudaAtomicAdd(param + index,
+                       -1.0 * learning_rate[0] * grad[index] /
+                           (sqrt(moment[index]) + epsilon));
   }
 }
Original file line number	Diff line number	Diff line change
`@@ -15,8 +15,8 @@ limitations under the License. */`
`15`	`15`	`#include <algorithm>`
`16`	`16`	`#include <vector>`
`17`	`17`
`18`		`-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"`
`19`	`18`	`#include "paddle/phi/backends/gpu/gpu_launch_config.h"`
	`19`	`+#include "paddle/phi/backends/gpu/gpu_primitives.h"`
`20`	`20`	`#include "paddle/phi/kernels/funcs/pooling.h"`
`21`	`21`	`#include "paddle/phi/kernels/funcs/reduce_function.h"`
`22`	`22`	`#include "paddle/phi/kernels/primitive/datamover_primitives.h"`
`@@ -428,8 +428,7 @@ __global__ void KernelMaxPool2DGrad(const int nthreads,`
`428`	`428`
`429`	`429`	`if (maxIndex != -1) {`
`430`	`430`	`// atomic add`
`431`		`- paddle::platform::CudaAtomicAdd(input_grad + maxIndex,`
`432`		`- output_grad[index]);`
	`431`	`+ phi::CudaAtomicAdd(input_grad + maxIndex, output_grad[index]);`
`433`	`432`	`}`
`434`	`433`	`}`
`435`	`434`	`}`
`@@ -1330,7 +1329,7 @@ __global__ void KernelMaxPool3DGrad(const int nthreads,`
`1330`	`1329`	`}`
`1331`	`1330`	`if (maxIdx != -1) {`
`1332`	`1331`	`// atomic add`
`1333`		`- paddle::platform::CudaAtomicAdd(input_grad + maxIdx, output_grad[index]);`
	`1332`	`+ phi::CudaAtomicAdd(input_grad + maxIdx, output_grad[index]);`
`1334`	`1333`	`}`
`1335`	`1334`	`}`
`1336`	`1335`	`}`
`@@ -2359,7 +2358,7 @@ __global__ void KernelMaxPool3DWithIdxGrad(`
`2359`	`2358`	`w_offset;`
`2360`	`2359`	`int max_index = mask[output_index];`
`2361`	`2360`	`if (max_index != -1) {`
`2362`		`- paddle::platform::CudaAtomicAdd(`
	`2361`	`+ phi::CudaAtomicAdd(`
`2363`	`2362`	`&input_grad[nc_offset * input_depth * input_height * input_width +`
`2364`	`2363`	`max_index],`
`2365`	`2364`	`output_grad[output_index]);`
Original file line number	Diff line number	Diff line change
`@@ -16,8 +16,8 @@ limitations under the License. */`
`16`	`16`	`#include <unordered_set>`
`17`	`17`	`#include <vector>`
`18`	`18`
`19`		`-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"`
`20`	`19`	`#include "paddle/phi/backends/gpu/gpu_launch_config.h"`
	`20`	`+#include "paddle/phi/backends/gpu/gpu_primitives.h"`
`21`	`21`	`#include "paddle/phi/common/place.h"`
`22`	`22`	`#include "paddle/phi/core/dense_tensor.h"`
`23`	`23`	`#include "paddle/phi/kernels/funcs/math_function.h"`
`@@ -70,7 +70,7 @@ __global__ void ScatterCUDAKernel(const T* params,`
`70`	`70`	`if (overwrite) {`
`71`	`71`	`*(output + out_i) = *(params + i);`
`72`	`72`	`} else {`
`73`		`- paddle::platform::CudaAtomicAdd(output + out_i, *(params + i));`
	`73`	`+ phi::CudaAtomicAdd(output + out_i, *(params + i));`
`74`	`74`	`}`
`75`	`75`	`}`
`76`	`76`	`}`
`@@ -104,7 +104,7 @@ __global__ void ScatterNdCUDAKernel(const T* update,`
`104`	`104`	`temp *= output_dims[j];`
`105`	`105`	`}`
`106`	`106`	`int64_t output_i = gather_i + slice_i;`
`107`		`- paddle::platform::CudaAtomicAdd(output + output_i, *(update + i));`
	`107`	`+ phi::CudaAtomicAdd(output + output_i, *(update + i));`
`108`	`108`	`}`
`109`	`109`	`}`
`110`	`110`
Original file line number	Diff line number	Diff line change
`@@ -14,9 +14,9 @@ limitations under the License. */`
`14`	`14`
`15`	`15`	`#include <algorithm>`
`16`	`16`
`17`		`-#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"`
`18`	`17`	`#include "paddle/phi/backends/gpu/gpu_context.h"`
`19`	`18`	`#include "paddle/phi/backends/gpu/gpu_launch_config.h"`
	`19`	`+#include "paddle/phi/backends/gpu/gpu_primitives.h"`
`20`	`20`	`#include "paddle/phi/kernels/funcs/gather.cu.h"`
`21`	`21`	`#include "paddle/phi/kernels/funcs/math_function.h"`
`22`	`22`	`#include "paddle/phi/kernels/funcs/segment_pooling.h"`
`@@ -60,7 +60,7 @@ __global__ void SegmentSumIdsKernel(const Index* segment_ids,`
`60`	`60`	`}`
`61`	`61`	`if (j > 0) {`
`62`	`62`	`if (last_segment_id == first_segment_id) {`
`63`		`- paddle::platform::CudaAtomicAdd(summed_ids + last_segment_id, sum);`
	`63`	`+ phi::CudaAtomicAdd(summed_ids + last_segment_id, sum);`
`64`	`64`	`} else {`
`65`	`65`	`*(summed_ids + last_segment_id) = sum;`
`66`	`66`	`}`
`@@ -70,7 +70,7 @@ __global__ void SegmentSumIdsKernel(const Index* segment_ids,`
`70`	`70`	`sum += T(1);`
`71`	`71`	`last_segment_id = current_segment_id;`
`72`	`72`	`}`
`73`		`- paddle::platform::CudaAtomicAdd(summed_ids + last_segment_id, sum);`
	`73`	`+ phi::CudaAtomicAdd(summed_ids + last_segment_id, sum);`
`74`	`74`	`}`
`75`	`75`	`}`
`76`	`76`
`@@ -111,8 +111,8 @@ __global__ void SegmentMeanKernel(const Index* segment_ids,`
`111`	`111`	`last_segment_id * inner_dim_size + segment_offset;`
`112`	`112`
`113`	`113`	`if (last_segment_id == first_segment_id) {`
`114`		`- paddle::platform::CudaAtomicAdd(`
`115`		`- output + output_index, sum / *(summed_ids + last_segment_id));`
	`114`	`+ phi::CudaAtomicAdd(output + output_index,`
	`115`	`+ sum / *(summed_ids + last_segment_id));`
`116`	`116`	`} else {`
`117`	`117`	`*(output + output_index) = sum / *(summed_ids + last_segment_id);`
`118`	`118`	`}`
`@@ -123,8 +123,8 @@ __global__ void SegmentMeanKernel(const Index* segment_ids,`
`123`	`123`	`last_segment_id = current_segment_id;`
`124`	`124`	`}`
`125`	`125`	`Index output_index = last_segment_id * inner_dim_size + segment_offset;`
`126`		`- paddle::platform::CudaAtomicAdd(output + output_index,`
`127`		`- sum / *(summed_ids + last_segment_id));`
	`126`	`+ phi::CudaAtomicAdd(output + output_index,`
	`127`	`+ sum / *(summed_ids + last_segment_id));`
`128`	`128`	`}`
`129`	`129`	`}`
`130`	`130`
`@@ -215,7 +215,7 @@ class MaxPool {`
`215`	`215`	`DEVICE inline T initial() { return static_cast<T>(-FLT_MAX); }`
`216`	`216`	`DEVICE inline void compute(const T& x, T* y) { *y = *y > x ? *y : x; }`
`217`	`217`	`DEVICE inline T atomic(T* address, const T val) {`
`218`		`- return paddle::platform::CudaAtomicMax(address, val);`
	`218`	`+ return phi::CudaAtomicMax(address, val);`
`219`	`219`	`}`
`220`	`220`	`};`
`221`	`221`
`@@ -225,7 +225,7 @@ class MinPool {`
`225`	`225`	`DEVICE inline T initial() { return static_cast<T>(FLT_MAX); }`
`226`	`226`	`DEVICE inline void compute(const T& x, T* y) { *y = *y < x ? *y : x; }`
`227`	`227`	`DEVICE inline T atomic(T* address, const T val) {`
`228`		`- return paddle::platform::CudaAtomicMin(address, val);`
	`228`	`+ return phi::CudaAtomicMin(address, val);`
`229`	`229`	`}`
`230`	`230`	`};`
`231`	`231`
`@@ -235,7 +235,7 @@ class SumPool {`
`235`	`235`	`DEVICE inline T initial() { return static_cast<T>(0); }`
`236`	`236`	`DEVICE inline void compute(const T& x, T* y) { *y = *y + x; }`
`237`	`237`	`DEVICE inline T atomic(T* address, const T val) {`
`238`		`- return paddle::platform::CudaAtomicAdd(address, val);`
	`238`	`+ return phi::CudaAtomicAdd(address, val);`
`239`	`239`	`}`
`240`	`240`	`};`
`241`	`241`