Skip to content

Commit 4da1a0f

Browse files
authored
[PHI decoupling] remove "gpu_device_function.h" in fluid. (#48117)
* move "paddle/phi/backends/gpu/gpu_device_function.h" to phi
* update copyright years
* rm "fluid/platform/device/gpu/gpu_device_function.h" in phi
* rm dependence to "gpu_device_function.h" in fluid
* rm gpu_device_function.h etc in fluid
* fix rocm-compile bugs
* fix cuda_helper_test.cu bugs
1 parent 2995f74 commit 4da1a0f

17 files changed (+34 additions, −413 deletions)

paddle/fluid/operators/activation_op.kps

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ limitations under the License. */
1313
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
1414
#include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h"
1515
#include "paddle/fluid/platform/bfloat16.h"
16-
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
16+
#include "paddle/phi/backends/gpu/gpu_device_function.h"
1717
#include "paddle/phi/kernels/funcs/activation_functor.h"
1818

1919
namespace paddle {

paddle/fluid/operators/elementwise/elementwise_op_function.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ limitations under the License. */
4242

4343
#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
4444
#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
45-
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
45+
#include "paddle/phi/backends/gpu/gpu_device_function.h"
4646
#include "paddle/phi/backends/gpu/gpu_primitives.h"
4747
#include "paddle/phi/kernels/gpu/elementwise_grad.h"
4848

@@ -982,7 +982,7 @@ static __global__ void FusedElemwiseAndActGradBroadcast1CUDAKernel(
982982
#pragma unroll
983983
for (int i = BLOCK_X >> 1; i > 0; i >>= 1) {
984984
// reduce sum with wrap
985-
val += platform::CudaShuffleXorSync(0xFFFFFFFF, val, i);
985+
val += phi::backends::gpu::CudaShuffleXorSync(0xFFFFFFFF, val, i);
986986
}
987987

988988
size_t idx_j = j + threadIdx.y;
@@ -1004,7 +1004,8 @@ static __global__ void FusedElemwiseAndActGradBroadcast1CUDAKernel(
10041004
#pragma unroll
10051005
for (int i = BLOCK_X >> 1; i > 0; i >>= 1) {
10061006
// reduce sum with wrap
1007-
inter_val += platform::CudaShuffleXorSync(0xFFFFFFFF, inter_val, i);
1007+
inter_val +=
1008+
phi::backends::gpu::CudaShuffleXorSync(0xFFFFFFFF, inter_val, i);
10081009
}
10091010
if (threadIdx.x == 0 && (idx_j < w)) d_intermediate[idx_j] = inter_val;
10101011
}
@@ -1160,22 +1161,22 @@ static __global__ void FusedElemwiseAndActGradBroadcast2CUDAKernel(
11601161
h = h > ELEMWISE_MAX_BLOCK_DIM ? ELEMWISE_MAX_BLOCK_DIM : h;
11611162
if (BcastY) {
11621163
if (dy) {
1163-
val = paddle::platform::reduceSum(val, tid, h);
1164+
val = phi::backends::gpu::reduceSum(val, tid, h);
11641165
if (threadIdx.x == 0) {
11651166
dy[j] = val;
11661167
}
11671168
}
11681169
} else {
11691170
if (dx) {
1170-
val = paddle::platform::reduceSum(val, tid, h);
1171+
val = phi::backends::gpu::reduceSum(val, tid, h);
11711172
if (threadIdx.x == 0) {
11721173
dx[j] = val;
11731174
}
11741175
}
11751176
}
11761177
if (!SameShapeOfIntermediateOutAndOut) {
11771178
if (d_intermediate) {
1178-
inter_val = paddle::platform::reduceSum(inter_val, tid, h);
1179+
inter_val = phi::backends::gpu::reduceSum(inter_val, tid, h);
11791180
if (threadIdx.x == 0) {
11801181
d_intermediate[j] = inter_val;
11811182
}

paddle/fluid/operators/fused/fused_attention_op.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ limitations under the License. */
2222
#include "paddle/fluid/operators/fused/attn_gemm.h"
2323
#include "paddle/fluid/operators/fused/fmha_ref.h"
2424
#include "paddle/fluid/operators/fused/fused_dropout_helper.h"
25-
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
2625
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
2726
#include "paddle/phi/api/include/tensor.h"
27+
#include "paddle/phi/backends/gpu/gpu_device_function.h"
2828
#include "paddle/phi/kernels/funcs/broadcast_function.h"
2929
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
3030
#include "paddle/phi/kernels/funcs/math_function.h"

paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ limitations under the License. */
1919
#include "paddle/fluid/framework/op_registry.h"
2020
#include "paddle/fluid/framework/operator.h"
2121
#include "paddle/fluid/operators/fused/fused_dropout_helper.h"
22-
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
2322
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
23+
#include "paddle/phi/backends/gpu/gpu_device_function.h"
2424

2525
namespace paddle {
2626
namespace operators {

paddle/fluid/operators/fused/fused_dropout_common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ limitations under the License. */
2222
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
2323
#include "paddle/fluid/operators/fused/quant_dequant_kernel.h"
2424
#include "paddle/fluid/operators/layer_norm_kernel.cu.h"
25-
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
2625
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
2726
#include "paddle/fluid/platform/device_context.h"
2827
#include "paddle/fluid/platform/float16.h"
28+
#include "paddle/phi/backends/gpu/gpu_device_function.h"
2929
#include "paddle/phi/kernels/funcs/aligned_vector.h"
3030
#include "paddle/phi/kernels/funcs/functors.h"
3131

paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ namespace cub = hipcub;
2525
#endif
2626

2727
#include "paddle/fluid/framework/op_registry.h"
28-
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
2928
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
29+
#include "paddle/phi/backends/gpu/gpu_device_function.h"
3030
#include "paddle/phi/kernels/funcs/blas/blas.h"
3131

3232
namespace paddle {

paddle/fluid/operators/fused/fused_gate_attention_op.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ limitations under the License. */
1616
#include "paddle/fluid/framework/operator.h"
1717
#include "paddle/fluid/operators/fused/attn_gemm.h"
1818
#include "paddle/fluid/operators/fused/fused_gate_attention.h"
19-
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
19+
#include "paddle/phi/backends/gpu/gpu_device_function.h"
2020
#include "paddle/phi/kernels/funcs/math_function.h"
2121

2222
namespace paddle {

paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ limitations under the License. */
2626
#include "paddle/fluid/operators/fused/attn_gemm.h"
2727
#include "paddle/fluid/operators/fused/fmha_ref.h"
2828
#include "paddle/fluid/operators/fused/fused_dropout_helper.h"
29-
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
3029
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
3130
#include "paddle/phi/api/include/tensor.h"
31+
#include "paddle/phi/backends/gpu/gpu_device_function.h"
3232
#include "paddle/phi/kernels/funcs/math_function.h"
3333

3434
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)

paddle/fluid/operators/group_norm_op.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ namespace cub = hipcub;
2121
#endif
2222

2323
#include "paddle/fluid/operators/group_norm_op.h"
24-
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
24+
#include "paddle/phi/backends/gpu/gpu_device_function.h"
2525
#include "paddle/phi/backends/gpu/gpu_primitives.h"
2626

2727
namespace paddle {

paddle/fluid/operators/layer_norm_kernel.cu.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ namespace cub = hipcub;
2525
#include <iostream>
2626

2727
#include "paddle/fluid/operators/fused/quant_dequant_kernel.h"
28-
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
2928
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
29+
#include "paddle/phi/backends/gpu/gpu_device_function.h"
3030
#include "paddle/phi/core/ddim.h"
3131
#include "paddle/phi/kernels/funcs/aligned_vector.h"
3232

@@ -55,7 +55,7 @@ static __forceinline__ __device__ U WarpReduceSum(U val) {
5555
unsigned mask = 0u;
5656
CREATE_SHFL_MASK(mask, true);
5757
for (int offset = warpSize / 2; offset > 0; offset /= 2) {
58-
val += paddle::platform::CudaShuffleDownSync(mask, val, offset);
58+
val += phi::backends::gpu::CudaShuffleDownSync(mask, val, offset);
5959
}
6060
return val;
6161
}

0 commit comments

Comments (0)