
Commit 7c4d947

Commit message: Fix
1 parent 8f96a48

25 files changed: +1854, -1583 lines

paddle/fluid/operators/custom_device_common_op_registry.cc

Lines changed: 0 additions & 1521 deletions
Large diffs are not rendered by default.
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/assign_pos_kernel.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"

#ifdef PADDLE_WITH_CUSTOM_DEVICE
namespace phi {

template <typename T, typename Context>
void AssignPosKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     const DenseTensor& cum_count_in,
                     const DenseTensor& eff_num_len_in,
                     DenseTensor* out) {
  // assign_pos decides, in order, which tokens should be fetched and which
  // counter each of them belongs to.
  auto cum_count = &cum_count_in;      // (counter number) int32 | int64
  auto numbers = &x;                   // (batch_size * seq_len, topk) int32
  auto eff_num_len = &eff_num_len_in;  // (sum(cum_count))
  // out: (cum_count) value ranges from 0 to batch_size * seq_len * topk

  phi::DenseTensor cpu_eff_num_len;
  int64_t cpu_eff_num_len_data = 0;
  if (eff_num_len->place().GetType() == phi::AllocationType::CPU) {
    cpu_eff_num_len_data = eff_num_len->data<T>()[0];
  } else {
    phi::Copy(dev_ctx, *eff_num_len, phi::CPUPlace(), true, &cpu_eff_num_len);
    cpu_eff_num_len_data = cpu_eff_num_len.data<T>()[0];
  }

  out->Resize({cpu_eff_num_len_data});
  dev_ctx.template Alloc<T>(out);

  phi::DenseTensor numbers_cpu, cum_count_cpu;
  phi::Copy(dev_ctx, *numbers, phi::CPUPlace(), true, &numbers_cpu);
  phi::Copy(dev_ctx, *cum_count, phi::CPUPlace(), true, &cum_count_cpu);
  auto* numbers_data = numbers_cpu.data<T>();
  auto* cum_count_data = cum_count_cpu.data<T>();

  std::vector<T> out_data(cpu_eff_num_len_data);
  for (int64_t i = 0; i < numbers->numel(); ++i) {
    int number_idx = numbers_data[i];
    if (number_idx > -1) {
      cum_count_data[number_idx] -= 1;
      int p = cum_count_data[number_idx];
      out_data[p] = i;
    }
  }
  phi::TensorFromVector<int64_t>(out_data, dev_ctx, out);
}

}  // namespace phi

PD_REGISTER_KERNEL(
    assign_pos, Custom, ALL_LAYOUT, phi::AssignPosKernel, int64_t) {}
#endif
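
Note: the placement loop in AssignPosKernel is a counting-sort scatter. cum_count[k] starts as the cumulative number of tokens assigned to counter k, and each token index is written just below its counter's running boundary, which then walks down. A minimal standalone sketch of the same loop on toy data (illustrative only, not part of the commit):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> numbers = {1, 0, 1, 0};  // counter id per token; -1 = skip
  std::vector<int64_t> cum_count = {2, 4};      // inclusive cumulative counts
  std::vector<int64_t> out(4);
  for (int64_t i = 0; i < static_cast<int64_t>(numbers.size()); ++i) {
    int64_t idx = numbers[i];
    if (idx > -1) {
      cum_count[idx] -= 1;      // move counter idx's boundary down one slot
      out[cum_count[idx]] = i;  // place token i in that slot
    }
  }
  for (int64_t v : out) std::cout << v << ' ';  // prints: 3 1 2 0
  std::cout << '\n';
  return 0;
}

Tokens of counter 0 (indices 1 and 3) fill slots 0-1 and tokens of counter 1 (indices 0 and 2) fill slots 2-3, each group filled from the back.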
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/api/backward/backward_api.h"
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/backends/device_manager.h"
#include "paddle/phi/core/distributed/collective/process_group.h"
#include "paddle/phi/core/distributed/comm_context_manager.h"
#include "paddle/phi/core/distributed/xccl_comm_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#ifdef PADDLE_WITH_CUSTOM_DEVICE
namespace phi {

template <typename T, typename Context>
void BarrierKernel(const Context& dev_ctx,
                   const DenseTensor& x_in,
                   int ring_id,
                   bool use_calc_stream,
                   DenseTensor* out) {
  auto in = &x_in;

  auto place = dev_ctx.GetPlace();
  int64_t numel = in->numel();
  const void* sendbuff = in->data();
  void* recvbuff = dev_ctx.template Alloc<T>(out);
  int rid = ring_id;

  auto comm = reinterpret_cast<phi::distributed::XCCLCommContext*>(
      phi::distributed::CommContextManager::GetInstance().Get(
          std::to_string(rid)));

  std::shared_ptr<phi::stream::Stream> stream;
  if (use_calc_stream) {
    stream = dev_ctx.GetStream();
  } else {
    stream = comm->GetStream();
  }
  phi::DeviceManager::CCLAllReduce(place.GetDeviceType(),
                                   const_cast<void*>(sendbuff),
                                   recvbuff,
                                   numel,
                                   in->dtype(),
                                   phi::ccl::CCLReduceOp::SUM,
                                   comm->GetXcclComm(),
                                   *stream);
}
}  // namespace phi

PD_REGISTER_KERNEL(barrier, Custom, ALL_LAYOUT, phi::BarrierKernel, int) {}
#endif
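
Note: there is no dedicated barrier primitive here. The kernel realizes the barrier as a CCLAllReduce (SUM) over the input tensor, issued on either the compute stream (use_calc_stream) or the communicator's own stream; the collective's completion, not its result, is what synchronizes the ranks, which is why the kernel is registered for int only.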
Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/backends/device_manager.h"
#include "paddle/phi/core/distributed/collective/process_group.h"
#include "paddle/phi/core/distributed/comm_context_manager.h"
#include "paddle/phi/core/distributed/xccl_comm_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#ifdef PADDLE_WITH_CUSTOM_DEVICE
namespace phi {

template <typename T, typename Context, phi::ccl::CCLReduceOp red_type>
void CAllReduceKernel(const Context& dev_ctx,
                      const DenseTensor& x_in,
                      int ring_id,
                      bool use_calc_stream,
                      bool use_model_parallel,
                      DenseTensor* out) {
  auto in = &x_in;
  int rid = ring_id;

  auto place = dev_ctx.GetPlace();
  auto dtype = in->dtype();
  int64_t numel = in->numel();
  const void* sendbuff = in->data<T>();
  out->Resize(in->dims());
  void* recvbuff = dev_ctx.template Alloc<T>(out);

  auto map = phi::distributed::ProcessGroupMapFromGid::getInstance();
  if (map->has(rid)) {
    // Use ProcessGroup
    phi::distributed::ProcessGroup* pg = map->get(rid);
    std::vector<phi::DenseTensor> in_tensor;
    std::vector<phi::DenseTensor> out_tensor;
    in_tensor.push_back(*in);
    out_tensor.push_back(*out);

    phi::distributed::AllreduceOptions opts;
    switch (red_type) {
      case phi::ccl::CCLReduceOp::SUM:
        opts.reduce_op = phi::distributed::ReduceOp::SUM;
        break;

      case phi::ccl::CCLReduceOp::MAX:
        opts.reduce_op = phi::distributed::ReduceOp::MAX;
        break;

      case phi::ccl::CCLReduceOp::MIN:
        opts.reduce_op = phi::distributed::ReduceOp::MIN;
        break;

      case phi::ccl::CCLReduceOp::PRODUCT:
        opts.reduce_op = phi::distributed::ReduceOp::PRODUCT;
        break;

      default:
        PADDLE_THROW(common::errors::InvalidArgument("Invalid reduce type: %d",
                                                     red_type));
    }

    auto task = pg->AllReduce(in_tensor, out_tensor, opts);
    task->Wait();
    return;
  }

  auto comm = reinterpret_cast<phi::distributed::XCCLCommContext*>(
      phi::distributed::CommContextManager::GetInstance().Get(
          std::to_string(rid)));

  std::shared_ptr<phi::stream::Stream> stream;
  if (use_calc_stream) {
    stream = dev_ctx.GetStream();
  } else {
    stream = comm->GetStream();
  }
  phi::DeviceManager::CCLAllReduce(place.GetDeviceType(),
                                   const_cast<void*>(sendbuff),
                                   recvbuff,
                                   numel,
                                   dtype,
                                   red_type,
                                   comm->GetXcclComm(),
                                   *stream);
}
}  // namespace phi

#endif
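
Note: this header carries the shared all-reduce template used by the four dtype-specific kernels below. It has two paths: when a ProcessGroup is registered for ring_id, it delegates to pg->AllReduce and blocks on task->Wait(); otherwise it falls back to the raw XCCLCommContext plus phi::DeviceManager::CCLAllReduce on the chosen stream. The red_type template parameter fixes the reduction at compile time, so each wrapper only pins one enum value. In isolation the pattern looks like this (toy C++17 sketch, not the Paddle API):

#include <algorithm>
#include <cstdio>

enum class ReduceOp { SUM, MAX };

// Shared template: the op is a non-type template parameter,
// as red_type is in CAllReduceKernel.
template <typename T, ReduceOp op>
T Combine(T a, T b) {
  if constexpr (op == ReduceOp::SUM) {
    return a + b;
  } else {
    return std::max(a, b);
  }
}

// Thin per-op wrappers, mirroring c_allreduce_sum / c_allreduce_max below.
template <typename T>
T CombineSum(T a, T b) { return Combine<T, ReduceOp::SUM>(a, b); }
template <typename T>
T CombineMax(T a, T b) { return Combine<T, ReduceOp::MAX>(a, b); }

int main() {
  std::printf("%d %d\n", CombineSum(2, 3), CombineMax(2, 3));  // prints: 5 3
  return 0;
}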
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/custom/c_allreduce_kernel_impl.h"
#ifdef PADDLE_WITH_CUSTOM_DEVICE
namespace phi {
template <typename T, typename Context>
void CAllReduceMaxKernel(const Context& dev_ctx,
                         const DenseTensor& x_in,
                         int ring_id,
                         bool use_calc_stream,
                         bool use_model_parallel,
                         DenseTensor* out) {
  CAllReduceKernel<T, Context, phi::ccl::CCLReduceOp::MAX>(
      dev_ctx, x_in, ring_id, use_calc_stream, use_model_parallel, out);
}
}  // namespace phi

PD_REGISTER_KERNEL(c_allreduce_max,
                   Custom,
                   ALL_LAYOUT,
                   phi::CAllReduceMaxKernel,
                   float,
                   double,
                   int32_t,
                   int64_t,
                   phi::dtype::float16) {}
#endif
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/custom/c_allreduce_kernel_impl.h"
#ifdef PADDLE_WITH_CUSTOM_DEVICE
namespace phi {
template <typename T, typename Context>
void CAllReduceMinKernel(const Context& dev_ctx,
                         const DenseTensor& x_in,
                         int ring_id,
                         bool use_calc_stream,
                         bool use_model_parallel,
                         DenseTensor* out) {
  CAllReduceKernel<T, Context, phi::ccl::CCLReduceOp::MIN>(
      dev_ctx, x_in, ring_id, use_calc_stream, use_model_parallel, out);
}
}  // namespace phi

PD_REGISTER_KERNEL(c_allreduce_min,
                   Custom,
                   ALL_LAYOUT,
                   phi::CAllReduceMinKernel,
                   float,
                   double,
                   int32_t,
                   int64_t,
                   phi::dtype::float16) {}
#endif
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/custom/c_allreduce_kernel_impl.h"
#ifdef PADDLE_WITH_CUSTOM_DEVICE
namespace phi {
template <typename T, typename Context>
void CAllReduceProdKernel(const Context& dev_ctx,
                          const DenseTensor& x_in,
                          int ring_id,
                          bool use_calc_stream,
                          bool use_model_parallel,
                          DenseTensor* out) {
  CAllReduceKernel<T, Context, phi::ccl::CCLReduceOp::PRODUCT>(
      dev_ctx, x_in, ring_id, use_calc_stream, use_model_parallel, out);
}
}  // namespace phi

PD_REGISTER_KERNEL(c_allreduce_prod,
                   Custom,
                   ALL_LAYOUT,
                   phi::CAllReduceProdKernel,
                   float,
                   double,
                   int32_t,
                   int64_t,
                   phi::dtype::float16) {}
#endif
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/custom/c_allreduce_kernel_impl.h"
#ifdef PADDLE_WITH_CUSTOM_DEVICE
namespace phi {
template <typename T, typename Context>
void CAllReduceSumKernel(const Context& dev_ctx,
                         const DenseTensor& x_in,
                         int ring_id,
                         bool use_calc_stream,
                         bool use_model_parallel,
                         DenseTensor* out) {
  CAllReduceKernel<T, Context, phi::ccl::CCLReduceOp::SUM>(
      dev_ctx, x_in, ring_id, use_calc_stream, use_model_parallel, out);
}
}  // namespace phi

PD_REGISTER_KERNEL(c_allreduce_sum,
                   Custom,
                   ALL_LAYOUT,
                   phi::CAllReduceSumKernel,
                   float,
                   double,
                   int32_t,
                   int64_t,
                   phi::dtype::float16) {}
#endif
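
Note: the four wrappers (c_allreduce_max, c_allreduce_min, c_allreduce_prod, c_allreduce_sum) differ only in the CCLReduceOp they pin; each registers under the Custom backend for the same dtype list (float, double, int32_t, int64_t, phi::dtype::float16) via PD_REGISTER_KERNEL.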
