
Commit fd1526f

[AutoParallel] support compute in local view dynamic graph part (#70519)
1 parent 423d815 commit fd1526f

17 files changed: +975 -17 lines

paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h

Lines changed: 9 additions & 0 deletions
@@ -16,6 +16,8 @@
 
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/core/distributed/auto_parallel/dist_attr.h"
+#include "paddle/phi/core/distributed/auto_parallel/placement_types.h"
+#include "paddle/phi/core/distributed/auto_parallel/process_mesh.h"
 
 paddle::Tensor add_n_ad_func(const std::vector<paddle::Tensor>& x);
 
@@ -55,6 +57,13 @@ paddle::Tensor reshard_ad_function(
     const paddle::Tensor& tensor,
     const phi::distributed::TensorDistAttr dist_attr);
 
+paddle::Tensor dtensor_to_local_ad_function(const paddle::Tensor& input);
+
+paddle::Tensor dtensor_from_local_ad_function(
+    const paddle::Tensor& input,
+    const phi::distributed::ProcessMesh& processmesh,
+    const phi::distributed::Placements& placements);
+
 namespace sparse {
 std::tuple<paddle::Tensor,
            paddle::Tensor&,

paddle/fluid/eager/api/manual/eager_manual/forwards/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -4,4 +4,6 @@ set(eager_manual_functions
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/sync_batch_norm_fwd_func.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/reshard_fwd_func.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/dtensor_to_local_fwd_func.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/dtensor_from_local_fwd_func.cc
     PARENT_SCOPE)
paddle/fluid/eager/api/manual/eager_manual/forwards/dtensor_from_local_fwd_func.cc

Lines changed: 106 additions & 0 deletions
@@ -0,0 +1,106 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h"
#include "paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.h"
#include "paddle/phi/core/platform/profiler/event_tracing.h"

paddle::Tensor dtensor_from_local_ad_function(
    const paddle::Tensor& input,
    const phi::distributed::ProcessMesh& process_mesh,
    const phi::distributed::Placements& placements) {
#ifdef PADDLE_WITH_DISTRIBUTE
  VLOG(3) << "Running AD API: "
          << "dtensor_from_local dygraph";
  // Dygraph Record Event
  phi::RecordEvent dygraph_entrance_record_event(
      "dtensor_from_local dygraph", phi::TracerEventType::Communication, 1);

  // Get Input AutoGradMeta
  egr::AutogradMeta* input_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(input);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, input_autograd_meta);

  // Node Declaration
  std::shared_ptr<DtensorFromLocalGradNode> grad_node;

  // Set grad_node before API Call
  if (require_any_grad) {
    phi::RecordEvent node_creation_record_event(
        "dtensor_from_local node_creation",
        phi::TracerEventType::Communication,
        1);

    // Node Construction
    grad_node = std::shared_ptr<DtensorFromLocalGradNode>(
        new DtensorFromLocalGradNode(1, 1));  // NOLINT
  }

  auto dense_tensor_ptr =
      std::static_pointer_cast<phi::DenseTensor>(input.impl());

  auto global_dims = common::vectorize(dense_tensor_ptr->dims());
  for (size_t i = 0; i < placements.size(); i++) {
    auto placement = placements[i];
    if (placement->is_shard()) {
      auto shard_dim =
          dynamic_cast<const phi::distributed::Shard&>(*placement).get_dim();
      global_dims[shard_dim] = global_dims[shard_dim] * process_mesh.shape()[i];
    }
  }

  auto dist_out_ptr = std::make_shared<phi::distributed::DistTensor>(
      dense_tensor_ptr,
      common::make_ddim(global_dims),
      process_mesh,
      placements);

  auto api_result = paddle::Tensor(dist_out_ptr);

  // Get Outputs
  auto& out = api_result;

  // Get Output AutoGradMeta
  egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out);

  // Set grad_node after API call
  if (require_any_grad) {
    egr::EagerUtils::PassStopGradient(false, out_autograd_meta);

    // SetGradOutMeta & SetEdges
    grad_node->SetGradOutMeta(input, 0);
    // SetOutRank & SetHistory & SetGradInMeta
    if (out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0);
      egr::EagerUtils::SetHistory(out_autograd_meta, grad_node);
    }
    grad_node->SetGradInMeta(out, 0);
    grad_node->SetTensorWrapperNoNeedBuffer_Output(out);
  }

  return out;
#else
  PADDLE_THROW(common::errors::Unavailable(
      "DtensorFromLocal is not supported in this version of Paddle. Try to "
      "recompile it "
      "with WITH_DISTRIBUTE=ON and reinstall this package."));
  return paddle::Tensor();
#endif
}
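Note on the shape logic in dtensor_from_local_ad_function above: the global shape is inferred from the local shard's shape by multiplying every sharded axis by the size of the mesh dimension that shards it (even sharding is assumed). A minimal standalone sketch of that rule, using simplified stand-in types rather than Paddle's Placement/ProcessMesh classes:

// Standalone sketch (not Paddle code): recover the global shape of a tensor
// from the shape of one local shard, assuming every shard has the same size.
#include <cstdint>
#include <iostream>
#include <vector>

struct Placement {
  bool is_shard;      // true if this mesh dimension shards a tensor axis
  int64_t shard_dim;  // which tensor axis it shards (valid when is_shard)
};

std::vector<int64_t> InferGlobalDims(const std::vector<int64_t>& local_dims,
                                     const std::vector<Placement>& placements,
                                     const std::vector<int64_t>& mesh_shape) {
  std::vector<int64_t> global_dims = local_dims;
  for (size_t i = 0; i < placements.size(); ++i) {
    if (placements[i].is_shard) {
      // Each rank holds 1/mesh_shape[i] of the sharded axis, so the global
      // extent is the local extent times the mesh size on that dimension.
      global_dims[placements[i].shard_dim] *= mesh_shape[i];
    }
  }
  return global_dims;
}

int main() {
  // A [2, 8] local shard on a 1-D mesh of 4 ranks, sharded on axis 0,
  // corresponds to an [8, 8] global tensor.
  for (int64_t d : InferGlobalDims({2, 8}, {{true, 0}}, {4})) {
    std::cout << d << " ";  // prints: 8 8
  }
  std::cout << "\n";
  return 0;
}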
paddle/fluid/eager/api/manual/eager_manual/forwards/dtensor_to_local_fwd_func.cc

Lines changed: 114 additions & 0 deletions
@@ -0,0 +1,114 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/phi/core/platform/profiler/event_tracing.h"

paddle::Tensor dtensor_to_local_ad_function(const paddle::Tensor& input) {
#ifdef PADDLE_WITH_DISTRIBUTE
  VLOG(3) << "Running AD API: "
          << "dtensor_to_local dygraph";
  // Dygraph Record Event
  phi::RecordEvent dygraph_entrance_record_event(
      "dtensor_to_local dygraph", phi::TracerEventType::Communication, 1);

  // Get Input AutoGradMeta
  egr::AutogradMeta* input_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(input);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, input_autograd_meta);

  // Node Declaration
  std::shared_ptr<DtensorToLocalGradNode> grad_node;

  // Set grad_node before API Call
  if (require_any_grad) {
    phi::RecordEvent node_creation_record_event(
        "dtensor_to_local node_creation",
        phi::TracerEventType::Communication,
        1);

    // Node Construction
    grad_node = std::shared_ptr<DtensorToLocalGradNode>(
        new DtensorToLocalGradNode(1, 1));  // NOLINT

    // Set TensorWrappers for Forward Inputs if needed
    grad_node->SetTensorWrapperNoNeedBuffer_Input(input);
  }

  // Forward API Call
  // dtensor_to_local_func(input, api_result, dist_attr);
  PADDLE_ENFORCE_EQ(
      input.initialized(),
      true,
      phi::errors::InvalidArgument("Input tensor must be initialized."));

  paddle::Tensor api_result;
  if (input.is_dist_tensor()) {
    std::shared_ptr<phi::TensorBase> tensor_base = input.impl();

    auto dist_tensor =
        std::static_pointer_cast<phi::distributed::DistTensor>(tensor_base);

    std::shared_ptr<phi::DenseTensor> local_dense = dist_tensor->shared_value();

    PADDLE_ENFORCE_NE(local_dense,
                      nullptr,
                      phi::errors::InvalidArgument(
                          "The local DenseTensor inside DistTensor is null."));

    PADDLE_ENFORCE_EQ(
        local_dense->initialized(),
        true,
        phi::errors::PreconditionNotMet(
            "The local DenseTensor inside DistTensor is not initialized."));

    api_result = paddle::Tensor(local_dense);
  } else {
    api_result = input;
  }

  // Get Outputs
  auto& out = api_result;

  // Get Output AutoGradMeta
  egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out);

  // Set grad_node after API call
  if (require_any_grad) {
    egr::EagerUtils::PassStopGradient(false, out_autograd_meta);

    // SetGradOutMeta & SetEdges
    grad_node->SetGradOutMeta(input, 0);
    // SetOutRank & SetHistory & SetGradInMeta
    if (out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0);
      egr::EagerUtils::SetHistory(out_autograd_meta, grad_node);
    }
    grad_node->SetGradInMeta(out, 0);
  }

  return out;
#else
  PADDLE_THROW(common::errors::Unavailable(
      "dtensor_to_local is not supported in this version of Paddle. Try to "
      "recompile it "
      "with WITH_DISTRIBUTE=ON and reinstall this package."));
  return paddle::Tensor();
#endif
}
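The forward path above does not copy tensor data: when the input is a DistTensor, the returned local-view tensor is built directly on the DenseTensor already held by shared_value(). A minimal standalone sketch of that ownership pattern (simplified stand-in types, not Paddle's):

// Standalone sketch (not Paddle code): "to local" returns a handle to the
// buffer the distributed wrapper already owns, so no data is copied.
#include <cassert>
#include <memory>
#include <vector>

struct Dense { std::vector<float> data; };       // stand-in for DenseTensor
struct Dist  { std::shared_ptr<Dense> local; };  // stand-in for DistTensor

std::shared_ptr<Dense> ToLocal(const Dist& dist) {
  // The local value must exist and be initialized, mirroring the checks above.
  assert(dist.local && !dist.local->data.empty());
  return dist.local;  // shares storage; only the reference count changes
}

int main() {
  Dist dist{std::make_shared<Dense>(Dense{{1.f, 2.f, 3.f}})};
  auto local = ToLocal(dist);
  assert(local.get() == dist.local.get());  // identical underlying storage
  return 0;
}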

paddle/fluid/eager/api/manual/eager_manual/nodes/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -4,4 +4,6 @@ set(eager_manual_nodes
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/sync_batch_norm_node.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/reshard_node.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/dtensor_to_local_node.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/dtensor_from_local_node.cc
     PARENT_SCOPE)
paddle/fluid/eager/api/manual/eager_manual/nodes/dtensor_from_local_node.cc

Lines changed: 124 additions & 0 deletions
@@ -0,0 +1,124 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "glog/logging.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/core/platform/profiler/event_tracing.h"

paddle::small_vector<std::vector<paddle::Tensor>,
                     egr::kSlotSmallVectorSize>  // NOLINT
DtensorFromLocalGradNode::operator()(
    paddle::small_vector<std::vector<paddle::Tensor>,
                         egr::kSlotSmallVectorSize>& grads,
    bool create_graph,
    bool is_new_grad) {
#ifdef PADDLE_WITH_DISTRIBUTE
  VLOG(3) << "Running AD API GRAD: "
          << "dtensor_from_local";

  // This 'Local_XXXGradNode' record event is different with
  // 'Global_XXXGradNode' event.
  // * 'Local_XXXGradNode' will only cover execution time of this function.
  // * 'Global_XXXGradNode' will not only cover execution time of this
  //   function, but also include gradient accumulation when the output(s) of
  //   the corresponding forward OP are shared by other OP(s), which may have
  //   extra accumulation overhead than 'Local_XXXGradNode'.
  phi::RecordEvent node_execution_inner(
      "Local_DtensorFromLocalGradNode", phi::TracerEventType::OperatorInner, 1);

  // Apply Gradient Hooks
  auto hooked_grad = ApplyGradientHooks(grads);

  // Collect GradIn Tensors, Attrs and Recovered TensorWrappers
  auto& grad_out = hooked_grad[0][0];
  // Prepare Grad function call

  const auto& out_metas = OutputMeta();
  paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
      returns(1);

  out_metas[0].size() == 0 ? returns[0].resize(1)
                           : returns[0].resize(out_metas[0].size());

  auto& grad_input = returns[0][0];

  VLOG(5) << "Running C++ API: "
          << "dtensor_from_local_func";

  if (VLOG_IS_ON(3)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} ";

    std::string input_str = "";
    const char* TENSOR_OUT_GRAD_TEMPLATE = " \n( out_grad , [%s]), ";
    std::string input_out_grad_str = paddle::string::Sprintf(
        TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_out));
    input_str += input_out_grad_str;
    VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str);
  }

  // Backward call dtensor_from_local_func function
  auto output = egr::EagerUtils::RecoverTensorWrapper(&this->output_);
  const auto& output_dist_attr =
      std::static_pointer_cast<phi::distributed::DistTensor>(output.impl())
          ->dist_attr();

  auto grad_out_ptr = paddle::reshard(grad_out, output_dist_attr);
  std::shared_ptr<phi::DenseTensor> local_dense = grad_out_ptr->shared_value();

  PADDLE_ENFORCE_NE(local_dense,
                    nullptr,
                    phi::errors::InvalidArgument(
                        "The local DenseTensor inside DistTensor is null."));

  PADDLE_ENFORCE_EQ(
      local_dense->initialized(),
      true,
      phi::errors::PreconditionNotMet(
          "The local DenseTensor inside DistTensor is not initialized."));

  grad_input.set_impl(local_dense);

  VLOG(5) << "Finish C++ API: dtensor_from_local_func";
  VLOG(6) << "gradnode_ptr = " << this;

  if (VLOG_IS_ON(4)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } ";
    std::string input_str = "";
    std::string output_str = "";
    const char* TENSOR_OUT_GRAD_TEMPLATE = " \n( out_grad , [%s]), ";
    std::string input_out_grad_str = paddle::string::Sprintf(
        TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_out));
    input_str += input_out_grad_str;
    const char* TENSOR_X_GRAD_TEMPLATE = " \n ( input_grad , [%s]), ";
    std::string output_x_grad_str = paddle::string::Sprintf(
        TENSOR_X_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_input));
    output_str += output_x_grad_str;
    VLOG(4) << paddle::string::Sprintf(
        INPUT_PRINT_TEMPLATE, input_str, output_str);
  }

  return returns;
#else
  PADDLE_THROW(common::errors::Unavailable(
      "ReshardGrad is not supported in this version of Paddle. Try to "
      "recompile it with WITH_DISTRIBUTE=ON and reinstall this package."));
  return paddle::small_vector<std::vector<paddle::Tensor>,
                              egr::kSlotSmallVectorSize>(1);
#endif
}
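The backward rule implemented above is: reshard the incoming (global-view) gradient to the process mesh and placements recorded for the forward output, then hand its local shard back as the gradient of the local-view input. At the shape level this is the inverse of the forward's global-shape inference; a minimal standalone sketch under the same even-sharding assumption (stand-in types, not Paddle's):

// Standalone sketch (not Paddle code): after resharding, each rank keeps the
// slice of the global gradient that matches its local input shard.
#include <cstdint>
#include <iostream>
#include <vector>

struct Placement {
  bool is_shard;      // true if this mesh dimension shards a tensor axis
  int64_t shard_dim;  // which tensor axis it shards (valid when is_shard)
};

std::vector<int64_t> LocalGradDims(const std::vector<int64_t>& global_dims,
                                   const std::vector<Placement>& placements,
                                   const std::vector<int64_t>& mesh_shape) {
  std::vector<int64_t> local_dims = global_dims;
  for (size_t i = 0; i < placements.size(); ++i) {
    if (placements[i].is_shard) {
      // Even sharding assumed: each rank gets 1/mesh_shape[i] of the axis.
      local_dims[placements[i].shard_dim] /= mesh_shape[i];
    }
  }
  return local_dims;
}

int main() {
  // An [8, 8] global gradient, axis 0 sharded over 4 ranks, yields a [2, 8]
  // gradient for each rank's local input.
  for (int64_t d : LocalGradDims({8, 8}, {{true, 0}}, {4})) {
    std::cout << d << " ";  // prints: 2 8
  }
  std::cout << "\n";
  return 0;
}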
