
Commit fd1526f

[AutoParallel] support compute in local view dynamic graph part (#70519)
1 parent 423d815 commit fd1526f

17 files changed: +975 -17 lines

paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h

Lines changed: 9 additions & 0 deletions
@@ -16,6 +16,8 @@
 
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/core/distributed/auto_parallel/dist_attr.h"
+#include "paddle/phi/core/distributed/auto_parallel/placement_types.h"
+#include "paddle/phi/core/distributed/auto_parallel/process_mesh.h"
 
 paddle::Tensor add_n_ad_func(const std::vector<paddle::Tensor>& x);
 
@@ -55,6 +57,13 @@ paddle::Tensor reshard_ad_function(
     const paddle::Tensor& tensor,
     const phi::distributed::TensorDistAttr dist_attr);
 
+paddle::Tensor dtensor_to_local_ad_function(const paddle::Tensor& input);
+
+paddle::Tensor dtensor_from_local_ad_function(
+    const paddle::Tensor& input,
+    const phi::distributed::ProcessMesh& processmesh,
+    const phi::distributed::Placements& placements);
+
 namespace sparse {
 std::tuple<paddle::Tensor,
            paddle::Tensor&,

paddle/fluid/eager/api/manual/eager_manual/forwards/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -4,4 +4,6 @@ set(eager_manual_functions
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/sync_batch_norm_fwd_func.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/reshard_fwd_func.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/dtensor_to_local_fwd_func.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/dtensor_from_local_fwd_func.cc
     PARENT_SCOPE)
paddle/fluid/eager/api/manual/eager_manual/forwards/dtensor_from_local_fwd_func.cc

Lines changed: 106 additions & 0 deletions
@@ -0,0 +1,106 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h"
#include "paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.h"
#include "paddle/phi/core/platform/profiler/event_tracing.h"

paddle::Tensor dtensor_from_local_ad_function(
    const paddle::Tensor& input,
    const phi::distributed::ProcessMesh& process_mesh,
    const phi::distributed::Placements& placements) {
#ifdef PADDLE_WITH_DISTRIBUTE
  VLOG(3) << "Running AD API: "
          << "dtensor_from_local dygraph";
  // Dygraph Record Event
  phi::RecordEvent dygraph_entrance_record_event(
      "dtensor_from_local dygraph", phi::TracerEventType::Communication, 1);

  // Get Input AutoGradMeta
  egr::AutogradMeta* input_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(input);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, input_autograd_meta);

  // Node Declaration
  std::shared_ptr<DtensorFromLocalGradNode> grad_node;

  // Set grad_node before API Call
  if (require_any_grad) {
    phi::RecordEvent node_creation_record_event(
        "dtensor_from_local node_creation",
        phi::TracerEventType::Communication,
        1);

    // Node Construction
    grad_node = std::shared_ptr<DtensorFromLocalGradNode>(
        new DtensorFromLocalGradNode(1, 1));  // NOLINT
  }

  auto dense_tensor_ptr =
      std::static_pointer_cast<phi::DenseTensor>(input.impl());

  auto global_dims = common::vectorize(dense_tensor_ptr->dims());
  for (size_t i = 0; i < placements.size(); i++) {
    auto placement = placements[i];
    if (placement->is_shard()) {
      auto shard_dim =
          dynamic_cast<const phi::distributed::Shard&>(*placement).get_dim();
      global_dims[shard_dim] = global_dims[shard_dim] * process_mesh.shape()[i];
    }
  }

  auto dist_out_ptr = std::make_shared<phi::distributed::DistTensor>(
      dense_tensor_ptr,
      common::make_ddim(global_dims),
      process_mesh,
      placements);

  auto api_result = paddle::Tensor(dist_out_ptr);

  // Get Outputs
  auto& out = api_result;

  // Get Output AutoGradMeta
  egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out);

  // Set grad_node after API call
  if (require_any_grad) {
    egr::EagerUtils::PassStopGradient(false, out_autograd_meta);

    // SetGradOutMeta & SetEdges
    grad_node->SetGradOutMeta(input, 0);
    // SetOutRank & SetHistory & SetGradInMeta
    if (out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0);
      egr::EagerUtils::SetHistory(out_autograd_meta, grad_node);
    }
    grad_node->SetGradInMeta(out, 0);
    grad_node->SetTensorWrapperNoNeedBuffer_Output(out);
  }

  return out;
#else
  PADDLE_THROW(common::errors::Unavailable(
      "DtensorFromLocal is not supported in this version of Paddle. Try to "
      "recompile it "
      "with WITH_DISTRIBUTE=ON and reinstall this package."));
  return paddle::Tensor();
#endif
}
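Note on the shape logic in dtensor_from_local_ad_function above: the global shape is inferred from the local shard's shape by multiplying every sharded axis by the size of the mesh dimension that shards it (even sharding is assumed). A minimal standalone sketch of that rule, using simplified stand-in types rather than Paddle's Placement/ProcessMesh classes:

// Standalone sketch (not Paddle code): recover the global shape of a tensor
// from the shape of one local shard, assuming every shard has the same size.
#include <cstdint>
#include <iostream>
#include <vector>

struct Placement {
  bool is_shard;      // true if this mesh dimension shards a tensor axis
  int64_t shard_dim;  // which tensor axis it shards (valid when is_shard)
};

std::vector<int64_t> InferGlobalDims(const std::vector<int64_t>& local_dims,
                                     const std::vector<Placement>& placements,
                                     const std::vector<int64_t>& mesh_shape) {
  std::vector<int64_t> global_dims = local_dims;
  for (size_t i = 0; i < placements.size(); ++i) {
    if (placements[i].is_shard) {
      // Each rank holds 1/mesh_shape[i] of the sharded axis, so the global
      // extent is the local extent times the mesh size on that dimension.
      global_dims[placements[i].shard_dim] *= mesh_shape[i];
    }
  }
  return global_dims;
}

int main() {
  // A [2, 8] local shard on a 1-D mesh of 4 ranks, sharded on axis 0,
  // corresponds to an [8, 8] global tensor.
  for (int64_t d : InferGlobalDims({2, 8}, {{true, 0}}, {4})) {
    std::cout << d << " ";  // prints: 8 8
  }
  std::cout << "\n";
  return 0;
}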
paddle/fluid/eager/api/manual/eager_manual/forwards/dtensor_to_local_fwd_func.cc

Lines changed: 114 additions & 0 deletions
@@ -0,0 +1,114 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/phi/core/platform/profiler/event_tracing.h"

paddle::Tensor dtensor_to_local_ad_function(const paddle::Tensor& input) {
#ifdef PADDLE_WITH_DISTRIBUTE
  VLOG(3) << "Running AD API: "
          << "dtensor_to_local dygraph";
  // Dygraph Record Event
  phi::RecordEvent dygraph_entrance_record_event(
      "dtensor_to_local dygraph", phi::TracerEventType::Communication, 1);

  // Get Input AutoGradMeta
  egr::AutogradMeta* input_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(input);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, input_autograd_meta);

  // Node Declaration
  std::shared_ptr<DtensorToLocalGradNode> grad_node;

  // Set grad_node before API Call
  if (require_any_grad) {
    phi::RecordEvent node_creation_record_event(
        "dtensor_to_local node_creation",
        phi::TracerEventType::Communication,
        1);

    // Node Construction
    grad_node = std::shared_ptr<DtensorToLocalGradNode>(
        new DtensorToLocalGradNode(1, 1));  // NOLINT

    // Set TensorWrappers for Forward Inputs if needed
    grad_node->SetTensorWrapperNoNeedBuffer_Input(input);
  }

  // Forward API Call
  // dtensor_to_local_func(input, api_result, dist_attr);
  PADDLE_ENFORCE_EQ(
      input.initialized(),
      true,
      phi::errors::InvalidArgument("Input tensor must be initialized."));

  paddle::Tensor api_result;
  if (input.is_dist_tensor()) {
    std::shared_ptr<phi::TensorBase> tensor_base = input.impl();

    auto dist_tensor =
        std::static_pointer_cast<phi::distributed::DistTensor>(tensor_base);

    std::shared_ptr<phi::DenseTensor> local_dense = dist_tensor->shared_value();

    PADDLE_ENFORCE_NE(local_dense,
                      nullptr,
                      phi::errors::InvalidArgument(
                          "The local DenseTensor inside DistTensor is null."));

    PADDLE_ENFORCE_EQ(
        local_dense->initialized(),
        true,
        phi::errors::PreconditionNotMet(
            "The local DenseTensor inside DistTensor is not initialized."));

    api_result = paddle::Tensor(local_dense);
  } else {
    api_result = input;
  }

  // Get Outputs
  auto& out = api_result;

  // Get Output AutoGradMeta
  egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out);

  // Set grad_node after API call
  if (require_any_grad) {
    egr::EagerUtils::PassStopGradient(false, out_autograd_meta);

    // SetGradOutMeta & SetEdges
    grad_node->SetGradOutMeta(input, 0);
    // SetOutRank & SetHistory & SetGradInMeta
    if (out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0);
      egr::EagerUtils::SetHistory(out_autograd_meta, grad_node);
    }
    grad_node->SetGradInMeta(out, 0);
  }

  return out;
#else
  PADDLE_THROW(common::errors::Unavailable(
      "dtensor_to_local is not supported in this version of Paddle. Try to "
      "recompile it "
      "with WITH_DISTRIBUTE=ON and reinstall this package."));
  return paddle::Tensor();
#endif
}
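The forward path above does not copy tensor data: when the input is a DistTensor, the returned local-view tensor is built directly on the DenseTensor already held by shared_value(). A minimal standalone sketch of that ownership pattern (simplified stand-in types, not Paddle's):

// Standalone sketch (not Paddle code): "to local" returns a handle to the
// buffer the distributed wrapper already owns, so no data is copied.
#include <cassert>
#include <memory>
#include <vector>

struct Dense { std::vector<float> data; };       // stand-in for DenseTensor
struct Dist  { std::shared_ptr<Dense> local; };  // stand-in for DistTensor

std::shared_ptr<Dense> ToLocal(const Dist& dist) {
  // The local value must exist and be initialized, mirroring the checks above.
  assert(dist.local && !dist.local->data.empty());
  return dist.local;  // shares storage; only the reference count changes
}

int main() {
  Dist dist{std::make_shared<Dense>(Dense{{1.f, 2.f, 3.f}})};
  auto local = ToLocal(dist);
  assert(local.get() == dist.local.get());  // identical underlying storage
  return 0;
}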

paddle/fluid/eager/api/manual/eager_manual/nodes/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -4,4 +4,6 @@ set(eager_manual_nodes
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/sync_batch_norm_node.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/reshard_node.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/dtensor_to_local_node.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/dtensor_from_local_node.cc
     PARENT_SCOPE)
paddle/fluid/eager/api/manual/eager_manual/nodes/dtensor_from_local_node.cc

Lines changed: 124 additions & 0 deletions
@@ -0,0 +1,124 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "glog/logging.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/core/platform/profiler/event_tracing.h"

paddle::small_vector<std::vector<paddle::Tensor>,
                     egr::kSlotSmallVectorSize>  // NOLINT
DtensorFromLocalGradNode::operator()(
    paddle::small_vector<std::vector<paddle::Tensor>,
                         egr::kSlotSmallVectorSize>& grads,
    bool create_graph,
    bool is_new_grad) {
#ifdef PADDLE_WITH_DISTRIBUTE
  VLOG(3) << "Running AD API GRAD: "
          << "dtensor_from_local";

  // This 'Local_XXXGradNode' record event is different with
  // 'Global_XXXGradNode' event.
  // * 'Local_XXXGradNode' will only cover execution time of this function.
  // * 'Global_XXXGradNode' will not only cover execution time of this
  //   function, but also include gradient accumulation when the output(s) of
  //   the corresponding forward OP are shared by other OP(s), which may have
  //   extra accumulation overhead than 'Local_XXXGradNode'.
  phi::RecordEvent node_execution_inner(
      "Local_DtensorFromLocalGradNode", phi::TracerEventType::OperatorInner, 1);

  // Apply Gradient Hooks
  auto hooked_grad = ApplyGradientHooks(grads);

  // Collect GradIn Tensors, Attrs and Recovered TensorWrappers
  auto& grad_out = hooked_grad[0][0];
  // Prepare Grad function call

  const auto& out_metas = OutputMeta();
  paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
      returns(1);

  out_metas[0].size() == 0 ? returns[0].resize(1)
                           : returns[0].resize(out_metas[0].size());

  auto& grad_input = returns[0][0];

  VLOG(5) << "Running C++ API: "
          << "dtensor_from_local_func";

  if (VLOG_IS_ON(3)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} ";

    std::string input_str = "";
    const char* TENSOR_OUT_GRAD_TEMPLATE = " \n( out_grad , [%s]), ";
    std::string input_out_grad_str = paddle::string::Sprintf(
        TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_out));
    input_str += input_out_grad_str;
    VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str);
  }

  // Backward call dtensor_from_local_func function
  auto output = egr::EagerUtils::RecoverTensorWrapper(&this->output_);
  const auto& output_dist_attr =
      std::static_pointer_cast<phi::distributed::DistTensor>(output.impl())
          ->dist_attr();

  auto grad_out_ptr = paddle::reshard(grad_out, output_dist_attr);
  std::shared_ptr<phi::DenseTensor> local_dense = grad_out_ptr->shared_value();

  PADDLE_ENFORCE_NE(local_dense,
                    nullptr,
                    phi::errors::InvalidArgument(
                        "The local DenseTensor inside DistTensor is null."));

  PADDLE_ENFORCE_EQ(
      local_dense->initialized(),
      true,
      phi::errors::PreconditionNotMet(
          "The local DenseTensor inside DistTensor is not initialized."));

  grad_input.set_impl(local_dense);

  VLOG(5) << "Finish C++ API: dtensor_from_local_func";
  VLOG(6) << "gradnode_ptr = " << this;

  if (VLOG_IS_ON(4)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } ";
    std::string input_str = "";
    std::string output_str = "";
    const char* TENSOR_OUT_GRAD_TEMPLATE = " \n( out_grad , [%s]), ";
    std::string input_out_grad_str = paddle::string::Sprintf(
        TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_out));
    input_str += input_out_grad_str;
    const char* TENSOR_X_GRAD_TEMPLATE = " \n ( input_grad , [%s]), ";
    std::string output_x_grad_str = paddle::string::Sprintf(
        TENSOR_X_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_input));
    output_str += output_x_grad_str;
    VLOG(4) << paddle::string::Sprintf(
        INPUT_PRINT_TEMPLATE, input_str, output_str);
  }

  return returns;
#else
  PADDLE_THROW(common::errors::Unavailable(
      "ReshardGrad is not supported in this version of Paddle. Try to "
      "recompile it with WITH_DISTRIBUTE=ON and reinstall this package."));
  return paddle::small_vector<std::vector<paddle::Tensor>,
                              egr::kSlotSmallVectorSize>(1);
#endif
}
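The backward rule implemented above is: reshard the incoming (global-view) gradient to the process mesh and placements recorded for the forward output, then hand its local shard back as the gradient of the local-view input. At the shape level this is the inverse of the forward's global-shape inference; a minimal standalone sketch under the same even-sharding assumption (stand-in types, not Paddle's):

// Standalone sketch (not Paddle code): after resharding, each rank keeps the
// slice of the global gradient that matches its local input shard.
#include <cstdint>
#include <iostream>
#include <vector>

struct Placement {
  bool is_shard;      // true if this mesh dimension shards a tensor axis
  int64_t shard_dim;  // which tensor axis it shards (valid when is_shard)
};

std::vector<int64_t> LocalGradDims(const std::vector<int64_t>& global_dims,
                                   const std::vector<Placement>& placements,
                                   const std::vector<int64_t>& mesh_shape) {
  std::vector<int64_t> local_dims = global_dims;
  for (size_t i = 0; i < placements.size(); ++i) {
    if (placements[i].is_shard) {
      // Even sharding assumed: each rank gets 1/mesh_shape[i] of the axis.
      local_dims[placements[i].shard_dim] /= mesh_shape[i];
    }
  }
  return local_dims;
}

int main() {
  // An [8, 8] global gradient, axis 0 sharded over 4 ranks, yields a [2, 8]
  // gradient for each rank's local input.
  for (int64_t d : LocalGradDims({8, 8}, {{true, 0}}, {4})) {
    std::cout << d << " ";  // prints: 2 8
  }
  std::cout << "\n";
  return 0;
}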
