Skip to content

Commit 8960332

Browse files
chen2016013 authored and lixcli committed
[PIR] Refactor Inplace strategy (PaddlePaddle#65491)
* Refactor Inplace
* update
* handle for tensorarray
* update
* fix assign_value_
* update
* update
* update
* fix custom meta bug
* fix optional value bug
1 parent 1cf0e34 commit 8960332

File tree

12 files changed

+185
-18
lines changed

12 files changed

+185
-18
lines changed

paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,18 @@ CustomKernelInstruction::CustomKernelInstruction(
410410
GetStreamPriority()));
411411
VLOG(6) << "finish process device context";
412412

413+
auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(*custom_op_meta_);
414+
for (auto const& pair : op_inplace_map) {
415+
pir::Value input_value =
416+
op->operand_source(yaml_info_parser.InputName2Id().at(pair.first));
417+
pir::Value output_value =
418+
op->result(yaml_info_parser.OutputName2Id().at(pair.second));
419+
if (IsInvalid(output_value) && IsInvalid(input_value)) {
420+
this->AddInplace(value_exec_info_.GetVarByValue(input_value),
421+
value_exec_info_.GetVarByValue(output_value));
422+
}
423+
}
424+
413425
InitInputsOutputsIds(op, value_exec_info_);
414426
VLOG(6) << "finish process inputs outputs index";
415427

@@ -453,6 +465,7 @@ void CustomKernelInstruction::UpdateOutputMeta(
453465
auto out_meta = phi::DenseTensorUtils::GetMutableMeta(out_in_scope);
454466
out_meta->dims = phi::make_ddim(output_shapes[i]);
455467
out_meta->dtype = output_dtypes[i];
468+
out_meta->strides = out_meta->calc_strides(out_meta->dims);
456469
}
457470
}
458471

@@ -504,7 +517,9 @@ void CustomKernelInstruction::Run() {
504517
vec_input_name2id_map_,
505518
custom_attrs_);
506519
UpdateOutputMeta(output_shapes, output_dtypes);
507-
520+
for (auto& pair : this->InplaceInfo()) {
521+
ShareVarBuffer(pair.first, pair.second);
522+
}
508523
VLOG(6) << "Run custom op " << custom_op_name_ << " kernel.";
509524
kernel_func_(&custom_kernel_ctx_);
510525
}

paddle/fluid/framework/new_executor/instruction/instruction_base.cc

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,12 +273,12 @@ const std::vector<Variable*>& InstructionBase::EagerGCVars() const {
273273

274274
void InstructionBase::ClearEagerGCVars() { eager_gc_vars_.clear(); }
275275

276-
const std::vector<std::pair<Variable*, Variable*>>&
276+
const std::vector<std::pair<const Variable*, Variable*>>&
277277
InstructionBase::InplaceInfo() const {
278278
return vec_inplace_in_to_out_;
279279
}
280280

281-
void InstructionBase::AddInplace(Variable* in, Variable* out) {
281+
void InstructionBase::AddInplace(const Variable* in, Variable* out) {
282282
vec_inplace_in_to_out_.emplace_back(in, out);
283283
}
284284

@@ -334,6 +334,17 @@ void InstructionBase::InitInputsOutputsIds(
334334
outputs.emplace(value, outputs_id);
335335
}
336336
}
337+
338+
const auto value_2_var_name_map = value_exec_info.GetValue2VarName();
339+
for (auto inplace_var_pair : this->InplaceInfo()) {
340+
for (auto item : value_2_var_name_map) {
341+
if (item.second == value_exec_info.GetVarName(inplace_var_pair.first)) {
342+
std::vector<int> outputs_id = GetValueIds(item.first, value_exec_info);
343+
outputs.emplace(item.first, outputs_id);
344+
break;
345+
}
346+
}
347+
}
337348
SetOutputs(outputs);
338349
VLOG(8) << "finish process outputs_index";
339350
}

paddle/fluid/framework/new_executor/instruction/instruction_base.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ class InstructionBase {
127127
void AddEagerGCVar(Variable* var);
128128
void ClearEagerGCVars();
129129

130-
const std::vector<std::pair<Variable*, Variable*>>& InplaceInfo() const;
131-
void AddInplace(Variable* in, Variable* out);
130+
const std::vector<std::pair<const Variable*, Variable*>>& InplaceInfo() const;
131+
void AddInplace(const Variable* in, Variable* out);
132132
void ClearInplace();
133133

134134
std::map<int, int>& GetMutableInplaceBackMap() { return inplace_back_map_; }
@@ -207,7 +207,7 @@ class InstructionBase {
207207

208208
std::vector<Variable*> eager_gc_vars_;
209209

210-
std::vector<std::pair<Variable*, Variable*>>
210+
std::vector<std::pair<const Variable*, Variable*>>
211211
vec_inplace_in_to_out_; // If not use share data, need this ?
212212

213213
std::map<int, int> inplace_back_map_;

paddle/fluid/framework/new_executor/instruction/instruction_util.cc

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,4 +406,106 @@ bool GetCondData(const phi::DenseTensor& cond) {
406406
return cpu_cond->data<bool>()[0];
407407
}
408408

409+
// NOTE(chenxi67): Here, we only perform inplace processing for variables whose
410+
// type is NOT TensorArray. It has already been processed in the previous
411+
// step(HandleForInplaceVarOp).
412+
void HandleForInplaceOp(pir::Operation* op,
413+
const ValueExecutionInfo* value_exe_info,
414+
InstructionBase* instr) {
415+
if (op->num_results() < 1) return;
416+
pir::IrContext* ctx = pir::IrContext::Instance();
417+
std::string op_name = op->name();
418+
if (op->attributes().count("op_name")) {
419+
op_name =
420+
op->attributes().at("op_name").dyn_cast<pir::StrAttribute>().AsString();
421+
}
422+
423+
pir::OpInfo op_info = ctx->GetRegisteredOpInfo(op_name);
424+
paddle::dialect::OpYamlInfoParser yaml_parser(
425+
op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>()
426+
->get_op_info_(op_name),
427+
paddle::dialect::IsLegacyOp(op_name));
428+
429+
for (size_t i = 0; i < op->num_results(); ++i) {
430+
pir::Value value = op->result(i);
431+
if (!IsInvalid(value)) {
432+
VLOG(8) << "Number " << i << " result of " << op_name
433+
<< " is not invalid, so skip build a variable.";
434+
continue;
435+
}
436+
if (IsNeedVarInplace(op, value, op_name)) {
437+
continue;
438+
}
439+
std::string value_name = yaml_parser.OutputNames()[i];
440+
if (yaml_parser.HasInplace(value_name)) {
441+
const std::string& inplace_name = yaml_parser.InplaceName(value_name);
442+
pir::Value inplace_value =
443+
op->operand_source(yaml_parser.InputName2Id().at(inplace_name));
444+
std::string input_var_name = value_exe_info->GetVarName(inplace_value);
445+
std::string output_var_name = value_exe_info->GetVarName(value);
446+
PADDLE_ENFORCE_NE(input_var_name,
447+
"",
448+
phi::errors::InvalidArgument(
449+
"The input var name of inplace op is empty."));
450+
PADDLE_ENFORCE_NE(output_var_name,
451+
"",
452+
phi::errors::InvalidArgument(
453+
"The output var name of inplace op is empty."));
454+
VLOG(4) << "inplace: " << value_name << " -> " << inplace_name
455+
<< " (var: " << input_var_name << ")";
456+
instr->AddInplace(value_exe_info->GetVarByValue(inplace_value),
457+
value_exe_info->GetVarByValue(value));
458+
} else if (yaml_parser.HasView(value_name)) {
459+
const std::string& view_name = yaml_parser.ViewName(value_name);
460+
pir::Value view_value =
461+
op->operand_source(yaml_parser.InputName2Id().at(view_name));
462+
// const std::string& var_name = value_2_var_name->at(view_value);
463+
std::string input_var_name = value_exe_info->GetVarName(view_value);
464+
std::string output_var_name = value_exe_info->GetVarName(value);
465+
466+
PADDLE_ENFORCE_NE(input_var_name,
467+
"",
468+
platform::errors::InvalidArgument(
469+
"The input var name of view op is empty."));
470+
PADDLE_ENFORCE_NE(output_var_name,
471+
"",
472+
platform::errors::InvalidArgument(
473+
"The output var name of view op is empty."));
474+
VLOG(4) << "view: " << value_name << " -> " << view_name
475+
<< " (var: " << input_var_name << ")";
476+
instr->AddInplace(value_exe_info->GetVarByValue(view_value),
477+
value_exe_info->GetVarByValue(value));
478+
}
479+
}
480+
}
481+
482+
void ShareVarBuffer(const Variable* src_var, Variable* dst_var) {
483+
if (src_var->IsType<phi::DenseTensor>()) {
484+
auto& src_tensor = src_var->Get<phi::DenseTensor>();
485+
auto* tmp_dst_tensor = dst_var->GetMutable<phi::DenseTensor>();
486+
tmp_dst_tensor->ShareBufferWith(src_tensor);
487+
return;
488+
} else if (src_var->IsType<phi::SelectedRows>()) {
489+
auto* tmp_dst_slr = dst_var->GetMutable<phi::SelectedRows>();
490+
auto* dst_t = tmp_dst_slr->mutable_value();
491+
auto& src_slr = src_var->Get<phi::SelectedRows>();
492+
auto& src_t = src_slr.value();
493+
dst_t->ShareBufferWith(src_t);
494+
return;
495+
} else if (src_var->IsType<VariableRefArray>()) {
496+
auto src_var_array = src_var->Get<VariableRefArray>();
497+
auto* dst_var_array = dst_var->GetMutable<VariableRefArray>();
498+
for (size_t i = 0; i < src_var_array.size(); ++i) {
499+
Variable* copy_var = const_cast<Variable*>(dst_var_array->at(i));
500+
ShareVarBuffer(src_var_array.at(i), copy_var);
501+
}
502+
return;
503+
} else {
504+
PADDLE_THROW(phi::errors::PreconditionNotMet(
505+
"Output only support DenseTensorType "
506+
"or SelectedRowsType or VariableRefArray"));
507+
}
508+
return;
509+
}
510+
409511
} // namespace paddle::framework

paddle/fluid/framework/new_executor/instruction/instruction_util.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,5 +64,10 @@ void InsertInplacedExternalInputsToOuts(
6464

6565
bool GetCondData(const phi::DenseTensor& cond);
6666

67+
void HandleForInplaceOp(pir::Operation* op,
68+
const ValueExecutionInfo* value_exe_info,
69+
InstructionBase* instr);
70+
71+
void ShareVarBuffer(const Variable* src_var, Variable* dst_var);
6772
} // namespace framework
6873
} // namespace paddle

paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,11 @@ LegacyKernelInstruction::LegacyKernelInstruction(
163163

164164
VLOG(6) << "finish process kernel context";
165165

166+
if (op->attributes().count("is_inplace") != 0 &&
167+
op->attributes().at("is_inplace").dyn_cast<pir::BoolAttribute>().data()) {
168+
HandleForInplaceOp(op, value_exec_info_, this);
169+
}
170+
166171
InitInputsOutputsIds(op, *value_exec_info);
167172
VLOG(6) << "finish process inputs outputs index";
168173

@@ -185,6 +190,9 @@ void LegacyKernelInstruction::Run() {
185190
if (infer_meta_interface_) {
186191
infer_meta_interface_->infer_meta_(&(infer_meta_context_));
187192
}
193+
for (auto& pair : this->InplaceInfo()) {
194+
ShareVarBuffer(pair.first, pair.second);
195+
}
188196
VLOG(6) << "Run op " << legacy_op_name_ << " kernel.";
189197
(*(phi_kernel_))((kernel_context_));
190198
}

paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,10 @@ PhiKernelInstruction::PhiKernelInstruction(
160160

161161
kernel_context_.SetDeviceContext(dev_ctx);
162162
VLOG(6) << "finish process kernel context";
163-
163+
if (op->attributes().count("is_inplace") != 0 &&
164+
op->attributes().at("is_inplace").dyn_cast<pir::BoolAttribute>().data()) {
165+
HandleForInplaceOp(op, value_exec_info_, this);
166+
}
164167
InitInputsOutputsIds(op, *value_exec_info);
165168
VLOG(6) << "finish process inputs outputs index";
166169

@@ -181,6 +184,9 @@ void PhiKernelInstruction::Run() {
181184
infer_meta_interface_->infer_meta_(&(infer_meta_context_));
182185
}
183186
VLOG(6) << "End run op " << phi_op_name_ << " infer meta.";
187+
for (auto& pair : this->InplaceInfo()) {
188+
ShareVarBuffer(pair.first, pair.second);
189+
}
184190
VLOG(6) << "Begin run op " << phi_op_name_ << " kernel.";
185191
(*(phi_kernel_))(&(kernel_context_));
186192
VLOG(6) << "End run op " << phi_op_name_ << " kernel.";

paddle/fluid/framework/new_executor/new_executor_defs.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -306,12 +306,12 @@ const platform::DeviceContext& Instruction::DeviceContext() const {
306306
return dev_ctx_;
307307
}
308308

309-
const std::vector<std::pair<Variable*, Variable*>>& Instruction::InplaceInfo()
310-
const {
309+
const std::vector<std::pair<const Variable*, Variable*>>&
310+
Instruction::InplaceInfo() const {
311311
return vec_inplace_in_to_out_;
312312
}
313313

314-
void Instruction::AddInplace(Variable* in, Variable* out) {
314+
void Instruction::AddInplace(const Variable* in, Variable* out) {
315315
vec_inplace_in_to_out_.emplace_back(in, out);
316316
}
317317

paddle/fluid/framework/new_executor/new_executor_defs.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -295,9 +295,9 @@ class Instruction {
295295

296296
const platform::DeviceContext& DeviceContext() const;
297297

298-
const std::vector<std::pair<Variable*, Variable*>>& InplaceInfo() const;
298+
const std::vector<std::pair<const Variable*, Variable*>>& InplaceInfo() const;
299299

300-
void AddInplace(Variable* in, Variable* out);
300+
void AddInplace(const Variable* in, Variable* out);
301301

302302
void ClearInplace();
303303

@@ -340,7 +340,7 @@ class Instruction {
340340

341341
std::vector<size_t> gc_check_vars_;
342342

343-
std::vector<std::pair<Variable*, Variable*>> vec_inplace_in_to_out_;
343+
std::vector<std::pair<const Variable*, Variable*>> vec_inplace_in_to_out_;
344344

345345
bool pre_define_context_{false};
346346
};

paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.cc

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -682,9 +682,21 @@ void HandleForSpecialOp(pir::Operation* op,
682682
}
683683
}
684684

685-
void HandleForInplaceOp(pir::Operation* op,
686-
const std::string& var_name_prefix,
687-
ValueExecutionInfo* value_exe_info) {
685+
// Returns true when result `value` of an inplace op must be handled at the
// Variable level (by HandleForInplaceVarOp) instead of by buffer sharing:
// TensorArray-typed outputs, and pd_op.assign_value_ (whose output tensor
// may be re-allocated, so sharing the holder alone is not enough).
bool IsNeedVarInplace(pir::Operation* op,
                      pir::Value value,
                      std::string op_name) {
  (void)op;  // currently unused; kept so the declared interface stays stable
  return value.type().isa<paddle::dialect::DenseTensorArrayType>() ||
         op_name == "pd_op.assign_value_";
}
691+
692+
// NOTE(chenxi67): Here, we only perform inplace processing for variables that
693+
// need to be inplaced by var (mostly, whose type is TensorArray or re-Allocated
694+
// Densetensor). For other types of variables, we only share the holder of
695+
// DenseTensor but not the var*. The reason is that vector<DenseTensor> in
696+
// TensorArray (or re-Allocated Densetensor) cannot be shared totally.
697+
void HandleForInplaceVarOp(pir::Operation* op,
698+
const std::string& var_name_prefix,
699+
ValueExecutionInfo* value_exe_info) {
688700
if (op->num_results() < 1) return;
689701
pir::IrContext* ctx = pir::IrContext::Instance();
690702
std::string op_name = op->name();
@@ -706,6 +718,10 @@ void HandleForInplaceOp(pir::Operation* op,
706718
<< " is not invalid, so skip build a variable.";
707719
continue;
708720
}
721+
if (!IsNeedVarInplace(op, value, op_name)) {
722+
BuildValue(value, var_name_prefix, value_exe_info);
723+
continue;
724+
}
709725
std::string value_name = yaml_parser.OutputNames()[i];
710726
if (yaml_parser.HasInplace(value_name)) {
711727
const std::string& inplace_name = yaml_parser.InplaceName(value_name);
@@ -785,7 +801,7 @@ void BuildScope(const pir::Block& block,
785801
.at("is_inplace")
786802
.dyn_cast<pir::BoolAttribute>()
787803
.data()) {
788-
HandleForInplaceOp(&op, var_name_prefix, value_exe_info);
804+
HandleForInplaceVarOp(&op, var_name_prefix, value_exe_info);
789805
continue;
790806
} else {
791807
for (size_t i = 0; i < op.num_results(); ++i) {

0 commit comments

Comments (0)