
Commit f6e273f

[fluid_ops] Replace paddle::platform::DeviceContextPool in fluid/imperative (#65837)
* Fix
* ci
1 parent: 335e445 · commit: f6e273f

24 files changed: +81 −100 lines
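Every hunk below applies the same mechanical substitution: the DeviceContextPool singleton is now taken from the phi namespace instead of paddle::platform, while the Instance()/Get(place) interface is unchanged. A minimal sketch of the pattern; the helper function below is hypothetical and only the pool calls themselves are taken from the hunks:

// Hypothetical helper, for illustration only; not part of this commit.
// Before: platform::DeviceContextPool::Instance().Get(place)
// After:  phi::DeviceContextPool::Instance().Get(place)
phi::DeviceContext* LookUpContext(const phi::Place& place) {
  // The pool owns the returned context; callers must not delete it.
  return phi::DeviceContextPool::Instance().Get(place);
}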

paddle/fluid/imperative/all_reduce.cc

Lines changed: 2 additions & 2 deletions

@@ -96,7 +96,7 @@ static void AllReduce(const phi::SelectedRows &src,
   auto dtype = framework::TransToProtoVarType(src_tensor.dtype());
   auto nccl_dtype = platform::ToNCCLDataType(dtype);
   auto *dev_ctx = static_cast<phi::GPUContext *>(
-      platform::DeviceContextPool::Instance().Get(place));
+      phi::DeviceContextPool::Instance().Get(place));

   bool use_calc_stream = (dev_ctx->stream() == stream);
   VLOG(4) << "Is use calculate stream: " << use_calc_stream;

@@ -221,7 +221,7 @@ void AllReduce(const framework::Variable &src,
                bool use_calc_stream) {
   const auto &place = GetVarPlace(src);
   auto *dev_ctx = static_cast<phi::GPUContext *>(
-      platform::DeviceContextPool::Instance().Get(place));
+      phi::DeviceContextPool::Instance().Get(place));
   platform::NCCLComm *comm =
       platform::NCCLCommContext::Instance().Get(ring_id, place);
   gpuStream_t stream = (use_calc_stream ? dev_ctx->stream() : comm->stream());
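CUDA-only call sites such as these downcast the pooled context to phi::GPUContext to reach its stream. A sketch of the idiom, assuming place is a GPU place (names as in the hunks above):

auto *dev_ctx = static_cast<phi::GPUContext *>(
    phi::DeviceContextPool::Instance().Get(place));
// Valid only because a GPU place maps to a phi::GPUContext in the pool.
gpuStream_t stream = dev_ctx->stream();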

paddle/fluid/imperative/basic_engine.cc

Lines changed: 2 additions & 3 deletions

@@ -107,8 +107,7 @@ void BasicEngine::Init(
     VLOG(6) << "init loss grad:" << var->GradVarBase()->Name()
             << " as stop_gradient false";
     var->GradVarBase()->InnerSetOverriddenStopGradient(false);
-    auto* dev_ctx =
-        platform::DeviceContextPool::Instance().Get(fwd_var.place());
+    auto* dev_ctx = phi::DeviceContextPool::Instance().Get(fwd_var.place());
     if (grad_tensor == nullptr) {
       grad_var->Resize(fwd_var.dims());
       grad_var->mutable_data(fwd_var.place(), fwd_var.type());

@@ -158,7 +157,7 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) {
     }

     if (tensor && !tensor->IsInitialized()) {
-      auto* dev_ctx = platform::DeviceContextPool::Instance().Get(op.place());
+      auto* dev_ctx = phi::DeviceContextPool::Instance().Get(op.place());
       // NOTE(zhiqiu): since grad variable is ungenerated, so the dtype is not
       // correct. var->DataType() returns the default dtype, which is float32.
       // Here, we use the type of the corresponding forward datatype.

paddle/fluid/imperative/bkcl_context.cc

Lines changed: 4 additions & 4 deletions

@@ -157,7 +157,7 @@ void BKCLParallelContext::AllReduceByStream(const framework::Variable &src,
   auto place = place_;

   auto *dev_ctx = static_cast<platform::XPUDeviceContext *>(
-      platform::DeviceContextPool::Instance().Get(place));
+      phi::DeviceContextPool::Instance().Get(place));
   platform::BKCLComm *comm =
       platform::BKCLCommContext::Instance().Get(ring_id, place);
   XPUStream stream =

@@ -223,7 +223,7 @@ void BKCLParallelContext::WaitCompute(int ring_id) {
                         ring_id,
                         strategy_.nrings_));
   auto compute_stream = static_cast<platform::XPUDeviceContext *>(
-                            platform::DeviceContextPool::Instance().Get(place_))
+                            phi::DeviceContextPool::Instance().Get(place_))
                             ->stream();
   auto comm_stream = platform::BKCLCommContext::Instance()
                          .Get(ring_id, place_)

@@ -253,7 +253,7 @@ void BKCLParallelContext::WaitComm(int ring_id) {
                         ->dev_context()
                         ->stream();
   auto compute_stream = static_cast<platform::XPUDeviceContext *>(
-                            platform::DeviceContextPool::Instance().Get(place_))
+                            phi::DeviceContextPool::Instance().Get(place_))
                             ->stream();
   auto event = compute_events_[ring_id].get();

@@ -264,7 +264,7 @@ void BKCLParallelContext::WaitComm(int ring_id) {

 void BKCLParallelContext::SynchronizeCompute() {
   auto compute_dev_ctx = static_cast<platform::XPUDeviceContext *>(
-      platform::DeviceContextPool::Instance().Get(place_));
+      phi::DeviceContextPool::Instance().Get(place_));
   compute_dev_ctx->Wait();
 }

paddle/fluid/imperative/gradient_accumulator.cc

Lines changed: 16 additions & 16 deletions

@@ -84,7 +84,7 @@ void XPUTensorAddFunctor(const platform::Place& place,
                          phi::DenseTensor* dst) {
   using XPUType = typename XPUTypeTrait<T>::Type;
   platform::XPUDeviceContext* ctx = dynamic_cast<platform::XPUDeviceContext*>(
-      platform::DeviceContextPool::Instance().Get(place));
+      phi::DeviceContextPool::Instance().Get(place));
   const XPUType* x = reinterpret_cast<const XPUType*>(src.data<T>());
   XPUType* y = reinterpret_cast<XPUType*>(dst->mutable_data<T>(place));
   int r = -1;

@@ -201,8 +201,8 @@ void TensorAdd(const VarType& src, VarType* dst) {
 // check requiring input dtypes to be the same have been removed.
 #define PADDLE_TENSOR_ADD(T, CONTEXT)                                          \
   if (data_type == framework::DataTypeTrait<T>::DataType()) {                  \
-    auto cpu_ctx = static_cast<CONTEXT*>(                                      \
-        platform::DeviceContextPool::Instance().Get(place));                   \
+    auto cpu_ctx =                                                             \
+        static_cast<CONTEXT*>(phi::DeviceContextPool::Instance().Get(place));  \
     phi::AddKernel<T, CONTEXT>(*cpu_ctx, *dst_tensor, src_tensor, dst_tensor); \
     return;                                                                    \
   }

@@ -218,13 +218,13 @@ void TensorAdd(const VarType& src, VarType* dst) {
 #endif
 }

-#define TENSOR_ADD_EIGEN(T)                                \
-  auto cpu_ctx = static_cast<phi::CPUContext*>(            \
-      platform::DeviceContextPool::Instance().Get(place)); \
-  auto in = phi::EigenVector<T>::Flatten(src_tensor);      \
-  auto out = phi::EigenVector<T>::Flatten(*dst_tensor);    \
-  auto& p = *(cpu_ctx->eigen_device());                    \
-  out.device(p) = out + in;                                \
+#define TENSOR_ADD_EIGEN(T)                             \
+  auto cpu_ctx = static_cast<phi::CPUContext*>(         \
+      phi::DeviceContextPool::Instance().Get(place));   \
+  auto in = phi::EigenVector<T>::Flatten(src_tensor);   \
+  auto out = phi::EigenVector<T>::Flatten(*dst_tensor); \
+  auto& p = *(cpu_ctx->eigen_device());                 \
+  out.device(p) = out + in;                             \
   return;

   if (phi::is_cpu_place(place)) {

@@ -244,7 +244,7 @@ void TensorAdd(const VarType& src, VarType* dst) {
   if (data_type == framework::DataTypeTrait<T>::DataType()) {    \
     platform::CustomDeviceContext* ctx =                         \
         static_cast<platform::CustomDeviceContext*>(             \
-            platform::DeviceContextPool::Instance().Get(place)); \
+            phi::DeviceContextPool::Instance().Get(place));      \
     phi::stream::Stream stream(place, ctx->stream());            \
     auto device = phi::DeviceManager::GetDeviceWithPlace(place); \
     device->BlasAXPBY<T>(stream,                                 \

@@ -313,7 +313,7 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
   auto place = dst_tensor->place();
   auto data_type =
       framework::TransToProtoVarType(src_selected_rows.value().dtype());
-  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();

 #define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type)   \
   if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \

@@ -363,7 +363,7 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,

   const auto& place = src_tensor.place();
   auto data_type = framework::TransToProtoVarType(src_tensor.dtype());
-  auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+  auto* dev_ctx = phi::DeviceContextPool::Instance().Get(place);

   phi::DenseTensor* dst_tensor =
       GetInnerMutableTensor<phi::DenseTensor>(dst_tensor_var);

@@ -426,7 +426,7 @@ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
   auto place = src_selected_rows1.value().place();
   auto data_type =
       framework::TransToProtoVarType(src_selected_rows1.value().dtype());
-  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();

   std::vector<const phi::SelectedRows*> src_selected_rows;
   src_selected_rows.emplace_back(&src_selected_rows1);

@@ -667,7 +667,7 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
   if (!dst_var->Var().IsInitialized() ||
       !dst_var->Var().Get<phi::DenseTensor>().IsInitialized()) {
     VLOG(6) << "Set StopGradient Grad: " << dst_var->Name() << " as zero ";
-    auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+    auto* dev_ctx = phi::DeviceContextPool::Instance().Get(place);
     if (!dst_var->Var().IsInitialized()) {
       auto* tensor = dst_var->MutableVar()->GetMutable<phi::DenseTensor>();
       VLOG(6) << "Dims of " << dst_var->Name()

@@ -807,7 +807,7 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
   if (!dst_var->Var().IsInitialized() ||
       !dst_var->Var().Get<phi::DenseTensor>().IsInitialized()) {
     VLOG(6) << "Set StopGradient Grad: " << var->Name() << " as zero";
-    auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+    auto* dev_ctx = phi::DeviceContextPool::Instance().Get(place);
     if (!dst_var->Var().IsInitialized()) {
       auto* tensor = dst_var->MutableVar()->GetMutable<phi::DenseTensor>();
       VLOG(6) << "Dims of " << dst_var->Name()
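For reference, PADDLE_TENSOR_ADD expands once per supported dtype inside TensorAdd and dispatches on the runtime data_type. A sketch of one expansion after this change, assuming data_type, place, src_tensor, and dst_tensor are in scope as they are in TensorAdd:

// Roughly what PADDLE_TENSOR_ADD(float, phi::CPUContext) expands to.
if (data_type == framework::DataTypeTrait<float>::DataType()) {
  auto cpu_ctx = static_cast<phi::CPUContext*>(
      phi::DeviceContextPool::Instance().Get(place));
  phi::AddKernel<float, phi::CPUContext>(
      *cpu_ctx, *dst_tensor, src_tensor, dst_tensor);
  return;
}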

paddle/fluid/imperative/layer.cc

Lines changed: 6 additions & 6 deletions

@@ -240,7 +240,7 @@ void VarBase::ClearGradient(bool set_to_zero) {
     if (grad_t->IsInitialized()) {
       if (set_to_zero) {
         auto* dev_ctx =
-            platform::DeviceContextPool::Instance().Get(grad_t->place());
+            phi::DeviceContextPool::Instance().Get(grad_t->place());
         phi::funcs::set_constant(*dev_ctx, grad_t, 0.0f);
       } else {
         grad_t->clear();

@@ -302,10 +302,10 @@ std::shared_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
     new_var->SetType(Type());
     framework::TensorCopy(src_tensor, dst_place, dst_tensor);
     if (blocking) {
-      platform::DeviceContextPool::Instance().Get(dst_place)->Wait();
+      phi::DeviceContextPool::Instance().Get(dst_place)->Wait();
       auto src_place = src_tensor.place();
       if (!(src_place == dst_place)) {
-        platform::DeviceContextPool::Instance().Get(src_place)->Wait();
+        phi::DeviceContextPool::Instance().Get(src_place)->Wait();
       }
     }
     VLOG(4) << "copy tensor " << Name() << " from " << Place() << " to "

@@ -323,10 +323,10 @@ std::shared_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
                            dst_place,
                            dst_selected_rows->mutable_value());
     if (blocking) {
-      platform::DeviceContextPool::Instance().Get(dst_place)->Wait();
+      phi::DeviceContextPool::Instance().Get(dst_place)->Wait();
       auto src_place = src_selected_rows.place();
       if (!(src_place == dst_place)) {
-        platform::DeviceContextPool::Instance().Get(src_place)->Wait();
+        phi::DeviceContextPool::Instance().Get(src_place)->Wait();
       }
     }
     dst_selected_rows->set_height(src_selected_rows.height());

@@ -413,7 +413,7 @@ void VarBase::CopyFrom(const VarBase& src, const bool blocking) {
       framework::TensorCopy(src_tensor, place, dst_tensor);
     }
     if (blocking) {
-      platform::DeviceContextPool::Instance().Get(place)->Wait();
+      phi::DeviceContextPool::Instance().Get(place)->Wait();
     }
   }
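The blocking paths in NewVarBase and CopyFrom drain the destination context after the copy, and the source context too when it differs; only the pool's namespace changes. A condensed sketch of that logic, with tensor and place names as they appear in the hunks above:

framework::TensorCopy(src_tensor, dst_place, dst_tensor);
if (blocking) {
  phi::DeviceContextPool::Instance().Get(dst_place)->Wait();
  auto src_place = src_tensor.place();
  if (!(src_place == dst_place)) {
    // Also wait on the source context so the read side of the copy is done.
    phi::DeviceContextPool::Instance().Get(src_place)->Wait();
  }
}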

paddle/fluid/imperative/nccl_context.cc

Lines changed: 3 additions & 3 deletions

@@ -175,7 +175,7 @@ void NCCLParallelContext::WaitCompute(int ring_id) {
                         compute_events_.size()));

   auto compute_stream = static_cast<phi::GPUContext *>(
-                            platform::DeviceContextPool::Instance().Get(place_))
+                            phi::DeviceContextPool::Instance().Get(place_))
                             ->stream();
   auto comm_stream =
       platform::NCCLCommContext::Instance().Get(ring_id, place_)->stream();

@@ -205,7 +205,7 @@ void NCCLParallelContext::WaitComm(int ring_id) {
                         comm_events_.size()));

   auto compute_stream = static_cast<phi::GPUContext *>(
-                            platform::DeviceContextPool::Instance().Get(place_))
+                            phi::DeviceContextPool::Instance().Get(place_))
                             ->stream();
   auto comm_stream =
       platform::NCCLCommContext::Instance().Get(ring_id, place_)->stream();

@@ -223,7 +223,7 @@ void NCCLParallelContext::WaitComm(int ring_id) {

 void NCCLParallelContext::SynchronizeCompute() {
   auto *compute_dev_ctx = static_cast<phi::GPUContext *>(
-      platform::DeviceContextPool::Instance().Get(place_));
+      phi::DeviceContextPool::Instance().Get(place_));
   compute_dev_ctx->Wait();
 }

paddle/fluid/imperative/partial_grad_engine.cc

Lines changed: 1 addition & 1 deletion

@@ -322,7 +322,7 @@ static void FillConstantLike(const VariableWrapper &ref_var,
                              float value) {
   auto &ref_tensor = ref_var.Var().Get<phi::DenseTensor>();
   auto *dst_tensor = dst_var->MutableVar()->GetMutable<phi::DenseTensor>();
-  auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+  auto *dev_ctx = phi::DeviceContextPool::Instance().Get(place);
   dst_tensor->Resize(ref_tensor.dims());
   // TODO(jiabin): Ugly fix here we have fwd_data_type_ and data_type, since in
   // grad mission

paddle/fluid/imperative/prepared_operator.cc

Lines changed: 1 addition & 1 deletion

@@ -161,7 +161,7 @@ PreparedOp PrepareImpl(
     const phi::KernelFactory& phi_kernel_factory,
     const phi::OpUtilsMap& phi_op_utils_map,
     const phi::DefaultKernelSignatureMap& default_phi_kernel_sig_map) {
-  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
   auto* dev_ctx = pool.Get(place);

 #ifdef PADDLE_WITH_DNNL

paddle/fluid/imperative/reducer.cc

Lines changed: 4 additions & 4 deletions

@@ -756,7 +756,7 @@ void Reducer::MarkVarReady(const size_t var_index, const bool is_used_var) {
 #ifdef PADDLE_WITH_XPU_BKCL
     if (phi::is_xpu_place(group_tensor.place())) {
       auto dev_ctx = static_cast<platform::XPUDeviceContext *>(
-          platform::DeviceContextPool::Instance().Get(place_));
+          phi::DeviceContextPool::Instance().Get(place_));
       if (HasGrad(var_index)) {
         auto var_base = vars_[var_index]->GradVarBase();
         auto tensor = var_base->MutableVar()->GetMutable<phi::DenseTensor>();

@@ -773,7 +773,7 @@ void Reducer::MarkVarReady(const size_t var_index, const bool is_used_var) {
       }
     }
 #else
-    auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place_);
+    auto *dev_ctx = phi::DeviceContextPool::Instance().Get(place_);
     if (HasGrad(var_index)) {
       auto var_base = vars_[var_index]->GradVarBase();
       auto tensor = var_base->MutableVar()->GetMutable<phi::DenseTensor>();

@@ -924,7 +924,7 @@ void Reducer::ProcessUnusedDenseVars() {
   // avoid conflicts with communication.
   VLOG(3) << "Local used vars : "
           << string::join_strings(local_used_vars_, ',');
-  const auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place_);
+  const auto *dev_ctx = phi::DeviceContextPool::Instance().Get(place_);
   // H2D is to allreduce the local_used_vars_
   auto *global_used_tensor = global_used_vars_.GetMutable<phi::DenseTensor>();
   framework::TensorFromVector<int>(

@@ -976,7 +976,7 @@ void Reducer::ProcessUnusedDenseVars() {
     // 4. set grad tensor
     auto *dest_grad_tensor =
         grad_var_base_tmp->MutableVar()->GetMutable<phi::DenseTensor>();
-    const auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place_);
+    const auto *dev_ctx = phi::DeviceContextPool::Instance().Get(place_);
     paddle::framework::TensorCopy(
         src_tensor, place_, *dev_ctx, dest_grad_tensor);
     dest_grad_tensor->Resize(dest_dims);

paddle/fluid/imperative/tracer.cc

Lines changed: 2 additions & 2 deletions

@@ -470,7 +470,7 @@ void Tracer::TraceOp(const std::string& type,
                 default_attrs,
                 use_default_attr_map);

-    auto dev_ctx = paddle::platform::DeviceContextPool::Instance().Get(place);
+    auto dev_ctx = phi::DeviceContextPool::Instance().Get(place);
     for (auto& iter : need_backup_inputs2outputs) {
       iter.first->ResetHolder(need_backup_inputs2holder[iter.first]);
       iter.first->set_strides(need_backup_inputs2strides[iter.first]);

@@ -613,7 +613,7 @@ phi::KernelSignature Tracer::GetExpectedKernelSignature(
     framework::AttributeMap attrs) const {
   auto op = framework::OpRegistry::CreateOp(type, {}, {}, {}, false);
   framework::RuntimeContext ctx({}, {});
-  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
   auto* dev_ctx = pool.Get(phi::CPUPlace());
   const auto& op_info = op->Info();
   auto* attr_checker = op_info.Checker();
