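This merged PR applies one mechanical substitution across the files below: the place-inspection helpers (`is_cpu_place`, `is_gpu_place`, `is_xpu_place`, `is_same_place`) and the `DeviceContextPool` singleton are now referenced through the `phi` namespace instead of `paddle::platform`, with call signatures unchanged. A minimal sketch of the pattern, assuming the usual phi headers (the include paths are assumptions, not taken from this diff):

```cpp
// Sketch of the namespace migration this PR applies; not a verbatim excerpt.
// Include paths are assumptions based on the phi library layout.
#include "paddle/phi/common/place.h"       // phi::is_gpu_place and friends (assumed path)
#include "paddle/phi/core/dense_tensor.h"  // phi::DenseTensor (assumed path)

bool IsOnGpu(const phi::DenseTensor& t) {
  // Before this PR: platform::is_gpu_place(t.place())
  // After this PR:  phi::is_gpu_place(t.place()) -- same signature, new namespace.
  return phi::is_gpu_place(t.place());
}
```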
12 changes: 6 additions & 6 deletions paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -143,9 +143,9 @@ void AllReduceOpHandle::AllReduceImpl(
in_var_handles[i]->name(),
numel));
dtype = framework::TransToProtoVarType(lod_tensor.dtype());
- is_gpu_place = platform::is_gpu_place(lod_tensor.place());
+ is_gpu_place = phi::is_gpu_place(lod_tensor.place());
#if defined(PADDLE_WITH_XPU_BKCL)
- is_xpu_place = platform::is_xpu_place(lod_tensor.place());
+ is_xpu_place = phi::is_xpu_place(lod_tensor.place());
#endif
}
PADDLE_ENFORCE_EQ(
@@ -162,13 +162,13 @@ void AllReduceOpHandle::AllReduceImpl(
"scopes should be equal."));
#if defined(PADDLE_WITH_XPU_BKCL)
PADDLE_ENFORCE_EQ(is_xpu_place,
- platform::is_xpu_place(lod_tensor.place()),
+ phi::is_xpu_place(lod_tensor.place()),
platform::errors::PreconditionNotMet(
"The place type of tensors of the same variable "
"in different local scopes should be equal."));
#endif
PADDLE_ENFORCE_EQ(is_gpu_place,
- platform::is_gpu_place(lod_tensor.place()),
+ phi::is_gpu_place(lod_tensor.place()),
platform::errors::PreconditionNotMet(
"The place type of tensors of the same variable "
"in different local scopes should be equal."));
@@ -204,7 +204,7 @@ void AllReduceOpHandle::AllReduceFunc(
int64_t numel,
const std::vector<phi::Place> &places,
const std::vector<std::string> &out_var_names) {
- if (platform::is_gpu_place(places[0])) {
+ if (phi::is_gpu_place(places[0])) {
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
PADDLE_ENFORCE_NOT_NULL(nccl_ctxs_,
platform::errors::InvalidArgument(
@@ -223,7 +223,7 @@ void AllReduceOpHandle::AllReduceFunc(
PADDLE_THROW(
platform::errors::PreconditionNotMet("Not compiled with GPU."));
#endif
- } else if (platform::is_xpu_place(places[0])) {
+ } else if (phi::is_xpu_place(places[0])) {
#if defined(PADDLE_WITH_XPU_BKCL)
PADDLE_ENFORCE_NOT_NULL(bkcl_ctxs_,
platform::errors::InvalidArgument(
8 changes: 4 additions & 4 deletions paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -68,7 +68,7 @@ void BroadcastOpHandle::BroadcastOneVar(

InitOutputValue(in_var_handle, out_var_handles);

- if (platform::is_cpu_place(in_tensor.place())) {
+ if (phi::is_cpu_place(in_tensor.place())) {
WaitInputVarGenerated();
for (auto *out_var_handle : out_var_handles) {
if (out_var_handle->IsTheSameVar(in_var_handle)) {
@@ -85,7 +85,7 @@
&VariableVisitor::GetMutableTensor(out_var));
});
}
- } else if (platform::is_gpu_place(in_tensor.place())) {
+ } else if (phi::is_gpu_place(in_tensor.place())) {
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
VarHandle *out_handle = nullptr;
int root_id = in_tensor.place().device; // NOLINT
@@ -249,8 +249,8 @@ void BroadcastOpHandle::InitOutputValue(
out_var,
platform::errors::NotFound("Variable %s is not found in scopes.",
out_var_handle->name()));
- if (platform::is_gpu_place(in_tensor.place())) {
- PADDLE_ENFORCE_EQ(platform::is_gpu_place(t_out_p),
+ if (phi::is_gpu_place(in_tensor.place())) {
+ PADDLE_ENFORCE_EQ(phi::is_gpu_place(t_out_p),
true,
platform::errors::PreconditionNotMet(
"Places of input and output must be all on GPU."));
4 changes: 2 additions & 2 deletions paddle/fluid/framework/details/eager_deletion_op_handle.cc
@@ -42,9 +42,9 @@ EagerDeletionOpHandle::EagerDeletionOpHandle(
gc_(gc),
vars_() {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
- if (platform::is_gpu_place(place)) {
+ if (phi::is_gpu_place(place)) {
dev_ctx_ = reinterpret_cast<phi::GPUContext *>(
- platform::DeviceContextPool::Instance().Get(place));
+ phi::DeviceContextPool::Instance().Get(place));
if (dynamic_cast<StreamGarbageCollector *>(gc_)) {
platform::CUDADeviceGuard guard(place.device);
#ifdef PADDLE_WITH_HIP
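The `DeviceContextPool` hunks follow the same pattern: the pool is reached through `phi::DeviceContextPool::Instance()` rather than `platform::DeviceContextPool::Instance()`, and the returned base-class context is still downcast to `phi::GPUContext` to reach the CUDA/HIP stream, as in the hunk above. A hedged sketch of that lookup; the include paths and the `gpuStream_t` alias (provided by Paddle's GPU build) are assumptions:

```cpp
// Sketch only: mirrors the post-PR lookup in eager_deletion_op_handle.cc and
// box_wrapper.cu. Include paths and the gpuStream_t alias are assumptions.
#include "paddle/phi/backends/gpu/gpu_context.h"  // phi::GPUContext (assumed path)
#include "paddle/phi/common/place.h"              // phi::Place (assumed path)
#include "paddle/phi/core/device_context.h"       // phi::DeviceContextPool (assumed path)

gpuStream_t GetComputeStream(const phi::Place& place) {
  // Look the context up in the phi pool, then downcast to the GPU context
  // to reach its stream, mirroring the call sites in this diff.
  auto* ctx = reinterpret_cast<phi::GPUContext*>(
      phi::DeviceContextPool::Instance().Get(place));
  return ctx->stream();
}
```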
8 changes: 4 additions & 4 deletions paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
@@ -91,7 +91,7 @@ void FusedAllReduceOpHandle::RunImpl() {
platform::errors::Unimplemented(
"FLAGS_allreduce_record_one_event=true is only valid "
"when using one GPU device per process."));
- PADDLE_ENFORCE_EQ(platform::is_gpu_place(places_[0]),
+ PADDLE_ENFORCE_EQ(phi::is_gpu_place(places_[0]),
true,
platform::errors::Unimplemented(
"FLAGS_allreduce_record_one_event=true is only valid "
@@ -116,7 +116,7 @@ void FusedAllReduceOpHandle::RunImpl() {
if (FLAGS_allreduce_record_one_event) {
auto gpu_place = phi::GPUPlace(places_[0].GetDeviceId());
compute_stream =
- platform::DeviceContextPool::Instance().GetByPlace(gpu_place)->stream();
+ phi::DeviceContextPool::Instance().GetByPlace(gpu_place)->stream();
auto flat_nccl_ctxs = nccl_ctxs_->GetFlatCtx(run_order_);
auto &nccl_ctx = flat_nccl_ctxs->at(gpu_place.device);
nccl_stream = nccl_ctx.stream();
@@ -320,7 +320,7 @@ bool FusedAllReduceOpHandle::InputIsInDifferentPlace(
platform::errors::NotFound(
"The variable '%s' is not found in local scope.", var_name));
auto &lod_tensor = var->Get<phi::DenseTensor>();
- if (!platform::is_same_place(lod_tensor.place(), places_.at(scope_idx))) {
+ if (!phi::is_same_place(lod_tensor.place(), places_.at(scope_idx))) {
return true;
}
}
@@ -355,7 +355,7 @@ void FusedAllReduceOpHandle::GetGradLoDTensor(
auto &lod_tensor = var->Get<phi::DenseTensor>();

PADDLE_ENFORCE_EQ(
- platform::is_same_place(lod_tensor.place(), places_.at(scope_idx)),
+ phi::is_same_place(lod_tensor.place(), places_.at(scope_idx)),
true,
platform::errors::InvalidArgument(
"The variable '%s' at scope %d is not in the right place.",
4 changes: 2 additions & 2 deletions paddle/fluid/framework/details/nan_inf_utils_detail.cc
@@ -182,7 +182,7 @@ void CheckVarHasNanOrInf(const std::string& op_type,
VLOG(10) << "begin check " << op_type << " var_name:" << var_name
<< ", place:" << tensor->place() << ", numel:" << tensor->numel();

- if (platform::is_gpu_place(tensor->place())) {
+ if (phi::is_gpu_place(tensor->place())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
tensor_check<phi::GPUContext>(op_type, var_name, *tensor, place);
#else
@@ -192,7 +192,7 @@
var_name));
#endif
return;
- } else if (platform::is_xpu_place(tensor->place())) {
+ } else if (phi::is_xpu_place(tensor->place())) {
#ifdef PADDLE_WITH_XPU
if (framework::TransToProtoVarType(tensor->dtype()) !=
proto::VarType::FP32) {
2 changes: 1 addition & 1 deletion paddle/fluid/framework/details/nan_inf_utils_detail.h
@@ -59,7 +59,7 @@ struct TensorCheckerVisitor {
std::is_same<T, ::paddle::platform::complex<double>>::value>::type* =
0) const {
auto* dev_ctx = reinterpret_cast<Context*>(
- platform::DeviceContextPool::Instance().Get(tensor.place()));
+ phi::DeviceContextPool::Instance().Get(tensor.place()));

phi::DenseTensor stats;
phi::DenseTensor values;
17 changes: 8 additions & 9 deletions paddle/fluid/framework/details/op_handle_base.cc
@@ -79,7 +79,7 @@ void OpHandleBase::InitCUDA() {
auto *out_var_handle = dynamic_cast<VarHandle *>(out_var);
if (out_var_handle) {
PADDLE_ENFORCE_EQ(
- platform::is_same_place(place, out_var_handle->place()),
+ phi::is_same_place(place, out_var_handle->place()),
true,
platform::errors::InvalidArgument(
"The place of output(%s) is not consistent with the "
@@ -118,7 +118,7 @@ void OpHandleBase::InitXPU() {
auto *out_var_handle = dynamic_cast<VarHandle *>(out_var);
if (out_var_handle) {
PADDLE_ENFORCE_EQ(
- platform::is_same_place(place, out_var_handle->place()),
+ phi::is_same_place(place, out_var_handle->place()),
true,
platform::errors::InvalidArgument(
"The place of output(%s) is not consistent with the "
@@ -176,7 +176,7 @@ void OpHandleBase::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) {
PADDLE_ENFORCE_NOT_NULL(
waited_ctx,
platform::errors::InvalidArgument("Argument waited_ctx is NULL."));
- if (platform::is_cpu_place(waited_ctx->GetPlace()) || events_.empty()) {
+ if (phi::is_cpu_place(waited_ctx->GetPlace()) || events_.empty()) {
for (auto &dev_ctx : dev_ctxes_) {
PADDLE_ENFORCE_NOT_NULL(
dev_ctx.second,
@@ -220,7 +220,7 @@ void OpHandleBase::WaitInputVarGenerated(bool wait_for_feed) {
auto *in_var_handle = dynamic_cast<VarHandle *>(in_var);
if (in_var_handle) {
auto &place = in_var_handle->place();
- if (platform::is_gpu_place(place)) {
+ if (phi::is_gpu_place(place)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto stream =
static_cast<phi::GPUContext *>(dev_ctxes_.at(place))->stream();
@@ -247,10 +247,9 @@
auto *in_var_handle = dynamic_cast<VarHandle *>(in_var);
if (in_var_handle) {
auto &place = in_var_handle->place();
- if (platform::is_gpu_place(place)) {
+ if (phi::is_gpu_place(place)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
- platform::DeviceContextPool &pool =
-     platform::DeviceContextPool::Instance();
+ phi::DeviceContextPool &pool = phi::DeviceContextPool::Instance();
auto stream =
static_cast<phi::GPUContext *>(pool.Get(place))->stream();
platform::GpuStreamSync(stream);
@@ -272,7 +271,7 @@ void OpHandleBase::WaitInputVarGenerated(const phi::Place &place) {
// so there doesn't add event for it.
auto *in_var_handle = dynamic_cast<VarHandle *>(in_var);
if (in_var_handle) {
- if (platform::is_gpu_place(in_var_handle->place())) {
+ if (phi::is_gpu_place(in_var_handle->place())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto stream = static_cast<phi::GPUContext *>(
dev_ctxes_.at(in_var_handle->place()))
@@ -332,7 +331,7 @@ void OpHandleBase::RunAndRecordEvent(const std::function<void()> &callback) {
void OpHandleBase::RunAndRecordEvent(phi::Place p,
const std::function<void()> &callback) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
- if (platform::is_cpu_place(p) || events_.empty()) {
+ if (phi::is_cpu_place(p) || events_.empty()) {
callback();
} else {
auto *ctx = dev_ctxes_.at(p);
13 changes: 6 additions & 7 deletions paddle/fluid/framework/details/reduce_op_handle.cc
@@ -112,8 +112,8 @@ void ReduceOpHandle::RunImpl() {
// CPU.
auto in_p = VariableVisitor::GetMutableTensor(pre_in_var).place();
phi::Place t_out_p;
- if (platform::is_gpu_place(in_p)) {
- PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_var_handle->place()),
+ if (phi::is_gpu_place(in_p)) {
+ PADDLE_ENFORCE_EQ(phi::is_gpu_place(out_var_handle->place()),
true,
platform::errors::PreconditionNotMet(
"Places of input and output must be all on GPU."));
@@ -134,8 +134,7 @@

// TODO(gongwb): add cpu support
if (collective_context.endpoints_.size() <= 1 ||
- platform::is_cpu_place(in_places[0]) ||
- platform::is_cpu_place(t_out_p)) {
+ phi::is_cpu_place(in_places[0]) || phi::is_cpu_place(t_out_p)) {
GatherLocalSelectedRowsFunctor functor(
in_selected_rows,
in_places,
@@ -151,7 +150,7 @@
std::vector<const phi::DenseTensor *> lod_tensors =
GetInputValues<phi::DenseTensor>(in_var_handles, var_scopes);

- if (paddle::platform::is_cpu_place(lod_tensors[0]->place())) {
+ if (phi::is_cpu_place(lod_tensors[0]->place())) {
WaitInputVarGenerated();
this->RunAndRecordEvent([&] {
// FIXME(zcd): The order of summing is important,
@@ -179,7 +178,7 @@ void ReduceOpHandle::RunImpl() {
}
}
});
- } else if (paddle::platform::is_gpu_place(lod_tensors[0]->place())) {
+ } else if (phi::is_gpu_place(lod_tensors[0]->place())) {
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
auto pre_in = pre_in_var->Get<phi::DenseTensor>();
VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
@@ -231,7 +230,7 @@ void ReduceOpHandle::RunImpl() {
PADDLE_THROW(
platform::errors::PreconditionNotMet("Not compiled with CUDA."));
#endif
- } else if (paddle::platform::is_xpu_place(lod_tensors[0]->place())) {
+ } else if (phi::is_xpu_place(lod_tensors[0]->place())) {
#if defined(PADDLE_WITH_XPU_BKCL)
auto pre_in = pre_in_var->Get<phi::DenseTensor>();
VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
4 changes: 2 additions & 2 deletions paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
@@ -57,9 +57,9 @@ struct ScaleLossGradFunctor {
template <typename OutT>
void apply() const {
auto *out_data = out_->mutable_data<OutT>(place_);
- if (platform::is_cpu_place(place_)) {
+ if (phi::is_cpu_place(place_)) {
*out_data = static_cast<OutT>(coeff_);
- } else if (platform::is_xpu_place(place_)) {
+ } else if (phi::is_xpu_place(place_)) {
#if defined(PADDLE_WITH_XPU)
OutT cast_coeff = static_cast<OutT>(coeff_);
memory::Copy(place_,
4 changes: 2 additions & 2 deletions paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc
@@ -133,13 +133,13 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
}

PADDLE_ENFORCE_EQ(
- platform::is_gpu_place(ins[0]->place()),
+ phi::is_gpu_place(ins[0]->place()),
true,
platform::errors::InvalidArgument(
"The place of input variable should be CUDAPlace, but got %s.",
ins[0]->place()));
PADDLE_ENFORCE_EQ(
- platform::is_gpu_place(outs[0]->place()),
+ phi::is_gpu_place(outs[0]->place()),
true,
platform::errors::InvalidArgument(
"The place of input variable should be CUDAPlace, but got %s.",
6 changes: 3 additions & 3 deletions paddle/fluid/framework/fleet/box_wrapper.cu
@@ -152,7 +152,7 @@ void BoxWrapper::CopyForPull(const phi::Place& place,
const int expand_embed_dim,
const int64_t total_length) {
auto stream = dynamic_cast<phi::GPUContext*>(
- platform::DeviceContextPool::Instance().Get(place))
+ phi::DeviceContextPool::Instance().Get(place))
->stream();
auto buf_value = memory::Alloc(place, values.size() * sizeof(float*));
float** gpu_values = reinterpret_cast<float**>(buf_value->ptr());
@@ -236,7 +236,7 @@ void BoxWrapper::CopyKeys(const phi::Place& place,
int slot_num,
int total_len) {
auto stream = dynamic_cast<phi::GPUContext*>(
- platform::DeviceContextPool::Instance().Get(place))
+ phi::DeviceContextPool::Instance().Get(place))
->stream();
#ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL(CopyKeysKernel,
@@ -266,7 +266,7 @@ void BoxWrapper::CopyForPush(const phi::Place& place,
const int64_t total_length,
const int batch_size) {
auto stream = dynamic_cast<phi::GPUContext*>(
- platform::DeviceContextPool::Instance().Get(place))
+ phi::DeviceContextPool::Instance().Get(place))
->stream();
auto slot_lengths_lod = slot_lengths;
for (int i = 1; i < slot_lengths_lod.size(); i++) {
2 changes: 1 addition & 1 deletion paddle/fluid/framework/fleet/box_wrapper.h
@@ -441,7 +441,7 @@ class BoxWrapper {
for (int i = 0; i < platform::GetGPUDeviceCount(); ++i) {
VLOG(3) << "before get context i[" << i << "]";
phi::GPUContext* context = dynamic_cast<phi::GPUContext*>(
- platform::DeviceContextPool::Instance().Get(phi::GPUPlace(i)));
+ phi::DeviceContextPool::Instance().Get(phi::GPUPlace(i)));
stream_list_[i] = context->stream();
stream_list.push_back(&stream_list_[i]);
}
8 changes: 4 additions & 4 deletions paddle/fluid/framework/fleet/box_wrapper_impl.h
@@ -40,10 +40,10 @@ void BoxWrapper::PullSparseCase(const phi::Place& place,
reinterpret_cast<boxps::FeatureValueGpu<EMBEDX_DIM, EXPAND_EMBED_DIM>*>(
buf->ptr());

- if (platform::is_cpu_place(place)) {
+ if (phi::is_cpu_place(place)) {
PADDLE_THROW(platform::errors::Unimplemented(
"Warning:: CPUPlace is not supported in PaddleBox now."));
- } else if (platform::is_gpu_place(place)) {
+ } else if (phi::is_gpu_place(place)) {
#if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && !defined(_WIN32)
VLOG(3) << "Begin copy keys, key_num[" << total_length << "]";
int device_id = place.GetDeviceId();
@@ -149,10 +149,10 @@ void BoxWrapper::PushSparseGradCase(
total_grad_values_gpu = reinterpret_cast<
boxps::FeaturePushValueGpu<EMBEDX_DIM, EXPAND_EMBED_DIM>*>(
buf->ptr());
- if (platform::is_cpu_place(place)) {
+ if (phi::is_cpu_place(place)) {
PADDLE_THROW(platform::errors::Unimplemented(
"Warning:: CPUPlace is not supported in PaddleBox now."));
- } else if (platform::is_gpu_place(place)) {
+ } else if (phi::is_gpu_place(place)) {
#if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && !defined(_WIN32)
int device_id = place.GetDeviceId();
phi::DenseTensor& cached_total_keys_tensor = keys_tensor[device_id];