10 changes: 10 additions & 0 deletions paddle/fluid/pybind/eager_functions.cc
@@ -139,6 +139,7 @@ static PyObject* eager_api_scale(PyObject* self,
paddle::Tensor ret;
{
eager_gil_scoped_release guard;
EagerSetDeviceId();
ret = egr::scale(tensor, scale, bias, bias_after_scale, trace_backward);
}
return ToPyObject(ret);
@@ -159,6 +160,7 @@ static PyObject* eager_api_run_backward(PyObject* self,
}
{
eager_gil_scoped_release guard;
EagerSetDeviceId();
egr::Backward(tensors, grad_tensors, retain_graph);
}
RETURN_PY_NONE
@@ -189,6 +191,7 @@ static PyObject* eager_api_run_partial_grad(PyObject* self,
std::vector<paddle::Tensor> result;
{
eager_gil_scoped_release guard;
EagerSetDeviceId();
result = egr::Grad(tensors,
inputs,
grad_tensors,
@@ -216,6 +219,7 @@ static PyObject* eager_api_tensor_copy(PyObject* self,

{
eager_gil_scoped_release guard;
EagerSetDeviceId();
dst = src.copy_to(place, blocking);
egr::EagerUtils::autograd_meta(&dst)->SetStopGradient(
egr::EagerUtils::autograd_meta(&(src))->StopGradient());
@@ -461,6 +465,7 @@ static PyObject* eager_api_jit_function_call(PyObject* self,
std::vector<paddle::Tensor> outs;
{
eager_gil_scoped_release guard;
EagerSetDeviceId();
outs = (*function)(ins);
}
return ToPyObject(outs);
@@ -672,6 +677,7 @@ PyObject* eager_api_run_custom_op(PyObject* self,

{
eager_gil_scoped_release guard;
EagerSetDeviceId();
ctx.ConstructInplaceIndex(inputs, outputs, inplace_map);
const auto& inplace_reverse_idx_map = ctx.GetInplaceReverseIndexMap();
for (size_t out_idx = 0; out_idx < outputs.size(); ++out_idx) {
@@ -872,6 +878,7 @@ static PyObject* eager_api_sparse_coo_tensor(PyObject* self,
paddle::Tensor tensor;
{
eager_gil_scoped_release guard;
EagerSetDeviceId();
PADDLE_ENFORCE(
non_zero_indices.is_dense_tensor(),
common::errors::Fatal("the non-zero indices must be a DenseTensor."));
@@ -916,6 +923,7 @@ static PyObject* eager_api_sparse_csr_tensor(PyObject* self,
paddle::Tensor tensor;
{
eager_gil_scoped_release guard;
EagerSetDeviceId();
PADDLE_ENFORCE(non_zero_crows.is_dense_tensor(),
common::errors::Fatal(
"the compressed non-zero rows must be a DenseTensor."));
@@ -996,6 +1004,7 @@ static PyObject* eager_api_async_read(PyObject* self,

{
eager_gil_scoped_release guard;
EagerSetDeviceId();
PADDLE_ENFORCE_EQ(
src.is_gpu_pinned(),
true,
@@ -1175,6 +1184,7 @@ static PyObject* eager_api_async_write(PyObject* self,
}
{
eager_gil_scoped_release guard;
EagerSetDeviceId();
PADDLE_ENFORCE_EQ(
src.is_gpu(),
true,
16 changes: 16 additions & 0 deletions paddle/fluid/pybind/eager_method.cc
@@ -619,6 +619,9 @@ static PyObject* tensor_method__copy_to(TensorObject* self,
paddle::Tensor cp_tensor;
{
eager_gil_scoped_release guard;

EagerSetDeviceId();

cp_tensor = self->tensor.copy_to(place, blocking);
if (!blocking) {
IncreaseTensorReferenceCountUntilCopyComplete(self->tensor, place);
@@ -690,6 +693,9 @@ static PyObject* tensor_method_copy_(TensorObject* self,
<< self->tensor.name();
if (!self->tensor.initialized()) {
eager_gil_scoped_release guard;

EagerSetDeviceId();

egr::EagerUtils::autograd_meta(&(self->tensor))
->SetStopGradient(
egr::EagerUtils::autograd_meta(&(src_tensor))->StopGradient());
@@ -702,6 +708,9 @@
} else {
if (src_tensor.has_allocation()) {
eager_gil_scoped_release guard;

EagerSetDeviceId();

self->tensor.copy_(src_tensor, self->tensor.place(), blocking);
}
}
@@ -766,6 +775,9 @@ static PyObject* tensor_method_clone(TensorObject* self,
paddle::Tensor out;
{
eager_gil_scoped_release guard;

EagerSetDeviceId();

PADDLE_ENFORCE_EQ(
self->tensor.initialized(),
true,
@@ -922,6 +934,7 @@ static PyObject* tensor_clear_gradient(TensorObject* self,
->unsafe_mutable_value();
}
if (set_to_zero) {
EagerSetDeviceId();
auto* dev_ctx =
phi::DeviceContextPool::Instance().Get(grad_t->place());
phi::funcs::set_constant(*dev_ctx, grad_t, 0.0);
@@ -952,6 +965,7 @@ static PyObject* tensor__zero_grads(TensorObject* self,

if (egr::EagerUtils::IsLeafTensor(self->tensor)) {
eager_gil_scoped_release guard;
EagerSetDeviceId();
// Add RetainGrad as PostHook to AccumulationNode
paddle::Tensor* grad = egr::EagerUtils::mutable_grad(self->tensor);
PADDLE_ENFORCE(
@@ -976,6 +990,7 @@
}
} else {
eager_gil_scoped_release guard;
EagerSetDeviceId();
auto meta = egr::EagerUtils::unsafe_autograd_meta(self->tensor);
if (meta->MutableGrad()->initialized()) {
if (meta->MutableGrad()->is_dense_tensor() ||
@@ -3250,6 +3265,7 @@ static PyObject* tensor_contiguous(TensorObject* self,
return reinterpret_cast<PyObject*>(self);
} else {
eager_gil_scoped_release guard;
EagerSetDeviceId();
*dense_tensor = paddle::experimental::Trans2Contiguous(*dense_tensor);
Py_INCREF(self);
return reinterpret_cast<PyObject*>(self);
35 changes: 35 additions & 0 deletions paddle/fluid/pybind/eager_utils.cc
@@ -2797,4 +2797,39 @@ CvtPlacements(Placements placements, int ndim) {
return {dim_map, partial_status};
}

void EagerSetDeviceId() {
auto expected_place = egr::Controller::Instance().GetExpectedPlace();

if (phi::is_gpu_place(expected_place)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
phi::backends::gpu::SetDeviceId(expected_place.device);
VLOG(4) << "CurrentDeviceId: " << phi::backends::gpu::GetCurrentDeviceId()
<< " from " << (int)expected_place.device; // NOLINT
#else
PADDLE_THROW(common::errors::PreconditionNotMet(
"PaddlePaddle should compile with GPU if use CUDAPlace."));
#endif
} else if (phi::is_custom_place(expected_place)) {
#if defined(PADDLE_WITH_CUSTOM_DEVICE)
phi::DeviceManager::SetDevice(expected_place);
VLOG(4) << "CurrentDeviceId: "
<< phi::DeviceManager::GetDevice(expected_place.GetDeviceType())
<< " from " << (int)expected_place.device; // NOLINT
#else
PADDLE_THROW(common::errors::PreconditionNotMet(
"PaddlePaddle should compile with CUSTOM_DEVICE if use CustomPlace."));
#endif
} else if (phi::is_xpu_place(expected_place)) {
#if defined(PADDLE_WITH_XPU)
phi::backends::xpu::SetXPUDeviceId(expected_place.device);
VLOG(4) << "CurrentDeviceId: "
<< phi::backends::xpu::GetXPUCurrentDeviceId() << " from "
<< (int)expected_place.device; // NOLINT
#else
PADDLE_THROW(common::errors::PreconditionNotMet(
"PaddlePaddle should compile with XPU if use XPUPlace."));
#endif
}
}

} // namespace paddle::pybind
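
Context for the call-site pattern above: EagerSetDeviceId() is invoked right after the GIL is released (eager_gil_scoped_release) and before any kernel work, so the thread entering the C++ side is aligned with the place returned by egr::Controller::Instance().GetExpectedPlace(). The snippet below is a minimal, self-contained sketch of that idea, not Paddle code: a thread_local int stands in for the driver's per-thread "current device" (what cudaSetDevice/cudaGetDevice manage on CUDA), and expected_device stands in for the expected place; all names inside the sketch namespace are illustrative.

// Illustrative sketch only: models why each Python-facing eager entry point
// re-asserts the device before launching work. The current device is
// per-thread state, while the user's chosen place is process-wide, so a
// worker thread may otherwise still point at device 0.
#include <cassert>
#include <iostream>
#include <thread>

namespace sketch {

thread_local int current_device = 0;  // stand-in for the driver's per-thread device
int expected_device = 1;              // stand-in for Controller::GetExpectedPlace()

void SetDeviceId(int id) { current_device = id; }

// Analogue of EagerSetDeviceId(): align this thread's device with the
// expected place before running any kernel.
void EagerSetDeviceId() { SetDeviceId(expected_device); }

void RunKernel() {
  // A kernel launched here would land on the wrong device without the
  // preceding EagerSetDeviceId() call.
  assert(current_device == expected_device);
  std::cout << "kernel runs on device " << current_device << "\n";
}

}  // namespace sketch

int main() {
  std::thread worker([] {
    sketch::EagerSetDeviceId();  // the call this PR inserts at each entry point
    sketch::RunKernel();
  });
  worker.join();
  return 0;
}

Built with a plain C++17 compiler (e.g. g++ -std=c++17 -pthread), the assert only holds because EagerSetDeviceId() runs on the worker thread before the kernel, which mirrors where the diff places the call relative to the GIL-release guard.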
2 changes: 2 additions & 0 deletions paddle/fluid/pybind/eager_utils.h
@@ -449,5 +449,7 @@ std::tuple<std::vector<int64_t>,
paddle::flat_hash_map<int64_t, phi::ReduceType>>
CvtPlacements(phi::distributed::Placements placements, int ndim);

void EagerSetDeviceId();

} // namespace pybind
} // namespace paddle