Call sparse op from python #40608
First changed file (the C++ sparse API implementation):
@@ -25,29 +25,46 @@ namespace paddle {
 namespace experimental {
 namespace sparse {

-Tensor to_sparse_coo_impl(const Tensor& x,
-                          Backend backend,
-                          const int64_t sparse_dim) {
+Tensor to_sparse_coo_impl(const Tensor& x, const int64_t sparse_dim) {
   if (x.layout() == phi::DataLayout::SPARSE_COO) {
     return x;
   }

+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    auto kernel_key_set = ParseKernelKeyByInputArgs(x);
+    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }

   // 1. Get kernel signature and kernel
-  auto kernel_key_set = ParseKernelKeyByInputArgs(x);
-  kernel_key_set.backend_set = kernel_key_set.backend_set | BackendSet(backend);
-  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
   std::string kernel_name = "dense_to_sparse_coo";
   if (x.layout() == phi::DataLayout::SPARSE_CSR) {
     kernel_name = "sparse_csr_to_coo";
   }

   VLOG(6) << "kernel_name: " << kernel_name;

   auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      kernel_name, kernel_key);
+      kernel_name, {kernel_backend, kernel_layout, kernel_data_type});

-  VLOG(6) << "to API kernel key: " << kernel_key;
+  VLOG(6) << "add API kernel key: [" << kernel_backend << ", " << kernel_layout
+          << ", " << kernel_data_type << "]";
   VLOG(6) << "to API kernel: " << kernel;

   // 2. Get Device Context
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
   auto kernel_context = phi::KernelContext(dev_ctx);

   // 3. Auto data transform
|
@@ -61,19 +78,21 @@ Tensor to_sparse_coo_impl(const Tensor& x,
   }

   // 4. InferMeta
+  VLOG(6) << "infer meta.";
   auto indices_meta =
-      phi::DenseTensorMeta(phi::DataType::INT64, {-1}, phi::DataLayout::NCHW);
+      phi::DenseTensorMeta(phi::DataType::INT64, {1}, phi::DataLayout::NCHW);
-  auto elements_meta = phi::DenseTensorMeta(x.dtype(), {-1}, x.layout());
+  auto elements_meta = phi::DenseTensorMeta(x.dtype(), {1}, x.layout());

   // 5. Prepare outputs
   // create empty SparseCooTensor
+  VLOG(6) << "create empty SparseCooTensor.";
   phi::DenseTensor non_zero_indices(
       phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(backend)),
+          phi::TransToPhiPlace(kernel_backend)),
       std::move(indices_meta));
   phi::DenseTensor non_zero_elements(
       phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(backend)),
+          phi::TransToPhiPlace(kernel_backend)),
       std::move(elements_meta));
   auto coo = std::make_shared<phi::SparseCooTensor>(
       non_zero_indices, non_zero_elements, x.dims());
@@ -83,32 +102,50 @@ Tensor to_sparse_coo_impl(const Tensor& x,
   out.set_impl(coo);

   // 6. Call kernel
+  VLOG(6) << "call kernel ";
   kernel(&kernel_context);

   return out;
 }
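The hunks above drop the explicit `Backend` argument of `to_sparse_coo_impl` and instead derive the kernel key (backend, layout, data type) from the input tensor. The snippet below is a standalone illustration of that fallback pattern only — it is not part of the PR, and the enums and `KernelKey` struct are simplified stand-ins for phi's types:

```cpp
// Standalone sketch of the kernel-key fallback used in the new code paths.
// Simplified stand-ins for phi::Backend / DataLayout / DataType / KernelKey.
#include <iostream>

enum class Backend { UNDEFINED, CPU, GPU };
enum class DataLayout { UNDEFINED, NCHW };
enum class DataType { UNDEFINED, FLOAT32, INT64 };

struct KernelKey {
  Backend backend = Backend::UNDEFINED;
  DataLayout layout = DataLayout::UNDEFINED;
  DataType dtype = DataType::UNDEFINED;
};

// Fill every UNDEFINED field of `requested` from the key derived from the input.
KernelKey Resolve(KernelKey requested, const KernelKey& from_input) {
  if (requested.backend == Backend::UNDEFINED) requested.backend = from_input.backend;
  if (requested.layout == DataLayout::UNDEFINED) requested.layout = from_input.layout;
  if (requested.dtype == DataType::UNDEFINED) requested.dtype = from_input.dtype;
  return requested;
}

int main() {
  // Nothing is requested explicitly, so every field comes from the input's key.
  KernelKey key =
      Resolve(KernelKey{}, {Backend::GPU, DataLayout::NCHW, DataType::FLOAT32});
  std::cout << "backend taken from input: " << (key.backend == Backend::GPU) << "\n";
  return 0;
}
```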
-Tensor to_sparse_csr_impl(const Tensor& x, Backend backend) {
+Tensor to_sparse_csr_impl(const Tensor& x) {
   if (x.layout() == phi::DataLayout::SPARSE_CSR) {
     return x;
   }
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    auto kernel_key_set = ParseKernelKeyByInputArgs(x);
+    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }
   // 1. Get kernel signature and kernel
-  auto kernel_key_set = ParseKernelKeyByInputArgs(x);
-  kernel_key_set.backend_set = kernel_key_set.backend_set | BackendSet(backend);
-  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
   std::string kernel_name = "dense_to_sparse_csr";
   if (x.layout() == phi::DataLayout::SPARSE_COO) {
     kernel_name = "sparse_coo_to_csr";
   }

   auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      kernel_name, kernel_key);
+      kernel_name, {kernel_backend, kernel_layout, kernel_data_type});

-  VLOG(6) << "to API kernel key: " << kernel_key;
+  VLOG(6) << "add API kernel key: [" << kernel_backend << ", " << kernel_layout
+          << ", " << kernel_data_type << "]";
   VLOG(6) << "to API kernel: " << kernel;

   // 2. Get Device Context
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
   auto kernel_context = phi::KernelContext(dev_ctx);

   // 3. Auto data transform
@@ -122,24 +159,24 @@ Tensor to_sparse_csr_impl(const Tensor& x, Backend backend) {

   // 4. InferMeta
   auto crows_meta =
-      phi::DenseTensorMeta(phi::DataType::INT64, {-1}, phi::DataLayout::NCHW);
+      phi::DenseTensorMeta(phi::DataType::INT64, {1}, phi::DataLayout::NCHW);
   auto cols_meta =
-      phi::DenseTensorMeta(phi::DataType::INT64, {-1}, phi::DataLayout::NCHW);
-  auto elements_meta = phi::DenseTensorMeta(x.dtype(), {-1}, x.layout());
+      phi::DenseTensorMeta(phi::DataType::INT64, {1}, phi::DataLayout::NCHW);
+  auto elements_meta = phi::DenseTensorMeta(x.dtype(), {1}, x.layout());

   // 5. Prepare outputs
   // create empty SparseCooTensor
   phi::DenseTensor non_zero_crows(
       phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(backend)),
+          phi::TransToPhiPlace(kernel_backend)),
       std::move(crows_meta));
   phi::DenseTensor non_zero_cols(
       phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(backend)),
+          phi::TransToPhiPlace(kernel_backend)),
       std::move(cols_meta));
   phi::DenseTensor non_zero_elements(
       phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(backend)),
+          phi::TransToPhiPlace(kernel_backend)),
       std::move(elements_meta));
   auto csr = std::make_shared<phi::SparseCsrTensor>(
       non_zero_crows, non_zero_cols, non_zero_elements, x.dims());
@@ -154,28 +191,44 @@ Tensor to_sparse_csr_impl(const Tensor& x, Backend backend) {
   return out;
 }

-Tensor to_dense_impl(const Tensor& x, Backend backend) {
+Tensor to_dense_impl(const Tensor& x) {
   if (x.layout() != phi::DataLayout::SPARSE_CSR &&
       x.layout() != phi::DataLayout::SPARSE_COO) {
     return x;
   }
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    auto kernel_key_set = ParseKernelKeyByInputArgs(x);
+    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }
   // 1. Get kernel signature and kernel
-  auto kernel_key_set = ParseKernelKeyByInputArgs(x);
-  kernel_key_set.backend_set = kernel_key_set.backend_set | BackendSet(backend);
-  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
   std::string kernel_name = "sparse_coo_to_dense";
   if (x.layout() == phi::DataLayout::SPARSE_CSR) {
     kernel_name = "sparse_csr_to_dense";
   }

   auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      kernel_name, kernel_key);
+      kernel_name, {kernel_backend, kernel_layout, kernel_data_type});

-  VLOG(6) << "to API kernel key: " << kernel_key;
+  VLOG(6) << "add API kernel key: [" << kernel_backend << ", " << kernel_layout
+          << ", " << kernel_data_type << "]";
   VLOG(6) << "to API kernel: " << kernel;

   // 2. Get Device Context
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
   auto kernel_context = phi::KernelContext(dev_ctx);

   // 3. Auto data transform
@@ -194,7 +247,7 @@ Tensor to_dense_impl(const Tensor& x, Backend backend) {
   // create empty SparseCooTensor
   auto dense_out = std::make_shared<phi::DenseTensor>(
       phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(backend)),
+          phi::TransToPhiPlace(kernel_backend)),
       std::move(dense_meta));

   kernel_context.EmplaceBackOutput(dense_out.get());
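Each "Prepare outputs" step above allocates placeholder index and value buffers for the sparse formats. For context, the following standalone sketch (illustrative values only; not Paddle code — phi stores these buffers in DenseTensors rather than std::vector) shows what the COO and CSR buffers hold for a small matrix:

```cpp
// Standalone illustration of the COO layout behind non_zero_indices /
// non_zero_elements for a 3x3 example matrix.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Dense 3x3 matrix:
  //   [[1, 0, 0],
  //    [0, 0, 2],
  //    [0, 3, 0]]
  // With sparse_dim = 2, COO stores a 2 x nnz indices array plus an nnz values array.
  std::vector<std::vector<int64_t>> indices = {{0, 1, 2},   // row of each non-zero
                                               {0, 2, 1}};  // column of each non-zero
  std::vector<float> values = {1.0f, 2.0f, 3.0f};
  for (size_t i = 0; i < values.size(); ++i) {
    std::cout << "(" << indices[0][i] << ", " << indices[1][i] << ") = "
              << values[i] << "\n";
  }
  // The CSR variant (non_zero_crows / non_zero_cols) would instead keep
  // compressed row offsets {0, 1, 2, 3}, columns {0, 2, 1}, and the same values.
  return 0;
}
```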
Second changed file (the GPU sparse kernel implementation):
@@ -15,6 +15,7 @@ limitations under the License. */
 #include <thrust/execution_policy.h>
 #include <thrust/remove.h>

+#include "glog/logging.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -93,6 +94,7 @@ void DenseToSparseCooKernel(const Context& dev_ctx,
                             const DenseTensor& x,
                             const int64_t sparse_dim,
                             SparseCooTensor* out) {
+  VLOG(6) << "enter DenseToSparseCooKernel.";
   const T* x_data = x.data<T>();
   const auto& x_dims = x.dims();
   auto dims_2d = flatten_to_2d(x_dims, sparse_dim);
@@ -123,6 +125,7 @@ void DenseToSparseCooKernel(const Context& dev_ctx,
       phi::DenseTensorMeta(DataType::INT32, {rows}, phi::DataLayout::NCHW);
   DenseTensor temp_indexs = phi::Empty(dev_ctx, std::move(temp_indexs_meta));
   int* temp_indexs_ptr = temp_indexs.mutable_data<int>(place);
+  VLOG(6) << "get the number of non-zero elements.";
   GetNonZeroNums<<<config.block_per_grid.x,
                    config.thread_per_block.x,
                    0,
@@ -171,6 +174,8 @@ void DenseToSparseCooKernel(const Context& dev_ctx,

   dev_ctx.Wait();  // wait the copy

+  VLOG(6) << "alloc SparseCooTensor";
+
   const auto values_dims =
       phi::funcs::sparse::InferDenseDims(x_dims, sparse_dim, non_zero_num);
   DenseTensorMeta indices_meta(DataType::INT64,
@@ -189,6 +194,7 @@ void DenseToSparseCooKernel(const Context& dev_ctx,
   T* sparse_data = values.mutable_data<T>(place);

   // 3. calc indices by indexs and get values by indexs
+  VLOG(6) << "calc indices..";
   config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, non_zero_num, 1);
   GetNonZeroElementsAndIndices<<<config.block_per_grid.x,
                                  config.thread_per_block.x,
@@ -201,7 +207,9 @@ void DenseToSparseCooKernel(const Context& dev_ctx,
       temp_indexs_ptr,
       indices_data,
       sparse_data);
+  VLOG(6) << "set member";
   out->SetMember(indices, values, x_dims, true);
+  VLOG(6) << "leave DenseToSparseCoo";
 }

 __global__ void GetBatchSizes(const int64_t* crows,
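The kernel changes above only add VLOG(6) tracing. As a side note, such messages are emitted only when the glog verbosity level is at least 6; a minimal standalone glog program illustrating that (an assumption about a glog-based build, not Paddle code):

```cpp
// Minimal glog sketch: VLOG(n) prints only when the verbosity level is >= n.
// In a glog-based build this is usually controlled via FLAGS_v or the GLOG_v
// environment variable. Illustration only, not Paddle code.
#include <glog/logging.h>

int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = true;
  FLAGS_v = 6;  // enable VLOG(6) messages like the ones added in this diff
  VLOG(6) << "enter DenseToSparseCooKernel.";
  VLOG(7) << "this one stays silent at verbosity 6";
  return 0;
}
```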
Reviewer comment: An extra comment line was added here.
Author reply: done