Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion paddle/fluid/operators/cumsum_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ class CumCUDAKernel : public framework::OpKernel<T> {
dim3 transpose_grids((width + tile_size - 1) / tile_size,
(height + tile_size - 1) / tile_size);
auto& dev_ctx = context.template device_context<DeviceContext>();
Tensor tmp;
framework::Tensor tmp;
tmp.Resize(out_dims);
auto* tmp_data = tmp.mutable_data<T>(context.GetPlace());
T* next_in_data = out_data;
Expand Down
3 changes: 1 addition & 2 deletions paddle/fluid/operators/math/tree2col.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
namespace paddle {
namespace operators {
namespace math {
using Tensor = framework::Tensor;
std::vector<TreeNode> Tree2ColUtil::construct_patch(
size_t root, int max_depth, const std::vector<std::vector<int>> &tr) {
std::stack<TreeNode, std::deque<TreeNode>> stack;
Expand Down Expand Up @@ -51,7 +50,7 @@ std::vector<TreeNode> Tree2ColUtil::construct_patch(
return patch;
}

void Tree2ColUtil::construct_tree(const paddle::Tensor &EdgeSet,
void Tree2ColUtil::construct_tree(const framework::Tensor &EdgeSet,
std::vector<std::vector<int>> *tr,
size_t *node_count) {
auto edge_set_dims = EdgeSet.dims();
Expand Down
4 changes: 1 addition & 3 deletions paddle/fluid/operators/math/tree2col.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
#include "paddle/fluid/operators/math/math_function.h"

namespace paddle {
using Tensor = framework::Tensor;
using DDim = framework::DDim;
namespace operators {
namespace math {
class TreeNode {
Expand Down Expand Up @@ -64,7 +62,7 @@ class Tree2ColUtil {
static std::vector<TreeNode> construct_patch(
size_t root, int max_depth, const std::vector<std::vector<int>> &tr);

static void construct_tree(const Tensor &EdgeSet,
static void construct_tree(const framework::Tensor &EdgeSet,
std::vector<std::vector<int>> *tr,
size_t *node_count);
};
Expand Down
1 change: 1 addition & 0 deletions paddle/pten/api/all.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ limitations under the License. */
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/common/layout.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/common/scalar_array.h"

// original custom op headers
#include "paddle/pten/api/ext/dispatch.h"
Expand Down
7 changes: 7 additions & 0 deletions paddle/pten/api/include/creation.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "paddle/pten/common/backend.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/common/scalar_array.h"

namespace paddle {
namespace experimental {
Expand All @@ -28,6 +29,12 @@ PD_DLL_DECL Tensor full(const std::vector<int64_t>& shape,
Backend backend = Backend::CPU,
DataLayout layout = DataLayout::NCHW);

PD_DLL_DECL Tensor full_new(const ScalarArray& shape,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not replace the original full?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, removed the original full.

const Scalar& value,
DataType dtype = DataType::FLOAT32,
Backend backend = Backend::CPU,
DataLayout layout = DataLayout::NCHW);

PD_DLL_DECL Tensor full_like(const Tensor& x,
const Scalar& value,
DataType dtype = DataType::UNDEFINED,
Expand Down
40 changes: 38 additions & 2 deletions paddle/pten/api/lib/creation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,43 @@ PD_DLL_DECL Tensor full(const std::vector<int64_t>& shape,
auto kernel_context = pten::KernelContext(dev_ctx);

// 3. Auto data transform
kernel_context.EmplaceBackAttr(value);
kernel_context.EmplaceBackAttr(pten::Scalar(value));

// 4. InferShape
auto out_meta = pten::FullInferShape(shape, dtype, layout);

// 5. Prepare outputs
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
Tensor out;
out.set_impl(dense_out);

// 6. Call kernel
kernel(&kernel_context);

return out;
}

PD_DLL_DECL Tensor full_new(const ScalarArray& shape,
const Scalar& value,
DataType dtype,
Backend backend,
DataLayout layout) {
// 1. Get kernel signature and kernel
pten::KernelKey kernel_key{backend, layout, dtype};
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"fill_constant.new", kernel_key);

// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);

// 3. Auto data transform
kernel_context.EmplaceBackAttr(pten::ScalarArray(shape));
kernel_context.EmplaceBackAttr(pten::Scalar(value));

// 4. InferShape
auto out_meta = pten::FullInferShape(shape, dtype, layout);
Expand Down Expand Up @@ -87,7 +123,7 @@ PD_DLL_DECL Tensor full_like(const Tensor& x,

// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackAttr(value);
kernel_context.EmplaceBackAttr(pten::Scalar(value));

// 4. InferShape
auto out_meta = FullLikeInferShape(dense_x->meta(), dtype, layout);
Expand Down
5 changes: 5 additions & 0 deletions paddle/pten/api/lib/tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -214,17 +214,22 @@ const T *Tensor::data() const {
template PD_DLL_DECL const float *Tensor::data<float>() const;
template PD_DLL_DECL const double *Tensor::data<double>() const;
template PD_DLL_DECL const int64_t *Tensor::data<int64_t>() const;
template PD_DLL_DECL const uint64_t *Tensor::data<uint64_t>() const;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are uint64 and uint32 needed? If there is no current demand, over-design is not recommended.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, removed.

template PD_DLL_DECL const int32_t *Tensor::data<int32_t>() const;
template PD_DLL_DECL const uint32_t *Tensor::data<uint32_t>() const;
template PD_DLL_DECL const uint8_t *Tensor::data<uint8_t>() const;
template PD_DLL_DECL const int8_t *Tensor::data<int8_t>() const;
template PD_DLL_DECL const int16_t *Tensor::data<int16_t>() const;
template PD_DLL_DECL const uint16_t *Tensor::data<uint16_t>() const;
template PD_DLL_DECL const bool *Tensor::data<bool>() const;
template PD_DLL_DECL const paddle::platform::complex<float>
*Tensor::data<paddle::platform::complex<float>>() const;
template PD_DLL_DECL const paddle::platform::complex<double>
*Tensor::data<paddle::platform::complex<double>>() const;
template PD_DLL_DECL const paddle::platform::float16 *
Tensor::data<paddle::platform::float16>() const;
template PD_DLL_DECL const paddle::platform::bfloat16 *
Tensor::data<paddle::platform::bfloat16>() const;

template <typename T>
T *Tensor::data() {
Expand Down
Loading