PaddlePaddle · chenjiaoAngel · Jul 14, 2021 · Jul 13, 2021 · Jul 14, 2021
@@ -26,8 +26,8 @@ template <typename T, PrecisionType Ptype>
 void ReduceSumCompute<T, Ptype>::Run() {
   auto& param = this->template Param<operators::ReduceParam>();
   auto* input = param.X->template data<T>();
-  auto x_dims = param.X->dims();
-  int x_rank = x_dims.size();
+  auto x_vec = param.X->dims().Vectorize();
+  int x_rank = param.X->dims().size();
   auto* Out = param.Out->template mutable_data<T>();
   std::vector<int> dim = param.dim;
   bool keep_dim = param.keep_dim;
@@ -41,6 +41,15 @@ void ReduceSumCompute<T, Ptype>::Run() {
     }
   }
 
+  for (;;) {
+    if (x_vec.size() >= 5 && x_vec[0] == 1) {
+      x_vec.erase(x_vec.begin());
+      for (auto& val : dim) val--;
+    } else
+      break;
+  }
+  auto x_dims = lite::DDim(x_vec);
+
   if (reduce_all) {
     lite::arm::math::reduce_sum_all(input, Out, x_dims.production());
   } else {

@@ -69,17 +69,20 @@ void TileCompute<T, PType>::Run() {
   auto tmp_dst = tmp_dst_tensor.mutable_data<T>();
   for (int i = 0; i < in_dims.production(); i++) {
     tmp_src[i] = in_data[i];
+    tmp_dst[i] = in_data[i];
   }
 
+  int right = 1;
   for (int i = bcast_dims.size() - 1; i >= 0; i--) {
+    right *= bcast_dims[i];
     if (bcast_dims[i] > 1) {
       int num = in_stride[1] / in_stride[i + 1];
-      int dst_stride = in_stride[i + 1] * bcast_dims[i + 1];
+      int dst_stride = in_stride[i + 1] * right;
       for (int m = 0; m < num; m++) {
         for (int j = 0; j < bcast_dims[i]; j++) {
-          std::memcpy(tmp_dst + j * dst_stride + m * bcast_dims[i] * dst_stride,
-                      tmp_src + m * in_stride[i + 1],
-                      dst_stride * sizeof(T));
+          std::memcpy(tmp_dst + j * dst_stride / bcast_dims[i] + m * dst_stride,
+                      tmp_src + m * dst_stride / bcast_dims[i],
+                      dst_stride / bcast_dims[i] * sizeof(T));
         }
       }
       tmp_src_tensor.CopyDataFrom(tmp_dst_tensor);