
Commit cef90af

Fix
1 parent 2ab18a0 commit cef90af


45 files changed, +141 -145 lines changed

paddle/cinn/common/ir_util.h

Lines changed: 1 addition & 1 deletion
@@ -191,7 +191,7 @@ inline void UnpackReduction(const ir::IndexExpr &expr, FLeaf fleaf) {
 }

 /*!
- * \brief Flatten the expression into a vector of expressions splited by `Add`
+ * \brief Flatten the expression into a vector of expressions split by `Add`
  * or `Mul`.
  *
  * For example (Add):
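The file's own example is truncated by this hunk, so as a hedged illustration of the flattening the comment describes (`a` through `d` are hypothetical sub-expressions, not from the file):

  // Split by `Add`:  (a * b) + c + d  ->  {a * b, c, d}
  // Split by `Mul`:  a * (b + c) * d  ->  {a, b + c, d}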
paddle/cinn/hlir/pe/ir_schedule_pe.cc

Lines changed: 44 additions & 45 deletions
@@ -83,9 +83,9 @@ void IRElementwiseSchedule(ir::IRSchedule &ir_sch,  // NOLINT
     if (size <= target.max_num_threads()) {
       ir_sch.Bind(loop, "threadIdx.x");
     } else {
-      auto splited = ir_sch.Split(loop, {-1, target.max_num_threads()});
-      ir_sch.Bind(splited[0], "blockIdx.x");
-      ir_sch.Bind(splited[1], "threadIdx.x");
+      auto split = ir_sch.Split(loop, {-1, target.max_num_threads()});
+      ir_sch.Bind(split[0], "blockIdx.x");
+      ir_sch.Bind(split[1], "threadIdx.x");
     }
   };
   target.arch.Match(
@@ -117,9 +117,9 @@ void IRInjectiveSchedule(ir::IRSchedule &ir_sch,  // NOLINT
     if (size <= target.max_num_threads()) {
       ir_sch.Bind(loop, "threadIdx.x");
     } else {
-      auto splited = ir_sch.Split(loop, {-1, target.max_num_threads()});
-      ir_sch.Bind(splited[0], "blockIdx.x");
-      ir_sch.Bind(splited[1], "threadIdx.x");
+      auto split = ir_sch.Split(loop, {-1, target.max_num_threads()});
+      ir_sch.Bind(split[0], "blockIdx.x");
+      ir_sch.Bind(split[1], "threadIdx.x");
     }
   };
   target.arch.Match(
@@ -172,10 +172,10 @@ void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,  // NOLINT
     auto loops = ir_sch.GetLoops(all_blocks[0]);
     int last_shape = ir::GetLoopExtent(loops.back());
     factor = GetVectorizeFactor(last_shape, factor);
-    auto splited = ir_sch.Split(loops.back(), {-1, factor});
-    ir_sch.Vectorize(splited[1], factor);
+    auto split = ir_sch.Split(loops.back(), {-1, factor});
+    ir_sch.Vectorize(split[1], factor);
     if (dims == 1) {
-      ir_sch.Parallel(splited[0]);
+      ir_sch.Parallel(split[0]);
     }
   } */
   VLOG(3) << "After IRScheduleInjectiveCPU, new ir is : "
@@ -195,9 +195,9 @@ void IRGpuScheduleInjective(ir::IRSchedule &ir_sch,  // NOLINT
   int prod_size = std::accumulate(
       output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
   if (prod_size > num_thread) {
-    auto splited = ir_sch.Split(fused, {-1, num_thread});
-    ir_sch.Bind(splited[0], "blockIdx.x");
-    ir_sch.Bind(splited[1], "threadIdx.x");
+    auto split = ir_sch.Split(fused, {-1, num_thread});
+    ir_sch.Bind(split[0], "blockIdx.x");
+    ir_sch.Bind(split[1], "threadIdx.x");
   } else {
     ir_sch.Bind(fused, "threadIdx.x");
   }
@@ -242,9 +242,9 @@ std::vector<cinn::common::CINNValue> IRGpuScheduleMatMul(
   auto loops = ir_sch.GetLoops(init_block);
   if (loops.size() == 1) {
     if (ir::GetLoopExtent(loops[0]) > num_thread) {
-      auto splited = ir_sch.Split(loops[0], {-1, num_thread});
-      ir_sch.Bind(splited[0], "blockIdx.x");
-      ir_sch.Bind(splited[1], "threadIdx.x");
+      auto split = ir_sch.Split(loops[0], {-1, num_thread});
+      ir_sch.Bind(split[0], "blockIdx.x");
+      ir_sch.Bind(split[1], "threadIdx.x");
     } else {
       ir_sch.Bind(loops[0], "threadIdx.x");
     }
@@ -273,7 +273,7 @@ void IRCudaScheduleMul(ir::IRSchedule &ir_sch,  // NOLINT
       2U,
       ::common::errors::InvalidArgument(
           "The size of loops should be greater than 2."));
-  auto splited = ir_sch.Split(loops[1], {-1, 2});
+  auto split = ir_sch.Split(loops[1], {-1, 2});
   all_blocks = ir_sch.GetAllBlocks();
   loops = ir_sch.GetLoops(all_blocks.back());
   ir_sch.Bind(loops[0], "blockIdx.x");
@@ -349,15 +349,14 @@ void IRCudaSplitSchedule(ir::IRSchedule &ir_sch,  // NOLINT

     if (tsize > target.max_num_threads()) {
       // split [-1, 256]
-      auto splited = ir_sch.Split(ir_sch.GetLoops(block_name)[0],
-                                  {-1, target.max_num_threads() / 4});
-      ir_sch.Bind(splited[0], "blockIdx.x");
-      ir_sch.Bind(splited[1], "threadIdx.x");
+      auto split = ir_sch.Split(ir_sch.GetLoops(block_name)[0],
+                                {-1, target.max_num_threads() / 4});
+      ir_sch.Bind(split[0], "blockIdx.x");
+      ir_sch.Bind(split[1], "threadIdx.x");
     } else {
-      auto splited =
-          ir_sch.Split(ir_sch.GetLoops(block_name)[0], {1, tsize});
-      ir_sch.Bind(splited[0], "blockIdx.x");
-      ir_sch.Bind(splited[1], "threadIdx.x");
+      auto split = ir_sch.Split(ir_sch.GetLoops(block_name)[0], {1, tsize});
+      ir_sch.Bind(split[0], "blockIdx.x");
+      ir_sch.Bind(split[1], "threadIdx.x");
     }
   }
 } else {
@@ -373,15 +372,15 @@ void IRCudaSplitSchedule(ir::IRSchedule &ir_sch,  // NOLINT
   auto tsize = first_loop.As<ir::For>()->extent.as_int32();
   if (tsize > target.max_num_threads()) {
     // split [-1, 256]
-    auto splited = ir_sch.Split(ir_sch.GetLoops(block_names[idx])[0],
-                                {-1, target.max_num_threads() / 4});
-    ir_sch.Bind(splited[0], "blockIdx.x");
-    ir_sch.Bind(splited[1], "threadIdx.x");
+    auto split = ir_sch.Split(ir_sch.GetLoops(block_names[idx])[0],
+                              {-1, target.max_num_threads() / 4});
+    ir_sch.Bind(split[0], "blockIdx.x");
+    ir_sch.Bind(split[1], "threadIdx.x");
   } else {
-    auto splited =
+    auto split =
         ir_sch.Split(ir_sch.GetLoops(block_names[idx])[0], {1, tsize});
-    ir_sch.Bind(splited[0], "blockIdx.x");
-    ir_sch.Bind(splited[1], "threadIdx.x");
+    ir_sch.Bind(split[0], "blockIdx.x");
+    ir_sch.Bind(split[1], "threadIdx.x");
   }
  }
 }
@@ -1180,9 +1179,9 @@ void IRPoolScheduleGPU(ir::IRSchedule &ir_sch,  // NOLINT
   // Blocks were changed after Fuse, so we have to get all blocks again.
   all_blocks = ir_sch.GetAllBlocks();
   loops = ir_sch.GetLoops(all_blocks[0]);
-  auto splited = ir_sch.Split(loops[0], {-1, 1024});
-  ir_sch.Bind(splited[0], "blockIdx.x");
-  ir_sch.Bind(splited[1], "threadIdx.x");
+  auto split = ir_sch.Split(loops[0], {-1, 1024});
+  ir_sch.Bind(split[0], "blockIdx.x");
+  ir_sch.Bind(split[1], "threadIdx.x");
   VLOG(3) << "End IRPoolScheduleGPU: " << ir_sch.GetModule().GetExprs().at(0);
 }

@@ -1198,14 +1197,14 @@ void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch,  // NOLINT
   auto loops = ir_sch.GetLoops(all_blocks[1]);
   if (loops.size() > 1) {
     auto fused = ir_sch.Fuse(all_blocks[0], {0, 1});
-    auto splited = ir_sch.Split(fused, {-1, 32});
+    auto split = ir_sch.Split(fused, {-1, 32});
     all_blocks = ir_sch.GetAllBlocks();
     fused = ir_sch.Fuse(all_blocks[1], {0, 1});
-    splited = ir_sch.Split(fused, {-1, 32});
-    ir_sch.Bind(splited[0], "blockIdx.x");
-    ir_sch.Bind(splited[1], "threadIdx.y");
+    split = ir_sch.Split(fused, {-1, 32});
+    ir_sch.Bind(split[0], "blockIdx.x");
+    ir_sch.Bind(split[1], "threadIdx.y");
     all_blocks = ir_sch.GetAllBlocks();
-    ir_sch.SimpleComputeAt(all_blocks[0], splited[1]);
+    ir_sch.SimpleComputeAt(all_blocks[0], split[1]);
     all_blocks = ir_sch.GetAllBlocks();
     ir_sch.SetBuffer(all_blocks[0], "local", true);
     loops = ir_sch.GetLoops(all_blocks[0]);
@@ -1218,15 +1217,15 @@ void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch,  // NOLINT
     ir_sch.Bind(loops[2], "threadIdx.x");
   } else {
     loops = ir_sch.GetLoops(all_blocks[0]);
-    auto splited = ir_sch.Split(loops[0], {-1, 32});
+    auto split = ir_sch.Split(loops[0], {-1, 32});
     all_blocks = ir_sch.GetAllBlocks();
     loops = ir_sch.GetLoops(all_blocks[1]);
-    splited = ir_sch.Split(loops[0], {-1, 32});
-    ir_sch.Bind(splited[0], "blockIdx.x");
-    ir_sch.Bind(splited[1], "threadIdx.y");
+    split = ir_sch.Split(loops[0], {-1, 32});
+    ir_sch.Bind(split[0], "blockIdx.x");
+    ir_sch.Bind(split[1], "threadIdx.y");
     all_blocks = ir_sch.GetAllBlocks();
-    splited = ir_sch.GetLoops(all_blocks[1]);
-    ir_sch.SimpleComputeAt(all_blocks[0], splited[1]);
+    split = ir_sch.GetLoops(all_blocks[1]);
+    ir_sch.SimpleComputeAt(all_blocks[0], split[1]);
     all_blocks = ir_sch.GetAllBlocks();
     ir_sch.SetBuffer(all_blocks[0], "local", true);
     loops = ir_sch.GetLoops(all_blocks[0]);
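Every hunk in this file applies the same rename inside one recurring scheduling idiom. As a hedged summary of that idiom, not code from the commit (the helper name BindLoopToGpu is hypothetical; Split and Bind are used exactly as in the hunks above):

  // Sketch only: split a loop into (blocks, threads) and bind each
  // level to a CUDA axis.
  void BindLoopToGpu(ir::IRSchedule &ir_sch,  // NOLINT
                     const ir::Expr &loop,
                     int max_threads) {
    // {-1, max_threads}: the inner loop runs max_threads iterations;
    // the outer extent (-1) is inferred from the loop's extent.
    auto split = ir_sch.Split(loop, {-1, max_threads});
    ir_sch.Bind(split[0], "blockIdx.x");   // outer loop -> CUDA blocks
    ir_sch.Bind(split[1], "threadIdx.x");  // inner loop -> CUDA threads
  }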

paddle/cinn/ir/ir_printer.h

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ struct IrPrinter : public IRVisitorRequireReImpl<void>,
   void Print(const stmt::StmtRef &stmt);
   //! Emit a block on the output stream.
   void Print(const stmt::BlockRef &block);
-  //! Emit a expression list with , splitted.
+  //! Emit a expression list with , split.
   void Print(const std::vector<Expr> &exprs,
              const std::string &splitter = ", ");
   //! Emit a binary operator

paddle/cinn/ir/schedule/impl/compute_location.cc

Lines changed: 2 additions & 2 deletions
@@ -123,8 +123,8 @@ void DyScheduleImpl::SimpleComputeAt(const Expr& block, const Expr& loop) {
        GetLoopExtent(loops[0]) != 1) &&
       block_loops[0].As<ir::For>()->extent.is_constant() &&
       GetLoopExtent(block_loops[0]) == 1) {
-    auto splited = this->Split(loops[0], {1, -1});
-    this_loop = splited[1];
+    auto split = this->Split(loops[0], {1, -1});
+    this_loop = split[1];
   }

   block_loops = this->GetLoops(this_block);
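A hedged reading of this hunk, for context: `Split(loops[0], {1, -1})` peels a unit-extent outer loop off `loops[0]`, so `split[1]` keeps the full original extent and becomes `this_loop` for the compute-at that follows.

  // Assumed shape of the transform (illustration only):
  //   for (i, 0, N) { body }       // before
  //   for (o, 0, 1)                // after: split[0], extent 1
  //     for (i, 0, N) { body }     // after: split[1], original extent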

paddle/cinn/ir/schedule/ir_schedule.h

Lines changed: 6 additions & 6 deletions
@@ -105,18 +105,18 @@ class IRSchedule {

  /**
   * \brief Split a for loop into multiple loops, based on the factors.
-  * @param loop The loop to be splited.
+  * @param loop The loop to be split.
   * @param factors The factors we used to split the loop.
-  * @return The splited loops.
+  * @return The split loops.
   */
  std::vector<Expr> Split(const Expr& loop, const std::vector<int>& factors);

  /**
   * \brief Split a for loop into multiple loops, based on the factors.
   * @param block_name Name of the block we want to modify.
-  * @param loop_index Index of the loop to be splited.
+  * @param loop_index Index of the loop to be split.
   * @param factors The factors we used to split the loop.
-  * @return The splited loops.
+  * @return The split loops.
   */
  std::vector<Expr> Split(const std::string& block_name,
                          int loop_index,
@@ -125,9 +125,9 @@ class IRSchedule {
  /**
   * \brief Split a for loop into multiple loops, based on the factors, only
   * used for deserialization of trace.
-  * @param loop The loop to be splited.
+  * @param loop The loop to be split.
   * @param factors The factors we used to split the loop.
-  * @return The splited loops.
+  * @return The split loops.
   */
  std::vector<Expr> Split(const Expr& loop, const std::vector<Expr>& factors);

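Based only on the signatures and comments above, a hedged usage sketch of the first two overloads (`sch`, `loop`, and the block name "B" are assumed to exist):

  // Split by loop handle: {-1, 32} gives a 32-iteration inner loop and
  // infers the outer extent from the loop being split.
  std::vector<ir::Expr> parts = sch.Split(loop, {-1, 32});
  // parts[0] is the outer loop, parts[1] the inner loop.

  // The same split addressed by block name and loop index:
  std::vector<ir::Expr> parts2 = sch.Split("B", /*loop_index=*/0, {-1, 32});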
paddle/cinn/ir/schedule/ir_schedule_util.h

Lines changed: 1 addition & 1 deletion
@@ -134,7 +134,7 @@ void ReplaceExpr(Expr* source,
 * Validate the factors param of Split. We will check if factors are validate
 * and change -1 to positive integer.
 * @param factors The original factors.
-* @param total_extent The extent of the loop to be splitted.
+* @param total_extent The extent of the loop to be split.
 * @return return The validated factors.
 */
std::vector<int> ValidateFactors(const std::vector<int>& factors,
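The comment above says -1 is turned into a positive integer. A hypothetical re-implementation of just that inference, for illustration only (the real declaration continues past this hunk and may take further parameters):

  // Illustration, not the real ValidateFactors: replace a single -1
  // with total_extent divided by the product of the remaining factors.
  std::vector<int> ValidateFactorsSketch(std::vector<int> factors,
                                         int total_extent) {
    int known_product = 1;
    int infer_index = -1;
    for (int i = 0; i < static_cast<int>(factors.size()); ++i) {
      if (factors[i] == -1) {
        infer_index = i;  // position of the factor to infer
      } else {
        known_product *= factors[i];
      }
    }
    if (infer_index != -1) {
      factors[infer_index] = total_extent / known_product;
    }
    return factors;  // e.g. {-1, 256} with total_extent 1024 -> {4, 256}
  }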

paddle/cinn/lang/lower_impl.cc

Lines changed: 1 addition & 1 deletion
@@ -149,7 +149,7 @@ std::vector<ir::Argument> LowerImpl::GenerateFunctionArgumentList(

   return args;
 }
-// Generate Function Arguments for splitted kernel.
+// Generate Function Arguments for split kernel.
 std::vector<ir::Argument> LowerImpl::GenFuncArgForSplitKernel(
     Expr func_iterator, std::vector<ir::Tensor> temp_tensors) {
   CheckArgsUnique();

paddle/fluid/pybind/auto_parallel_py.cc

Lines changed: 1 addition & 1 deletion
@@ -451,7 +451,7 @@ void BindAutoParallel(py::module *m) {
   auto Shard = py::class_<phi::distributed::Shard,
                           std::shared_ptr<phi::distributed::Shard>>(
       *m, "Shard", Placement, R"DOC(
-    The `Shard` describes how `Tensor` splitted across multiple devices according to specified dimensions.
+    The `Shard` describes how `Tensor` split across multiple devices according to specified dimensions.

     Parameters:
         dim (int): specify the slicing dimension of the tensor.

paddle/phi/core/distributed/auto_parallel/process_mesh.h

Lines changed: 2 additions & 2 deletions
@@ -95,13 +95,13 @@ inline bool operator!=(const ProcessMesh& lhs, const ProcessMesh& rhs) {
 // split the mesh into sub-meshes at the given axis
 std::vector<ProcessMesh> SplitMesh(const ProcessMesh& mesh, int axis);

-// return which dimension that the sub_mesh is splitted from the global_mesh,
+// return which dimension that the sub_mesh is split from the global_mesh,
 // if sub_mesh is not a subset of global_mesh, return -1
 int SubMeshDim(const ProcessMesh& global_mesh, const ProcessMesh& sub_mesh);

 // when the shapes of two meshes are different and their process_ids
 // are the same, check whether the only difference is that mesh 'a'
-// has an additional '1' on the splitted dim of its shape.
+// has an additional '1' on the split dim of its shape.
 // e.g. a.shape = [2], b.shape = [2, 1], and the process_ids are the
 // same, then they are equal.
 bool mesh_equal_ignore_shape1(const ProcessMesh& a,
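A hedged worked example of the three declarations above, with assumed mesh values (construction details omitted; the [[1, 2], [3, 4]] mesh matches the example in the reshard file below):

  // global mesh: shape [2, 2], process_ids {1, 2, 3, 4}, i.e. [[1, 2], [3, 4]]
  // SplitMesh(global_mesh, /*axis=*/0) -> sub-meshes over {1, 2} and {3, 4}
  // SubMeshDim(global_mesh, sub-mesh {1, 2}) == 0   // split from dim 0
  // SubMeshDim(global_mesh, sub-mesh {5, 6}) == -1  // not a subset
  // mesh_equal_ignore_shape1(a, b) with a.shape = [2], b.shape = [2, 1]
  // and identical process_ids -> true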

paddle/phi/core/distributed/auto_parallel/reshard/global_and_sub_mesh_reshard_function.cc

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ bool GlobalToSubMeshReshardFunction::IsSuitable(

   int sub_mesh_dim = SubMeshDim(in_process_mesh, out_process_mesh);
   RESHARD_SHORTCUT_IF_FALSE(sub_mesh_dim != -1);
-  // 1. the splitted dimension must be replicated
+  // 1. the split dimension must be replicated
   // 2. out mesh is the value of a certain dimension of global mesh
   // e.g. global_mesh = [[1, 2], [3, 4]], out_mesh = [1, 2] or [3, 4]
   //      global_mesh = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
