PaddlePaddle · SigureMo · Dec 26, 2024 · Dec 26, 2024
@@ -32,22 +32,9 @@ UE = "UE"
 unpacket = "unpacket"
 
 # These words need to be fixed
-cahe = 'cahe'
-Caculate = 'Caculate'
-caculate = 'caculate'
-calcualtion = 'calcualtion'
-checkings = 'checkings'
 childs = 'childs'
-Cound = 'Cound'
-coule = 'coule'
-craete = 'craete'
-craeted = 'craeted'
 Creater = 'Creater'
 creater = 'creater'
-Currenly = 'Currenly'
-curent = 'curent'
-currnt = 'currnt'
-Costum = 'Costum'
 dateset = 'dateset'
 dota = 'dota'
 Datas = 'Datas'

diff --git a/paddle/cinn/operator_fusion/policy/iters_fusion_policy.cc b/paddle/cinn/operator_fusion/policy/iters_fusion_policy.cc
@@ -160,7 +160,7 @@ ItersFusionPolicy::SearchTransformRouteFromReduce2Reduce(
   VLOG(4) << "Start search transform Route from reduce to reduce.";
   if (source.loop_iters.size() == target.loop_iters.size() &&
       source.reduce_iter_nums == target.reduce_iter_nums) {
-    // Currenly only support fusion with same iter_nums and same reduce axis
+    // Currently only supports fusion with same iter_nums and same reduce axis
     // TODO(huangjiyi): Analyze fusion with different non-reduce axes
     auto [source_flatten_iters, source_reduce_iters] = SplitReduceIters(source);
     auto [target_flatten_iters, target_reduce_iters] = SplitReduceIters(target);

diff --git a/paddle/fluid/framework/data_device_transform.cc b/paddle/fluid/framework/data_device_transform.cc
@@ -41,7 +41,7 @@ void TransDataDevice(const phi::DenseTensor &in,
   }
 
   // FIXME(zcd): TransDataDevice is used to transform data from GPU to CPU and
-  // the enforced checkings have been done in GetDeviceContext, so the
+  // the enforced checks have been done in GetDeviceContext, so the
   // `dev_ctx->Wait()` is necessary. But `dev_ctx->Wait()` will make the program
   // slow, especially when the number of elements is small, for example,
   // the learning rate has only one element and it is on the CPU side.

diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc
@@ -690,7 +690,7 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
     dst_var->SetType(framework::proto::VarType::SELECTED_ROWS);
   }
 
-  // Increase curent count
+  // Increase current count
   IncreaseCurCnt();
 }
 

diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc
@@ -222,7 +222,7 @@ class ReduceGradOp : public framework::OperatorWithKernel {
 };
 
 // NOTE(dengkaipeng): Input(Out) is unnecessary in reduce_mean_grad
-// calcualtion, but will incur a reduce_mean_grad op after
+// calculation, but will incur a reduce_mean_grad op after
 // reduce_mean_grad_grad, delete Input(Out) here.
 // This change has no effect on reduce_mean_grad calculations.
 template <typename T>

diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc
@@ -3487,7 +3487,7 @@ bool SplitWithNumOpInferSymbolicShape(
       }
     }
     if (count == 1) {
-      // caculate the axis of split_with_num_op
+      // calculate the axis of split_with_num_op
       symbol::TensorListShapeOrDataDimExprs res_list_s_d(
           num, out_s_d(candidate_axis, num));
       infer_context->SetShapeOrDataForValue(

diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc
@@ -235,7 +235,7 @@ inline phi::DenseTensor TransDataPlace(const phi::DenseTensor& tensor,
 #endif
 
   // FIXME(zcd): TransDataPlace is used to transform data from GPU to CPU and
-  // the enforced checkings have been done in GetDeviceContext, so the
+  // the enforced checks have been done in GetDeviceContext, so the
   // `dev_ctx->Wait()` is necessary. But `dev_ctx->Wait()` will make the program
   // slow, especially when the number of elements is small, for example,
   // the learning rate has only one element and it is on the CPU side.

diff --git a/paddle/phi/kernels/funcs/segment_pooling.cc b/paddle/phi/kernels/funcs/segment_pooling.cc
@@ -33,25 +33,25 @@ class SegmentPoolFunctor<phi::CPUContext, T, IndexT> {
                   DenseTensor* index UNUSED,
                   const std::string pooltype = "SUM") {
     const IndexT* segment_ids = segments.data<IndexT>();
-    auto curent_id = segment_ids[0];
+    auto current_id = segment_ids[0];
     int64_t last_idx = 0;
     int64_t w = input.numel() / input.dims()[0];
     auto& place = *dev_ctx.eigen_device();
     for (int64_t idx = 1; idx <= segments.numel(); ++idx) {
       if (idx < segments.numel()) {
-        if (segment_ids[idx] == curent_id) continue;
+        if (segment_ids[idx] == current_id) continue;
         PADDLE_ENFORCE_GE(segment_ids[idx],
-                          curent_id,
+                          current_id,
                           common::errors::InvalidArgument(
                               "The segment ids should be sorted, but got "
                               "segment_ids[%d]:%d > segment_ids[%d]:%d.",
                               idx - 1,
-                              curent_id,
+                              current_id,
                               idx,
                               segment_ids[idx]));
       }
 
-      Tensor out_t = output->Slice(curent_id, curent_id + 1);
+      Tensor out_t = output->Slice(current_id, current_id + 1);
       Tensor in_t = input.Slice(last_idx, idx);
 
       int64_t h = idx - last_idx;
@@ -75,7 +75,7 @@ class SegmentPoolFunctor<phi::CPUContext, T, IndexT> {
       }
 
       last_idx = idx;
-      if (idx < segments.numel()) curent_id = segment_ids[idx];
+      if (idx < segments.numel()) current_id = segment_ids[idx];
     }
   }
 };
@@ -93,24 +93,24 @@ class SegmentPoolGradFunctor<phi::CPUContext, T, IndexT> {
                   const std::string pooltype = "SUM") {
     const IndexT* segment_ids = segments.data<IndexT>();
     auto& place = *dev_ctx.eigen_device();
-    auto curent_id = segment_ids[0];
+    auto current_id = segment_ids[0];
     int64_t last_idx = 0;
     int64_t w = in_grad->numel() / in_grad->dims()[0];
     for (int64_t idx = 1; idx <= segments.numel(); ++idx) {
       if (idx < segments.numel()) {
-        if (segment_ids[idx] == curent_id) continue;
+        if (segment_ids[idx] == current_id) continue;
         PADDLE_ENFORCE_GE(segment_ids[idx],
-                          curent_id,
+                          current_id,
                           common::errors::InvalidArgument(
                               "The segment ids should be sorted, but got "
                               "segment_ids[%d]:%d > segment_ids[%d]:%d.",
                               idx - 1,
-                              curent_id,
+                              current_id,
                               idx,
                               segment_ids[idx]));
       }
 
-      Tensor out_g_t = out_grad.Slice(curent_id, curent_id + 1);
+      Tensor out_g_t = out_grad.Slice(current_id, current_id + 1);
       Tensor in_g_t = in_grad->Slice(last_idx, idx);
 
       int64_t h = idx - last_idx;
@@ -123,7 +123,7 @@ class SegmentPoolGradFunctor<phi::CPUContext, T, IndexT> {
       } else if (pooltype == "SUM") {
         in_g_e.device(place) = out_g_e.broadcast(bcast);
       } else if (pooltype == "MAX" || pooltype == "MIN") {
-        Tensor out_t = output.Slice(curent_id, curent_id + 1);
+        Tensor out_t = output.Slice(current_id, current_id + 1);
         Tensor in_t = input.Slice(last_idx, idx);
         auto in_e = EigenMatrix<T>::From(in_t, {h, w});
         auto out_e = EigenMatrix<T>::From(out_t, {1, w});
@@ -138,7 +138,7 @@ class SegmentPoolGradFunctor<phi::CPUContext, T, IndexT> {
       }
 
       last_idx = idx;
-      if (idx < segments.numel()) curent_id = segment_ids[idx];
+      if (idx < segments.numel()) current_id = segment_ids[idx];
     }
   }
 };

diff --git a/paddle/phi/kernels/fusion/gpu/block_attn.h b/paddle/phi/kernels/fusion/gpu/block_attn.h
@@ -3977,7 +3977,7 @@ void qkv_transpose_split(const phi::GPUContext &dev_ctx,
 }
 
 template <typename T, int VecSize>
-__global__ void write_pre_cahe_to_kv_buffer(
+__global__ void write_pre_cache_to_kv_buffer(
     T *k_buf,  // [bsz, num_head, seq_len + pre_cache_length, head_dim]
     T *v_buf,
     const T *pre_key_cache,  // [bsz, num_head, pre_cache_length, head_dim]
@@ -4150,7 +4150,7 @@ void qkv_transpose_split(
     elem_cnt = batch_size * q_head_num * pre_cache_length * size_per_head * 2;
     pack_num = elem_cnt / PackSize;
     GetNumBlocks(pack_num, &grid_size);
-    write_pre_cahe_to_kv_buffer<T, PackSize>
+    write_pre_cache_to_kv_buffer<T, PackSize>
         <<<grid_size, blocksize, 0, dev_ctx.stream()>>>(k_buf,
                                                         v_buf,
                                                         pre_key_cache,

diff --git a/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu b/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu
@@ -35,15 +35,15 @@ static inline int64_t NumBlocks(const int64_t N) {
 }
 
 template <typename T, typename IndexT>
-__global__ void CaculateSoftLogitsGrad(T* logits_grad,
-                                       IndexT* is_ignore,
-                                       const IndexT* labels,
-                                       const IndexT ignore_index,
-                                       const int64_t start_index,
-                                       const int64_t end_index,
-                                       const int64_t N,
-                                       const int64_t D,
-                                       const int64_t C) {
+__global__ void CalculateSoftLogitsGrad(T* logits_grad,
+                                        IndexT* is_ignore,
+                                        const IndexT* labels,
+                                        const IndexT ignore_index,
+                                        const int64_t start_index,
+                                        const int64_t end_index,
+                                        const int64_t N,
+                                        const int64_t D,
+                                        const int64_t C) {
   const T prob = static_cast<T>(1.0 / C);
   CUDA_KERNEL_LOOP_TYPE(i, N, int64_t) {
     is_ignore[i] = labels[i * C];
@@ -145,7 +145,7 @@ void CSoftmaxWithCrossEntropyGradKernel(const Context& dev_ctx,
       is_ignore.Resize({N, 1});
       dev_ctx.template Alloc<int32_t>(&is_ignore);
 
-      CaculateSoftLogitsGrad<T, int32_t>
+      CalculateSoftLogitsGrad<T, int32_t>
           <<<blocks_cal, threads, 0, dev_ctx.stream()>>>(
               logit_grad_2d.data<T>(),
               is_ignore.data<int32_t>(),
@@ -183,7 +183,7 @@ void CSoftmaxWithCrossEntropyGradKernel(const Context& dev_ctx,
       is_ignore.Resize({N, 1});
       dev_ctx.template Alloc<int32_t>(&is_ignore);
 
-      CaculateSoftLogitsGrad<T, int64_t>
+      CalculateSoftLogitsGrad<T, int64_t>
           <<<blocks_cal, threads, 0, dev_ctx.stream()>>>(
               logit_grad_2d.data<T>(),
               is_ignore.data<int64_t>(),

diff --git a/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu b/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu
@@ -111,12 +111,12 @@ __global__ void SoftMaskLabelByIndex(T* predicted_logits,
 }
 
 template <typename T, typename IndexT>
-__global__ void CaculateLoss(T* loss,
-                             const T* predict_logits,
-                             const T* sum_exp_logits,
-                             const IndexT* label,
-                             const int64_t ignore_index,
-                             const int64_t N) {
+__global__ void CalculateLoss(T* loss,
+                              const T* predict_logits,
+                              const T* sum_exp_logits,
+                              const IndexT* label,
+                              const int64_t ignore_index,
+                              const int64_t N) {
   CUDA_KERNEL_LOOP_TYPE(i, N, int64_t) {
     auto real_label = static_cast<int64_t>(label[i]);
     loss[i] = ignore_index == real_label
@@ -129,13 +129,13 @@ __global__ void CaculateLoss(T* loss,
 }
 
 template <typename T, typename IndexT>
-__global__ void CaculateSoftLoss(T* loss,
-                                 const T* predict_logits,
-                                 const T* sum_exp_logits,
-                                 const IndexT* label,
-                                 const int64_t ignore_index,
-                                 const int64_t N,
-                                 const int64_t C) {
+__global__ void CalculateSoftLoss(T* loss,
+                                  const T* predict_logits,
+                                  const T* sum_exp_logits,
+                                  const IndexT* label,
+                                  const int64_t ignore_index,
+                                  const int64_t N,
+                                  const int64_t C) {
   const T prob = static_cast<T>(1.0 / C);
 
   CUDA_KERNEL_LOOP_TYPE(i, N, int64_t) {
@@ -323,7 +323,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
 
     if (label_type == phi::DataType::INT32) {
       if (C > 1) {
-        CaculateSoftLoss<T, int32_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
+        CalculateSoftLoss<T, int32_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
             loss_2d.data<T>(),
             predicted_logits.data<T>(),
             sum_exp_logits.data<T>(),
@@ -332,7 +332,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
             N,
             C);
       } else {
-        CaculateLoss<T, int32_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
+        CalculateLoss<T, int32_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
             loss_2d.data<T>(),
             predicted_logits.data<T>(),
             sum_exp_logits.data<T>(),
@@ -343,7 +343,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
 
     } else {
       if (C > 1) {
-        CaculateSoftLoss<T, int64_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
+        CalculateSoftLoss<T, int64_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
             loss_2d.data<T>(),
             predicted_logits.data<T>(),
             sum_exp_logits.data<T>(),
@@ -352,7 +352,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
             N,
             C);
       } else {
-        CaculateLoss<T, int64_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
+        CalculateLoss<T, int64_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
             loss_2d.data<T>(),
             predicted_logits.data<T>(),
             sum_exp_logits.data<T>(),

diff --git a/paddle/phi/kernels/kps/reduce_kernel.cu b/paddle/phi/kernels/kps/reduce_kernel.cu
@@ -178,7 +178,7 @@ void ReduceSumEigen(const KPDevice& dev_ctx,
   }
   auto eigen_reduce_dim =
       EigenDim<ReducedDimSize>::From(common::make_ddim(*reduce_dims));
-  // Caculate
+  // Calculate
   eigen_out_tensor.device(*dev_ctx.eigen_device()) =
       eigen_x_tensor.sum(eigen_reduce_dim);
   out->Resize(origin_out_dims);

diff --git a/python/paddle/jit/sot/opcode_translator/executor/opcode_executor.py b/python/paddle/jit/sot/opcode_translator/executor/opcode_executor.py
@@ -2324,17 +2324,17 @@ def _break_graph_when_for_loop(
         for_iter_idx = self.indexof(for_iter)
         loop_body_start_idx = for_iter_idx + 1
         loop_body_end_idx = self.indexof(for_iter.jump_to)
-        curent_stack = 1
+        current_stack = 1
 
         while True:
             if loop_body_start_idx >= len(self._instructions):
                 raise InnerError("Can not balance stack in loop body.")
             cur_instr = self._instructions[loop_body_start_idx]
             # do not consider jump instr
             stack_effect = calc_stack_effect(cur_instr, jump=False)
-            curent_stack += stack_effect
+            current_stack += stack_effect
             loop_body_start_idx += 1
-            if curent_stack == 0:
+            if current_stack == 0:
                 break
 
         # 2. create loop body function

diff --git a/python/paddle/jit/sot/symbolic/export.py b/python/paddle/jit/sot/symbolic/export.py
@@ -211,7 +211,7 @@ def create_layer(self):
     def create_inputs(self):
         create_paddle_inputs = self.new_root("def create_paddle_inputs():")
         self.new_root("\n")
-        craete_numpy_inputs = self.new_root("def create_numpy_inputs():")
+        create_numpy_inputs = self.new_root("def create_numpy_inputs():")
 
         paddle_inputs = ["inputs = ("]
         numpy_inputs = ["inputs = ("]
@@ -257,7 +257,7 @@ def create_inputs(self):
         numpy_inputs.append("return inputs")
 
         create_paddle_inputs.add_sub(*paddle_inputs)
-        craete_numpy_inputs.add_sub(*numpy_inputs)
+        create_numpy_inputs.add_sub(*numpy_inputs)
 
     def create_test(self):
         test_class = self.new_root("class TestLayer(unittest.TestCase):")

diff --git a/python/paddle/jit/sot/translate.py b/python/paddle/jit/sot/translate.py
@@ -49,7 +49,7 @@ def symbolic_translate(fn: Callable[P, R], **kwargs) -> Callable[P, R]:
         Callable, The wrapped function.
 
     Examples:
-        >>> # doctest: +SKIP("Cound not get source code of function foo."")
+        >>> # doctest: +SKIP("Could not get source code of function foo.")
         >>> import paddle
         >>> import numpy as np
         >>> from sot.translate import symbolic_translate

diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py
@@ -630,7 +630,7 @@ class Conv2D(_ConvNd):
         stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must
             contain two integers, (stride_H, stride_W). Otherwise, the
             stride_H = stride_W = stride. The default value is 1.
-        padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
+        padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
             1. a string in ['valid', 'same'].
             2. an int, which means each spatial dimension(depth, height, width) is zero padded by size of `padding`
             3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
@@ -800,7 +800,7 @@ class Conv2DTranspose(_ConvNd):
         stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must
             contain two integers, (stride_H, stride_W). Otherwise, the
             stride_H = stride_W = stride. Default: 1.
-        padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
+        padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
             1. a string in ['valid', 'same'].
             2. an int, which means each spatial dimension(depth, height, width) is zero padded by size of `padding` on both sides
             3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
@@ -960,7 +960,7 @@ class Conv3D(_ConvNd):
         stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must
             contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
             stride_D = stride_H = stride_W = stride. The default value is 1.
-        padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
+        padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
             1. a string in ['valid', 'same'].
             2. an int, which means each spatial dimension(depth, height, width) is zero padded by size of `padding`
             3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
@@ -1138,7 +1138,7 @@ class Conv3DTranspose(_ConvNd):
             If stride is a list/tuple, it must contain three integers, (stride_depth, stride_height,
             stride_width). Otherwise, stride_depth = stride_height = stride_width = stride.
             Default: 1.
-        padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
+        padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
             1. a string in ['valid', 'same'].
             2. an int, which means each spatial dimension(depth, height, width) is zero padded by size of `padding`
             3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].