Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions _typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,9 @@ UE = "UE"
unpacket = "unpacket"

# These words need to be fixed
cahe = 'cahe'
Caculate = 'Caculate'
caculate = 'caculate'
calcualtion = 'calcualtion'
checkings = 'checkings'
childs = 'childs'
Cound = 'Cound'
coule = 'coule'
craete = 'craete'
craeted = 'craeted'
Creater = 'Creater'
creater = 'creater'
Currenly = 'Currenly'
curent = 'curent'
currnt = 'currnt'
Costum = 'Costum'
dateset = 'dateset'
dota = 'dota'
Datas = 'Datas'
Expand Down
2 changes: 1 addition & 1 deletion paddle/cinn/operator_fusion/policy/iters_fusion_policy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ ItersFusionPolicy::SearchTransformRouteFromReduce2Reduce(
VLOG(4) << "Start search transform Route from reduce to reduce.";
if (source.loop_iters.size() == target.loop_iters.size() &&
source.reduce_iter_nums == target.reduce_iter_nums) {
// Currenly only support fusion with same iter_nums and same reduce axis
// Currently only support fusion with same iter_nums and same reduce axis
// TODO(huangjiyi): Analysis fusion with different non reduce axis
auto [source_flatten_iters, source_reduce_iters] = SplitReduceIters(source);
auto [target_flatten_iters, target_reduce_iters] = SplitReduceIters(target);
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/data_device_transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ void TransDataDevice(const phi::DenseTensor &in,
}

// FIXME(zcd): TransDataDevice is used to transform data from GPU to CPU and
// the enforced checkings have been done in GetDeviceContext, so the
// the enforced checks have been done in GetDeviceContext, so the
// `dev_ctx->Wait()` is necessary. But `dev_ctx->Wait()` will make the program
// slow, especially when the number of elements is little, for example,
// the elements of learning rate are one and it's CPU side.
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/imperative/gradient_accumulator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
dst_var->SetType(framework::proto::VarType::SELECTED_ROWS);
}

// Increase curent count
// Increase current count
IncreaseCurCnt();
}

Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/reduce_ops/reduce_mean_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ class ReduceGradOp : public framework::OperatorWithKernel {
};

// NOTE(dengkaipeng): Input(Out) is unnecessary in reduce_mean_grad
// calcualtion, but will incur a reduce_mean_grad op after
// calculation, but will incur a reduce_mean_grad op after
// reduce_mean_grad_grad, delete Input(Out) here.
// This change has no effect on reduce_mean_grad calculations.
template <typename T>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3487,7 +3487,7 @@ bool SplitWithNumOpInferSymbolicShape(
}
}
if (count == 1) {
// caculate the axis of split_with_num_op
// calculate the axis of split_with_num_op
symbol::TensorListShapeOrDataDimExprs res_list_s_d(
num, out_s_d(candidate_axis, num));
infer_context->SetShapeOrDataForValue(
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/api/lib/data_transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ inline phi::DenseTensor TransDataPlace(const phi::DenseTensor& tensor,
#endif

// FIXME(zcd): TransDataPlace is used to transform data from GPU to CPU and
// the enforced checkings have been done in GetDeviceContext, so the
// the enforced checks have been done in GetDeviceContext, so the
// `dev_ctx->Wait()` is necessary. But `dev_ctx->Wait()` will make the program
// slow, especially when the number of elements is little, for example,
// the elements of learning rate are one and it's CPU side.
Expand Down
26 changes: 13 additions & 13 deletions paddle/phi/kernels/funcs/segment_pooling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,25 @@ class SegmentPoolFunctor<phi::CPUContext, T, IndexT> {
DenseTensor* index UNUSED,
const std::string pooltype = "SUM") {
const IndexT* segment_ids = segments.data<IndexT>();
auto curent_id = segment_ids[0];
auto current_id = segment_ids[0];
int64_t last_idx = 0;
int64_t w = input.numel() / input.dims()[0];
auto& place = *dev_ctx.eigen_device();
for (int64_t idx = 1; idx <= segments.numel(); ++idx) {
if (idx < segments.numel()) {
if (segment_ids[idx] == curent_id) continue;
if (segment_ids[idx] == current_id) continue;
PADDLE_ENFORCE_GE(segment_ids[idx],
curent_id,
current_id,
common::errors::InvalidArgument(
"The segment ids should be sorted, but got "
"segment_ids[%d]:%d > segment_ids[%d]:%d.",
idx - 1,
curent_id,
current_id,
idx,
segment_ids[idx]));
}

Tensor out_t = output->Slice(curent_id, curent_id + 1);
Tensor out_t = output->Slice(current_id, current_id + 1);
Tensor in_t = input.Slice(last_idx, idx);

int64_t h = idx - last_idx;
Expand All @@ -75,7 +75,7 @@ class SegmentPoolFunctor<phi::CPUContext, T, IndexT> {
}

last_idx = idx;
if (idx < segments.numel()) curent_id = segment_ids[idx];
if (idx < segments.numel()) current_id = segment_ids[idx];
}
}
};
Expand All @@ -93,24 +93,24 @@ class SegmentPoolGradFunctor<phi::CPUContext, T, IndexT> {
const std::string pooltype = "SUM") {
const IndexT* segment_ids = segments.data<IndexT>();
auto& place = *dev_ctx.eigen_device();
auto curent_id = segment_ids[0];
auto current_id = segment_ids[0];
int64_t last_idx = 0;
int64_t w = in_grad->numel() / in_grad->dims()[0];
for (int64_t idx = 1; idx <= segments.numel(); ++idx) {
if (idx < segments.numel()) {
if (segment_ids[idx] == curent_id) continue;
if (segment_ids[idx] == current_id) continue;
PADDLE_ENFORCE_GE(segment_ids[idx],
curent_id,
current_id,
common::errors::InvalidArgument(
"The segment ids should be sorted, but got "
"segment_ids[%d]:%d > segment_ids[%d]:%d.",
idx - 1,
curent_id,
current_id,
idx,
segment_ids[idx]));
}

Tensor out_g_t = out_grad.Slice(curent_id, curent_id + 1);
Tensor out_g_t = out_grad.Slice(current_id, current_id + 1);
Tensor in_g_t = in_grad->Slice(last_idx, idx);

int64_t h = idx - last_idx;
Expand All @@ -123,7 +123,7 @@ class SegmentPoolGradFunctor<phi::CPUContext, T, IndexT> {
} else if (pooltype == "SUM") {
in_g_e.device(place) = out_g_e.broadcast(bcast);
} else if (pooltype == "MAX" || pooltype == "MIN") {
Tensor out_t = output.Slice(curent_id, curent_id + 1);
Tensor out_t = output.Slice(current_id, current_id + 1);
Tensor in_t = input.Slice(last_idx, idx);
auto in_e = EigenMatrix<T>::From(in_t, {h, w});
auto out_e = EigenMatrix<T>::From(out_t, {1, w});
Expand All @@ -138,7 +138,7 @@ class SegmentPoolGradFunctor<phi::CPUContext, T, IndexT> {
}

last_idx = idx;
if (idx < segments.numel()) curent_id = segment_ids[idx];
if (idx < segments.numel()) current_id = segment_ids[idx];
}
}
};
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/fusion/gpu/block_attn.h
Original file line number Diff line number Diff line change
Expand Up @@ -3977,7 +3977,7 @@ void qkv_transpose_split(const phi::GPUContext &dev_ctx,
}

template <typename T, int VecSize>
__global__ void write_pre_cahe_to_kv_buffer(
__global__ void write_pre_cache_to_kv_buffer(
T *k_buf, // [bsz, num_head, seq_len + pre_cache_length, head_dim]
T *v_buf,
const T *pre_key_cache, // [bsz, num_head, pre_cache_length, head_dim]
Expand Down Expand Up @@ -4150,7 +4150,7 @@ void qkv_transpose_split(
elem_cnt = batch_size * q_head_num * pre_cache_length * size_per_head * 2;
pack_num = elem_cnt / PackSize;
GetNumBlocks(pack_num, &grid_size);
write_pre_cahe_to_kv_buffer<T, PackSize>
write_pre_cache_to_kv_buffer<T, PackSize>
<<<grid_size, blocksize, 0, dev_ctx.stream()>>>(k_buf,
v_buf,
pre_key_cache,
Expand Down
22 changes: 11 additions & 11 deletions paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ static inline int64_t NumBlocks(const int64_t N) {
}

template <typename T, typename IndexT>
__global__ void CaculateSoftLogitsGrad(T* logits_grad,
IndexT* is_ignore,
const IndexT* labels,
const IndexT ignore_index,
const int64_t start_index,
const int64_t end_index,
const int64_t N,
const int64_t D,
const int64_t C) {
__global__ void CalculateSoftLogitsGrad(T* logits_grad,
IndexT* is_ignore,
const IndexT* labels,
const IndexT ignore_index,
const int64_t start_index,
const int64_t end_index,
const int64_t N,
const int64_t D,
const int64_t C) {
const T prob = static_cast<T>(1.0 / C);
CUDA_KERNEL_LOOP_TYPE(i, N, int64_t) {
is_ignore[i] = labels[i * C];
Expand Down Expand Up @@ -145,7 +145,7 @@ void CSoftmaxWithCrossEntropyGradKernel(const Context& dev_ctx,
is_ignore.Resize({N, 1});
dev_ctx.template Alloc<int32_t>(&is_ignore);

CaculateSoftLogitsGrad<T, int32_t>
CalculateSoftLogitsGrad<T, int32_t>
<<<blocks_cal, threads, 0, dev_ctx.stream()>>>(
logit_grad_2d.data<T>(),
is_ignore.data<int32_t>(),
Expand Down Expand Up @@ -183,7 +183,7 @@ void CSoftmaxWithCrossEntropyGradKernel(const Context& dev_ctx,
is_ignore.Resize({N, 1});
dev_ctx.template Alloc<int32_t>(&is_ignore);

CaculateSoftLogitsGrad<T, int64_t>
CalculateSoftLogitsGrad<T, int64_t>
<<<blocks_cal, threads, 0, dev_ctx.stream()>>>(
logit_grad_2d.data<T>(),
is_ignore.data<int64_t>(),
Expand Down
34 changes: 17 additions & 17 deletions paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,12 @@ __global__ void SoftMaskLabelByIndex(T* predicted_logits,
}

template <typename T, typename IndexT>
__global__ void CaculateLoss(T* loss,
const T* predict_logits,
const T* sum_exp_logits,
const IndexT* label,
const int64_t ignore_index,
const int64_t N) {
__global__ void CalculateLoss(T* loss,
const T* predict_logits,
const T* sum_exp_logits,
const IndexT* label,
const int64_t ignore_index,
const int64_t N) {
CUDA_KERNEL_LOOP_TYPE(i, N, int64_t) {
auto real_label = static_cast<int64_t>(label[i]);
loss[i] = ignore_index == real_label
Expand All @@ -129,13 +129,13 @@ __global__ void CaculateLoss(T* loss,
}

template <typename T, typename IndexT>
__global__ void CaculateSoftLoss(T* loss,
const T* predict_logits,
const T* sum_exp_logits,
const IndexT* label,
const int64_t ignore_index,
const int64_t N,
const int64_t C) {
__global__ void CalculateSoftLoss(T* loss,
const T* predict_logits,
const T* sum_exp_logits,
const IndexT* label,
const int64_t ignore_index,
const int64_t N,
const int64_t C) {
const T prob = static_cast<T>(1.0 / C);

CUDA_KERNEL_LOOP_TYPE(i, N, int64_t) {
Expand Down Expand Up @@ -323,7 +323,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {

if (label_type == phi::DataType::INT32) {
if (C > 1) {
CaculateSoftLoss<T, int32_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
CalculateSoftLoss<T, int32_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
loss_2d.data<T>(),
predicted_logits.data<T>(),
sum_exp_logits.data<T>(),
Expand All @@ -332,7 +332,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
N,
C);
} else {
CaculateLoss<T, int32_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
CalculateLoss<T, int32_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
loss_2d.data<T>(),
predicted_logits.data<T>(),
sum_exp_logits.data<T>(),
Expand All @@ -343,7 +343,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {

} else {
if (C > 1) {
CaculateSoftLoss<T, int64_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
CalculateSoftLoss<T, int64_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
loss_2d.data<T>(),
predicted_logits.data<T>(),
sum_exp_logits.data<T>(),
Expand All @@ -352,7 +352,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
N,
C);
} else {
CaculateLoss<T, int64_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
CalculateLoss<T, int64_t><<<blocks, threads, 0, dev_ctx.stream()>>>(
loss_2d.data<T>(),
predicted_logits.data<T>(),
sum_exp_logits.data<T>(),
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/kps/reduce_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ void ReduceSumEigen(const KPDevice& dev_ctx,
}
auto eigen_reduce_dim =
EigenDim<ReducedDimSize>::From(common::make_ddim(*reduce_dims));
// Caculate
// Calculate
eigen_out_tensor.device(*dev_ctx.eigen_device()) =
eigen_x_tensor.sum(eigen_reduce_dim);
out->Resize(origin_out_dims);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2324,17 +2324,17 @@ def _break_graph_when_for_loop(
for_iter_idx = self.indexof(for_iter)
loop_body_start_idx = for_iter_idx + 1
loop_body_end_idx = self.indexof(for_iter.jump_to)
curent_stack = 1
current_stack = 1

while True:
if loop_body_start_idx >= len(self._instructions):
raise InnerError("Can not balance stack in loop body.")
cur_instr = self._instructions[loop_body_start_idx]
# do not consider jump instr
stack_effect = calc_stack_effect(cur_instr, jump=False)
curent_stack += stack_effect
current_stack += stack_effect
loop_body_start_idx += 1
if curent_stack == 0:
if current_stack == 0:
break

# 2. create loop body function
Expand Down
4 changes: 2 additions & 2 deletions python/paddle/jit/sot/symbolic/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def create_layer(self):
def create_inputs(self):
create_paddle_inputs = self.new_root("def create_paddle_inputs():")
self.new_root("\n")
craete_numpy_inputs = self.new_root("def create_numpy_inputs():")
create_numpy_inputs = self.new_root("def create_numpy_inputs():")

paddle_inputs = ["inputs = ("]
numpy_inputs = ["inputs = ("]
Expand Down Expand Up @@ -257,7 +257,7 @@ def create_inputs(self):
numpy_inputs.append("return inputs")

create_paddle_inputs.add_sub(*paddle_inputs)
craete_numpy_inputs.add_sub(*numpy_inputs)
create_numpy_inputs.add_sub(*numpy_inputs)

def create_test(self):
test_class = self.new_root("class TestLayer(unittest.TestCase):")
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/jit/sot/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def symbolic_translate(fn: Callable[P, R], **kwargs) -> Callable[P, R]:
Callable, The wrapped function.

Examples:
>>> # doctest: +SKIP("Cound not get source code of function foo."")
>>> # doctest: +SKIP("Could not get source code of function foo.")
>>> import paddle
>>> import numpy as np
>>> from sot.translate import symbolic_translate
Expand Down
8 changes: 4 additions & 4 deletions python/paddle/nn/layer/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ class Conv2D(_ConvNd):
stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. The default value is 1.
padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (depth, height, width) is zero-padded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
Expand Down Expand Up @@ -800,7 +800,7 @@ class Conv2DTranspose(_ConvNd):
stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: 1.
padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (depth, height, width) is zero-padded by size of `padding` on both sides
3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
Expand Down Expand Up @@ -960,7 +960,7 @@ class Conv3D(_ConvNd):
stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must
contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
stride_D = stride_H = stride_W = stride. The default value is 1.
padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (depth, height, width) is zero-padded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
Expand Down Expand Up @@ -1138,7 +1138,7 @@ class Conv3DTranspose(_ConvNd):
If stride is a list/tuple, it must contain three integers, (stride_depth, stride_height,
stride_width). Otherwise, stride_depth = stride_height = stride_width = stride.
Default: 1.
padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (depth, height, width) is zero-padded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
Expand Down
Loading
Loading