Skip to content

Commit 756ca9c

Browse files
committed
Fix
2 parents 4233d37 + 8f96a48 commit 756ca9c

File tree

982 files changed

+29184
-8998
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

982 files changed

+29184
-8998
lines changed

.github/CODEOWNERS

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# This file is migrated from CI script, it's an effort of modenizing our dev infra.
22
# Code owners are expected to take responsibility for review patches to respective file.
33

4-
/CMakeLists.txt @wanghuancoder @Aurelius84 @XiaoguangHu01
4+
/CMakeLists.txt @wanghuancoder @XiaoguangHu01
55
paddle/fluid/distributed/collective @sneaxiy @ForFishes
66
paddle/fluid/eager/autograd_meta.cc @JiabinYang @phlrain
77
paddle/fluid/eager/autograd_meta.h @JiabinYang @phlrain
@@ -12,20 +12,20 @@ paddle/fluid/eager/grad_node_info.h @JiabinYang @phlrain
1212
paddle/fluid/eager/grad_tensor_holder.cc @JiabinYang @phlrain
1313
paddle/fluid/eager/grad_tensor_holder.h @JiabinYang @phlrain
1414
paddle/fluid/eager/tensor_wrapper.h @JiabinYang @phlrain
15-
paddle/fluid/framework/block_desc.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
16-
paddle/fluid/framework/details/op_registry.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
17-
paddle/fluid/framework/framework.proto @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
18-
paddle/fluid/framework/grad_op_desc_maker.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
19-
paddle/fluid/framework/ir/graph.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
20-
paddle/fluid/framework/ir/node.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
21-
paddle/fluid/framework/lod_tensor.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
22-
paddle/fluid/framework/op_desc.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
23-
paddle/fluid/framework/operator.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
24-
paddle/fluid/framework/scope.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
25-
paddle/fluid/framework/selected_rows.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
26-
paddle/fluid/framework/tensor.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
15+
paddle/fluid/framework/block_desc.h @XiaoguangHu01 @zhiqiu @Xreki
16+
paddle/fluid/framework/details/op_registry.h @XiaoguangHu01 @zhiqiu @Xreki
17+
paddle/fluid/framework/framework.proto @XiaoguangHu01 @zhiqiu @Xreki
18+
paddle/fluid/framework/grad_op_desc_maker.h @XiaoguangHu01 @zhiqiu @Xreki
19+
paddle/fluid/framework/ir/graph.h @XiaoguangHu01 @zhiqiu @Xreki
20+
paddle/fluid/framework/ir/node.h @XiaoguangHu01 @zhiqiu @Xreki
21+
paddle/fluid/framework/lod_tensor.h @XiaoguangHu01 @zhiqiu @Xreki
22+
paddle/fluid/framework/op_desc.h @XiaoguangHu01 @zhiqiu @Xreki
23+
paddle/fluid/framework/operator.h @XiaoguangHu01 @zhiqiu @Xreki
24+
paddle/fluid/framework/scope.h @XiaoguangHu01 @zhiqiu @Xreki
25+
paddle/fluid/framework/selected_rows.h @XiaoguangHu01 @zhiqiu @Xreki
26+
paddle/fluid/framework/tensor.h @XiaoguangHu01 @zhiqiu @Xreki
2727
paddle/fluid/framework/unused_var_check.cc @zhiqiu @phlrain
28-
paddle/fluid/framework/var_desc.h @XiaoguangHu01 @zhiqiu @Xreki @Aurelius84
28+
paddle/fluid/framework/var_desc.h @XiaoguangHu01 @zhiqiu @Xreki
2929
paddle/fluid/operators/distributed/send_recv.proto.in @gongweibao @seiriosPlus
3030
paddle/fluid/prim/api/api.yaml @xiaoguoguo626807 @JiabinYang @phlrain
3131
paddle/fluid/prim/api/composite_backward/composite_backward_api.h @xiaoguoguo626807 @JiabinYang
@@ -44,33 +44,33 @@ paddle/phi/core/meta_tensor.h @phlrain @zyfncg @YuanRisheng
4444
paddle/phi/core/tensor_base.h @phlrain @zyfncg @YuanRisheng
4545
paddle/phi/core/tensor_meta.h @phlrain @zyfncg @YuanRisheng
4646
paddle/phi/infermeta/spmd_rules @LiYuRio @ForFishes @zhiqiu
47-
paddle/scripts/paddle_build.bat @zhwesky2010 @wanghuancoder @Aurelius84
47+
paddle/scripts/paddle_build.bat @zhwesky2010 @wanghuancoder
4848
paddle/scripts/paddle_build.sh @risemeup1 @zhangbo9674 @XieYunshen
4949
pyproject.toml @SigureMo @gouzil
50-
python/paddle/autograd/backward_utils.py @Aurelius84 @xiaoguoguo626807 @changeyoung98 @phlrain
51-
python/paddle/autograd/ir_backward.py @Aurelius84 @xiaoguoguo626807 @changeyoung98
52-
python/paddle/base/backward.py @XiaoguangHu01 @zhiqiu @Xreki @qili93 @Aurelius84
53-
python/paddle/base/compiler.py @XiaoguangHu01 @zhiqiu @Xreki @qili93 @Aurelius84
50+
python/paddle/autograd/backward_utils.py @xiaoguoguo626807 @changeyoung98 @phlrain
51+
python/paddle/autograd/ir_backward.py @xiaoguoguo626807 @changeyoung98
52+
python/paddle/base/backward.py @XiaoguangHu01 @zhiqiu @Xreki @qili93
53+
python/paddle/base/compiler.py @XiaoguangHu01 @zhiqiu @Xreki @qili93
5454
python/paddle/base/dygraph/layers.py @JiabinYang @phlrain
55-
python/paddle/base/framework.py @XiaoguangHu01 @zhiqiu @Xreki @qili93 @Aurelius84
56-
python/paddle/base/__init__.py @phlrain @Aurelius84 @qili93
57-
python/paddle/base/tests/unittests/white_list/check_op_sequence_batch_1_input_white_list.py @Aurelius84 @phlrain
58-
python/paddle/base/tests/unittests/white_list/check_op_sequence_instance_0_input_white_list.py @Aurelius84 @phlrain
59-
python/paddle/base/tests/unittests/white_list/check_shape_white_list.py @hong19860320 @Aurelius84 @phlrain
60-
python/paddle/base/tests/unittests/white_list/compile_vs_runtime_white_list.py @Aurelius84 @phlrain
61-
python/paddle/base/tests/unittests/white_list/no_check_set_white_list.py @Aurelius84 @phlrain
62-
python/paddle/base/tests/unittests/white_list/no_grad_set_white_list.py @Aurelius84 @phlrain
63-
python/paddle/base/tests/unittests/white_list/op_accuracy_white_list.py @juncaipeng @zhangting2020 @Aurelius84
64-
python/paddle/base/tests/unittests/white_list/op_threshold_white_list.py @juncaipeng @zhangting2020 @Aurelius84
55+
python/paddle/base/framework.py @XiaoguangHu01 @zhiqiu @Xreki @qili93
56+
python/paddle/base/__init__.py @phlrain @qili93
57+
test/white_list/check_op_sequence_batch_1_input_white_list.py @phlrain
58+
test/white_list/check_op_sequence_instance_0_input_white_list.py @phlrain
59+
test/white_list/check_shape_white_list.py @hong19860320 @phlrain
60+
test/white_list/compile_vs_runtime_white_list.py @phlrain
61+
test/white_list/no_check_set_white_list.py @phlrain
62+
test/white_list/no_grad_set_white_list.py @phlrain
63+
test/white_list/op_accuracy_white_list.py @juncaipeng @zhangting2020
64+
test/white_list/op_threshold_white_list.py @juncaipeng @zhangting2020
6565
python/paddle/distributed/fleet/__init__.py @sneaxiy @raindrops2sea
6666
python/paddle/distributed/fleet/launch.py @sneaxiy @raindrops2sea
6767
python/paddle/distributed/__init__.py @sneaxiy @raindrops2sea
6868
python/paddle/incubate/autograd/composite_rules.py @xiaoguoguo626807 @JiabinYang
6969
python/paddle/incubate/autograd/primitives.py @xiaoguoguo626807 @JiabinYang @phlrain
7070
python/paddle/_typing @SigureMo @zrr1999 @gouzil
7171
python/requirements.txt @phlrain @jzhang533 @kolinwei
72-
test/dygraph_to_static @SigureMo @Aurelius84 @gouzil
73-
test/sot @SigureMo @Aurelius84 @gouzil
74-
tools/parallel_UT_rule.py @zhwesky2010 @wanghuancoder @Aurelius84
75-
tools/windows/run_unittests.sh @zhwesky2010 @wanghuancoder @Aurelius84
72+
test/dygraph_to_static @SigureMo @zrr1999 @gouzil
73+
test/sot @SigureMo @zrr1999 @gouzil
74+
tools/parallel_UT_rule.py @zhwesky2010 @wanghuancoder
75+
tools/windows/run_unittests.sh @zhwesky2010 @wanghuancoder
7676
.pre-commit-config.yaml @SigureMo @gouzil

cmake/external/xpu.cmake

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,13 @@ set(XPU_XFT_LIB_NAME "libxft.so")
2525
set(XPU_XPTI_LIB_NAME "libxpti.so")
2626
set(XPU_XBLAS_LIB_NAME "libxpu_blas.so")
2727
set(XPU_XFA_LIB_NAME "libxpu_flash_attention.so")
28+
set(XPU_XPUDNN_LIB_NAME "libxpu_dnn.so")
2829

2930
if(NOT DEFINED XPU_XRE_BASE_VERSION)
3031
set(XPU_XRE_BASE_VERSION "4.32.0.1")
3132
endif()
3233
if(NOT DEFINED XPU_XHPC_BASE_DATE)
33-
set(XPU_XHPC_BASE_DATE "eb35/20240927")
34+
set(XPU_XHPC_BASE_DATE "eb35/20241015")
3435
endif()
3536
set(XPU_XCCL_BASE_VERSION "1.2.11e")
3637
if(NOT DEFINED XPU_XFT_BASE_VERSION)
@@ -139,6 +140,7 @@ set(XPU_XBLAS_LIB "${XPU_LIB_DIR}/${XPU_XBLAS_LIB_NAME}")
139140
set(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}")
140141
set(XPU_CUDA_LIB "${XPU_LIB_DIR}/${XPU_CUDA_LIB_NAME}")
141142
set(XPU_XFA_LIB "${XPU_LIB_DIR}/${XPU_XFA_LIB_NAME}")
143+
set(XPU_XPUDNN_LIB "${XPU_LIB_DIR}/${XPU_XPUDNN_LIB_NAME}")
142144

143145
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib")
144146

@@ -175,7 +177,9 @@ ExternalProject_Add(
175177
UPDATE_COMMAND ""
176178
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
177179
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT}
178-
BUILD_BYPRODUCTS ${XPU_API_LIB} BUILD_BYPORDUCTS ${XPU_XBLAS_LIB}
180+
BUILD_BYPRODUCTS ${XPU_API_LIB}
181+
BUILD_BYPRODUCTS ${XPU_XBLAS_LIB}
182+
BUILD_BYPRODUCTS ${XPU_XPUDNN_LIB}
179183
BUILD_BYPRODUCTS ${XPU_XFA_LIB}
180184
BUILD_BYPRODUCTS ${XPU_RT_LIB}
181185
BUILD_BYPRODUCTS ${XPU_BKCL_LIB})
@@ -203,6 +207,8 @@ set(XPU_XHPC_INC_DIR "${XPU_INC_DIR}/xhpc")
203207
include_directories(${XPU_XHPC_INC_DIR})
204208
set(XPU_XBLAS_INC_DIR "${XPU_INC_DIR}/xhpc/xblas")
205209
include_directories(${XPU_XBLAS_INC_DIR})
210+
set(XPU_XPUDNN_INC_DIR "${XPU_INC_DIR}/xhpc/xpudnn")
211+
include_directories(${XPU_XPUDNN_INC_DIR})
206212

207213
if(WITH_XPU_XRE5)
208214
add_definitions(-DPADDLE_WITH_XPU_XRE5)
@@ -227,8 +233,14 @@ if(WITH_XPTI)
227233
endif()
228234

229235
if(WITH_XPU_XRE5)
230-
target_link_libraries(xpulib ${XPU_RT_LIB} ${XPU_BKCL_LIB} ${XPU_XBLAS_LIB}
231-
${XPU_API_LIB} ${XPU_XFA_LIB})
236+
target_link_libraries(
237+
xpulib
238+
${XPU_RT_LIB}
239+
${XPU_BKCL_LIB}
240+
${XPU_XBLAS_LIB}
241+
${XPU_API_LIB}
242+
${XPU_XFA_LIB}
243+
${XPU_XPUDNN_LIB})
232244
else()
233245
target_link_libraries(xpulib ${XPU_RT_LIB} ${XPU_BKCL_LIB} ${XPU_XBLAS_LIB}
234246
${XPU_API_LIB})

cmake/inference_lib.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ endif()
304304

305305
copy(
306306
inference_lib_dist
307-
SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
307+
SRCS ${CMAKE_BINARY_DIR}/paddle/phi/core/framework/framework.pb.h
308308
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/internal)
309309
copy(
310310
inference_lib_dist

paddle/cinn/adt/equation_solver.cc

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ std::unordered_map<Variable, Value> InferValuesImpl(
3737
PADDLE_ENFORCE_EQ(
3838
ctx->HasValue(in_variable),
3939
true,
40-
phi::errors::NotFound("The param id's out_iter must contain "
41-
"its in_iter's value"));
40+
::common::errors::NotFound("The param id's out_iter must contain "
41+
"its in_iter's value"));
4242
return {{out_iter.value(), ctx->GetValue(in_variable)}};
4343
}
4444

@@ -49,8 +49,8 @@ std::unordered_map<Variable, Value> InferValuesImpl(
4949
PADDLE_ENFORCE_EQ(
5050
ctx->HasValue(in_variable),
5151
true,
52-
phi::errors::NotFound("The param id's out_iter must contain "
53-
"its in_iter's value"));
52+
::common::errors::NotFound("The param id's out_iter must contain "
53+
"its in_iter's value"));
5454
return {{out_index.value(), ctx->GetValue(in_variable)}};
5555
}
5656

@@ -215,7 +215,7 @@ std::unordered_map<Variable, Value> InferValuesImpl(
215215
PADDLE_ENFORCE_EQ(
216216
ret.emplace(out_msg_in_indexes.value()->at(i), value).second,
217217
true,
218-
phi::errors::AlreadyExists([&]() {
218+
::common::errors::AlreadyExists([&]() {
219219
std::ostringstream oss;
220220
oss << "Failed to insert the variable '"
221221
<< "out_msg_in_indexes.value()->at(" << i
@@ -229,7 +229,7 @@ std::unordered_map<Variable, Value> InferValuesImpl(
229229
if (out_index.has_value()) {
230230
PADDLE_ENFORCE_EQ(ret.emplace(out_index.value(), value).second,
231231
true,
232-
phi::errors::AlreadyExists([&]() {
232+
::common::errors::AlreadyExists([&]() {
233233
std::ostringstream oss;
234234
oss << "Failed to insert the variable '"
235235
<< "out_index.value()"
@@ -306,7 +306,9 @@ void SolveEquations(
306306
tValueInferSuccess<bool> has_unique_value =
307307
MergeInferedValuesIntoCtx(function, ctx);
308308
PADDLE_ENFORCE_EQ(
309-
has_unique_value.value(), true, phi::errors::InvalidArgument([&]() {
309+
has_unique_value.value(),
310+
true,
311+
::common::errors::InvalidArgument([&]() {
310312
std::ostringstream oss;
311313
oss << "Failed to merge inferred values into the context for "
312314
"function '"

paddle/cinn/backends/codegen_device_util.cc

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,24 @@ void detail::CollectBucketStrategyHostFunctionVisitor::ProcessLoweredFunc(
262262
ir::CallType::Extern,
263263
ir::FunctionRef(),
264264
0);
265+
266+
// create memset calls for temp_spaces if needed
267+
std::vector<ir::Expr> call_kernel_stmts;
268+
for (auto &temp_space : func_node->temp_spaces) {
269+
if (temp_space.need_zero_init()) {
270+
ir::Expr size = common::cast(temp_space.size(), common::UInt(64));
271+
ir::Expr call_get_arg =
272+
lang::CallExtern(runtime::intrinsic::get_item_in_cuda_kernel_args,
273+
{kernel_args_, ir::Expr(temp_space.arg_idx())});
274+
ir::Expr call_memset = lang::CallExtern(
275+
runtime::intrinsic::call_cuda_memset,
276+
{call_get_arg, ir::Expr(1), ir::Expr(0), size, kernel_stream_});
277+
call_kernel_stmts.push_back(call_memset);
278+
}
279+
}
280+
call_kernel_stmts.push_back(call_extern_api);
281+
call_extern_api = ir::Block::Make(call_kernel_stmts);
282+
265283
if (buckets_.empty()) {
266284
buckets_.emplace_back(ir::IfThenElse::Make(predicate, call_extern_api));
267285
} else {
@@ -270,6 +288,26 @@ void detail::CollectBucketStrategyHostFunctionVisitor::ProcessLoweredFunc(
270288
buckets_.emplace_back(
271289
ir::IfThenElse::Make(predicate, call_extern_api, false_expr));
272290
}
291+
292+
// create infer shape calls for temp_spaces
293+
std::vector<ir::Expr> temp_space_infer_shape_stmts;
294+
for (int i = 0; i < func_node->temp_spaces.size(); ++i) {
295+
ir::Var tensor_shape_args(TENSOR_SHAPE_ARGS, type_of<int64_t **>());
296+
ir::Expr size =
297+
common::cast(func_node->temp_spaces[i].size(), common::Int(64));
298+
ir::Expr call_set_value =
299+
lang::CallExtern(runtime::intrinsic::infer_shape_set_value,
300+
{ir::Expr(func_node->num_output_tensors + i),
301+
ir::Expr(0),
302+
size,
303+
tensor_shape_args});
304+
temp_space_infer_shape_stmts.push_back(call_set_value);
305+
}
306+
if (!temp_space_infer_shape_stmts.empty()) {
307+
ir::Expr if_body = ir::Block::Make(temp_space_infer_shape_stmts);
308+
temp_space_infer_shape_body_ =
309+
ir::IfThenElse::Make(predicate, if_body, temp_space_infer_shape_body_);
310+
}
273311
}
274312

275313
void detail::CollectBucketStrategyHostFunctionVisitor::ProcessArgs(

paddle/cinn/backends/codegen_device_util.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,9 @@ struct CollectBucketStrategyHostFunctionVisitor
280280
infer_shape_func_body_stmts.insert(
281281
infer_shape_func_body_stmts.end(),
282282
op->infer_shape_func.as_lowered_func()->body);
283+
if (temp_space_infer_shape_body_.defined()) {
284+
infer_shape_func_body_stmts.push_back(temp_space_infer_shape_body_);
285+
}
283286

284287
std::vector<ir::Argument> infer_shape_arguments = {
285288
ir::Argument(kernel_args_, ir::Argument::IO::kOutput),
@@ -307,6 +310,7 @@ struct CollectBucketStrategyHostFunctionVisitor
307310
private:
308311
std::vector<ir::Expr> buckets_;
309312
std::vector<ir::Expr> arg_defs_;
313+
ir::Expr temp_space_infer_shape_body_;
310314

311315
ir::Var kernel_args_;
312316
ir::Var kernel_args_num_;

paddle/cinn/common/const_fold.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,5 +71,41 @@ inline std::optional<ir::Expr> TryConstFold<ir::Mul>(ir::Expr a, ir::Expr b) {
7171
return std::nullopt;
7272
}
7373

74+
template <>
75+
inline std::optional<ir::Expr> TryConstFold<ir::Div>(ir::Expr a, ir::Expr b) {
76+
const ir::IntImm* pa = a.As<ir::IntImm>();
77+
const ir::IntImm* pb = b.As<ir::IntImm>();
78+
const auto& rtype = a.type();
79+
if (pa && pb) {
80+
int64_t res = pa->value / pb->value;
81+
return cinn::common::make_shared<ir::IntImm>(rtype, res);
82+
}
83+
if (pa) {
84+
if (pa->value == 0) return a;
85+
}
86+
if (pb) {
87+
if (pb->value == 1) return a;
88+
}
89+
return std::nullopt;
90+
}
91+
92+
template <>
93+
inline std::optional<ir::Expr> TryConstFold<ir::Mod>(ir::Expr a, ir::Expr b) {
94+
const ir::IntImm* pa = a.As<ir::IntImm>();
95+
const ir::IntImm* pb = b.As<ir::IntImm>();
96+
const auto& rtype = a.type();
97+
if (pa && pb) {
98+
int64_t res = pa->value % pb->value;
99+
return cinn::common::make_shared<ir::IntImm>(rtype, res);
100+
}
101+
if (pa) {
102+
if (pa->value == 0) return a;
103+
}
104+
if (pb) {
105+
if (pb->value == 1) return ir::Zero(rtype);
106+
}
107+
return std::nullopt;
108+
}
109+
74110
} // namespace common
75111
} // namespace cinn

paddle/cinn/common/dim_expr_converter.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,8 @@ struct DimExprConverterWithSymbolBindings::
153153
return inputs_[input_idx]->sym_shape[input_dim_idx]->GetDimExpr();
154154
}
155155
// for data binding [S0, a, b], inputs[a] is Tensor A, return A(b)
156-
return inputs_[input_idx](cinn::ir::Expr(input_dim_idx));
156+
return ir::Cast::Make(cinn::common::I64(),
157+
inputs_[input_idx](cinn::ir::Expr(input_dim_idx)));
157158
}
158159

159160
DimExprToIrExprVisitorWithSymbolBinding(

0 commit comments

Comments (0)