Commit 103b30e

Merge branch 'develop' into is_same_shape

2 parents d16e02d + 4e3d222
2,032 files changed: +72,149 −34,416 lines

.gitignore

Lines changed: 8 additions & 2 deletions
@@ -5,7 +5,7 @@ paddle/fluid/API_PR.spec
 paddle/fluid/eager/api/generated/*
 paddle/fluid/op_use_default_grad_maker_DEV.spec
 paddle/fluid/op_use_default_grad_maker_PR.spec
-paddle/fluid/operators/ops_extra_info.h
+paddle/fluid/operators/ops_extra_info.cc
 paddle/phi/api/backward/backward_api.h
 paddle/phi/api/backward/sparse_bw_api.h
 paddle/phi/api/include/api.h
@@ -38,6 +38,7 @@ build_doc/
 CMakeSettings.json
 Makefile
 .test_env/
+.cache/
 third_party/

 *~
@@ -65,9 +66,14 @@ paddle/infrt/dialect/pd/common/pd_ops_info.h
 paddle/infrt/tests/dialect/Output
 paddle/infrt/tests/lit.cfg.py
 paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.cc
-paddle/fluid/pybind/eager_final_state_op_function.cc
+paddle/fluid/pybind/eager_op_function.cc

 # these files (directories) are generated before build system generation
 paddle/fluid/operators/generated_op.cc
 paddle/phi/ops/compat/generated_sig.cc
 paddle/phi/api/yaml/parsed_apis/
+python/paddle/utils/code_gen/
+paddle/fluid/pybind/tmp_eager_op_function_impl.h
+paddle/fluid/pybind/eager_op_function_impl.h
+paddle/fluid/pybind/eager_op_function_impl.h
+paddle/fluid/pybind/op_function_impl.h

.pre-commit-config.yaml

Lines changed: 7 additions & 0 deletions
@@ -64,6 +64,13 @@ repos:
             (?x)^(
                 paddle/utils/.*
             )$
+-   repo: local
+    hooks:
+    -   id: auto-generate-cmakelists
+        name: auto-generate-cmakelists
+        entry: bash ./tools/gen_ut_cmakelists.hook
+        language: system
+        files: testslist.csv$
 -   repo: https://github.com/cheshirekow/cmake-format-precommit
     rev: v0.6.13
     hooks:
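The new local hook runs tools/gen_ut_cmakelists.hook (via bash) whenever a staged testslist.csv changes; pre-commit passes the matched file paths as arguments to the entry command. The generator itself is not shown in this diff, so the Python sketch below is only a hypothetical illustration of the pattern such a hook implements — mapping each CSV row to a test target in a sibling CMakeLists.txt. The column names (name, timeout) and the emitted py_test()/set_tests_properties() commands are assumptions for the sketch, not Paddle's actual schema.

# Hypothetical sketch (not the actual tools/gen_ut_cmakelists.hook):
# regenerate a CMakeLists.txt next to each testslist.csv passed in argv,
# emitting one test target per CSV row.
import csv
import sys
from pathlib import Path

def generate_cmakelists(csv_path: str) -> None:
    with open(csv_path, newline="") as f:
        rows = list(csv.DictReader(f))
    out = Path(csv_path).with_name("CMakeLists.txt")
    with open(out, "w") as f:
        for row in rows:
            # One test target per row; column layout is assumed.
            f.write(f"py_test({row['name']} SRCS {row['name']}.py)\n")
            f.write(f"set_tests_properties({row['name']} "
                    f"PROPERTIES TIMEOUT {row.get('timeout', 120)})\n")

if __name__ == "__main__":
    for path in sys.argv[1:]:  # pre-commit passes the matched files
        generate_cmakelists(path)

Because files: testslist.csv$ scopes the hook, only directories whose test list actually changed get their CMakeLists.txt regenerated.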

CMakeLists.txt

Lines changed: 12 additions & 0 deletions
@@ -364,6 +364,18 @@ if(WIN32)
   endif()
 endif()

+if(NOT WITH_TESTING AND WITH_MULTINODE_TESTING)
+  message(
+    WARNING
+      "Disable WITH_MULTINODE_TESTING when compiling without TESTING. Force WITH_MULTINODE_TESTING=OFF."
+  )
+  set(WITH_MULTINODE_TESTING
+      OFF
+      CACHE STRING
+            "Disable WITH_MULTINODE_TESTING when compiling without TESTING"
+            FORCE)
+endif()
+
 if(NOT WITH_GPU AND WITH_NCCL)
   message(
     WARNING "Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF.")

README.md

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ Now our developers can acquire Tesla V100 online computing resources for free. I

 - **High-Performance Inference Engines for Comprehensive Deployment Environments**

-    PaddlePaddle is not only compatible with models trained in 3rd party open-source frameworks , but also offers complete inference products for various production scenarios. Our inference product line includes [Paddle Inference](https://paddle-inference.readthedocs.io/en/latest/product_introduction/summary.html): Native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving): A service-oriented framework suitable for distributed and pipeline productions; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): Ultra-Lightweight inference engine for mobile and IoT environments; [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): A frontend inference engine for browser and mini-apps. Furthermore, by great amounts of optimization with leading hardware in each scenario, Paddle inference engines outperform most of the other mainstream frameworks.
+    PaddlePaddle is not only compatible with models trained in 3rd party open-source frameworks , but also offers complete inference products for various production scenarios. Our inference product line includes [Paddle Inference](https://paddle-inference.readthedocs.io/en/master/guides/introduction/index_intro.html): Native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving): A service-oriented framework suitable for distributed and pipeline productions; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): Ultra-Lightweight inference engine for mobile and IoT environments; [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): A frontend inference engine for browser and mini-apps. Furthermore, by great amounts of optimization with leading hardware in each scenario, Paddle inference engines outperform most of the other mainstream frameworks.


 - **Industry-Oriented Models and Libraries with Open Source Repositories**

README_cn.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型
4949

5050
- **支持多端多平台的高性能推理部署工具**
5151

52-
飞桨不仅广泛兼容第三方开源框架训练的模型部署,并且为不同的场景的生产环境提供了完备的推理引擎,包括适用于高性能服务器及云端推理的原生推理库 [Paddle Inference](https://paddle-inference.readthedocs.io/en/latest/product_introduction/summary.html),面向分布式、流水线生产环境下自动上云、A/B测试等高阶功能的服务化推理框架 [Paddle Serving](https://github.com/PaddlePaddle/Serving),针对于移动端、物联网场景的轻量化推理引擎 [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite),以及在浏览器、小程序等环境下使用的前端推理引擎 [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs)。同时,透过与不同场景下的主流硬件高度适配优化及异构计算的支持, 飞桨的推理性能也领先绝大部分的主流实现。
52+
飞桨不仅广泛兼容第三方开源框架训练的模型部署,并且为不同的场景的生产环境提供了完备的推理引擎,包括适用于高性能服务器及云端推理的原生推理库 [Paddle Inference](https://www.paddlepaddle.org.cn/inference/product_introduction/inference_intro.html),面向分布式、流水线生产环境下自动上云、A/B测试等高阶功能的服务化推理框架 [Paddle Serving](https://github.com/PaddlePaddle/Serving),针对于移动端、物联网场景的轻量化推理引擎 [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite),以及在浏览器、小程序等环境下使用的前端推理引擎 [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs)。同时,透过与不同场景下的主流硬件高度适配优化及异构计算的支持, 飞桨的推理性能也领先绝大部分的主流实现。
5353

5454

5555
- **面向产业应用,开源开放覆盖多领域的工业级模型库。**

cmake/external/brpc.cmake

Lines changed: 4 additions & 5 deletions
@@ -45,9 +45,8 @@ set(prefix_path
 ExternalProject_Add(
   extern_brpc
   ${EXTERNAL_PROJECT_LOG_ARGS}
-  # TODO(gongwb): change to de newst repo when they changed
-  GIT_REPOSITORY "https://github.com/wangjiawei04/brpc"
-  GIT_TAG "e203afb794caf027da0f1e0776443e7d20c0c28e"
+  GIT_REPOSITORY "https://github.com/apache/incubator-brpc"
+  GIT_TAG 1.2.0
   PREFIX ${BRPC_PREFIX_DIR}
   UPDATE_COMMAND ""
   CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@@ -60,8 +59,8 @@ ExternalProject_Add(
              -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
              -DCMAKE_PREFIX_PATH=${prefix_path}
              -DWITH_GLOG=ON
-             -DIOBUF_WITH_HUGE_BLOCK=ON
-             -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA}
+             -DBUILD_BRPC_TOOLS=ON
+             -DBUILD_SHARED_LIBS=ON
              ${EXTERNAL_OPTIONAL_ARGS}
   LIST_SEPARATOR |
   CMAKE_CACHE_ARGS

cmake/external/xpu.cmake

Lines changed: 4 additions & 4 deletions
@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220810")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220831")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
@@ -19,7 +19,7 @@ endif()
 if(NOT DEFINED XPU_XDNN_BASE_URL)
   set(XPU_XDNN_BASE_URL_WITHOUT_DATE
       "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
-  set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220810")
+  set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220831")
 else()
   set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
 endif()
@@ -52,7 +52,7 @@ elseif(WITH_BDCENTOS)
 elseif(WITH_UBUNTU)
   set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
   set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
-  set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+  set(XPU_XCCL_DIR_NAME "xccl-ubuntu_x86_64")
   # ubuntu and centos: use output by XDNN API team
   set(XPU_XDNN_URL
       "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
@@ -68,7 +68,7 @@ elseif(WITH_CENTOS)
 else()
   set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
   set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
-  set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+  set(XPU_XCCL_DIR_NAME "xccl-ubuntu_x86_64")
   # default: use output by XDNN API team
   set(XPU_XDNN_URL
       "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"

paddle/fluid/distributed/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,4 @@
+add_subdirectory(auto_parallel)
 add_subdirectory(collective)
 add_subdirectory(store)
 if(WITH_PYTHON)
@@ -47,4 +48,3 @@ add_subdirectory(ps)
 add_subdirectory(test)
 add_subdirectory(index_dataset)
 add_subdirectory(fleet_executor)
-add_subdirectory(auto_parallel)
paddle/fluid/distributed/auto_parallel/CMakeLists.txt

Lines changed: 7 additions & 21 deletions

@@ -1,37 +1,23 @@
+proto_library(auto_parallel_proto SRCS auto_parallel.proto)
+
 cc_library(
   device_mesh
   SRCS device_mesh.cc
-  DEPS auto_parallel_proto)
-cc_test(
-  device_mesh_test
-  SRCS device_mesh_test.cc
-  DEPS device_mesh)
+  DEPS auto_parallel_proto phi_enforce)

 cc_library(
   process_mesh
   SRCS process_mesh.cc
-  DEPS auto_parallel_proto)
-cc_test(
-  process_mesh_test
-  SRCS process_mesh_test.cc
-  DEPS process_mesh)
+  DEPS auto_parallel_proto phi_enforce)

 cc_library(
   dist_attr
   SRCS dist_attr.cc
-  DEPS process_mesh auto_parallel_proto proto_desc)
-cc_test(
-  dist_attr_test
-  SRCS dist_attr_test.cc
-  DEPS dist_attr)
+  DEPS process_mesh auto_parallel_proto proto_desc phi_enforce)

 cc_library(
   dist_mapper
   SRCS dist_mapper.cc
-  DEPS device_mesh auto_parallel_proto)
-cc_test(
-  dist_mapper_test
-  SRCS dist_mapper_test.cc
-  DEPS dist_mapper)
+  DEPS device_mesh auto_parallel_proto phi_enforce)

-proto_library(auto_parallel_proto SRCS auto_parallel.proto)
+cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper)

paddle/fluid/distributed/auto_parallel/auto_parallel.proto

Lines changed: 53 additions & 0 deletions
@@ -30,6 +30,59 @@ message ProcessMeshProto {

 }

+// This distributed attribute describes how to distribute the corresponding tensor,
+// and stores any other information needed by auto parallel.
+message TensorDistAttrProto {
+  // The process mesh where a tensor is distributed.
+  optional ProcessMeshProto process_mesh = 1;
+
+  // The length of dims_mapping is the same as the length of the tensor shape.
+  // The i-th dimension of the tensor will be sharded by the dims_mapping[i]-th dimension
+  // of the above process mesh. If dims_mapping[i] is -1, the i-th dimension of the tensor
+  // will not be sharded. For example, given a tensor shape [2, 6, 12], a process mesh
+  // shape [2, 3] and a dims_mapping [-1, 1, 0], each sharded tensor will have a shape [2, 2, 6].
+  repeated int64 dims_mapping = 2;
+
+  // The batch dimension of the corresponding tensor.
+  optional int64 batch_dim = 3;
+
+  // If dynamic_dims[i] is True, the i-th dimension of the corresponding tensor
+  // changes dynamically. Otherwise, the i-th dimension of the tensor is statically determined.
+  repeated bool dynamic_dims = 4;
+}
+
+// This distributed attribute describes how to distribute the corresponding operator,
+// and stores any other information needed by auto parallel.
+message OperatorDistAttrProto {
+  message TensorDistAttrMappingEntryProto {
+    optional string name = 1;
+    optional TensorDistAttrProto tensor_dist_attr = 2;
+  }
+  // The key of this map is the input tensor name and the value is the distributed attribute
+  // of the input tensor required by this corresponding operator.
+  // The distributed attribute of the actual tensor may not be the same as that within
+  // the distributed attribute of the operator.
+  repeated TensorDistAttrMappingEntryProto input_dist_attrs = 1;
+
+  // The key of this map is the output tensor name and the value is the distributed attribute
+  // of the output tensor required by this corresponding operator.
+  // The distributed attribute of the actual tensor may not be the same as that within
+  // the distributed attribute of the operator.
+  repeated TensorDistAttrMappingEntryProto output_dist_attrs = 2;
+
+  // The process mesh where an op is distributed.
+  optional ProcessMeshProto process_mesh = 3;
+
+  // An operator ideally has one distributed operator, which may have multiple distributed implementations.
+  // This field is usually the same as the operator type. However, some operators such as the element-wise
+  // operators may share the same distributed operator; this field covers that scenario.
+  optional string impl_type = 4;
+
+  // This field tells which distributed implementation of this corresponding operator
+  // will be selected for the actual computation.
+  optional int64 impl_idx = 5;
+}
+
 // This proto describes the capability of one device such as the computation and memory.
 message DeviceCapabilityProto {
   optional double single_precision_flops = 1;
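To make the dims_mapping semantics above concrete, here is a short Python sketch. It is not part of the commit and not a Paddle API (compute_local_shape is a made-up name); it derives the per-process shard shape from the global tensor shape, the process-mesh shape, and dims_mapping, and checks the worked example from the proto comment.

# Illustrative sketch (not Paddle API): compute the local shard shape of a
# tensor following the semantics documented in TensorDistAttrProto.
def compute_local_shape(global_shape, mesh_shape, dims_mapping):
    assert len(dims_mapping) == len(global_shape)
    local_shape = []
    for size, mesh_dim in zip(global_shape, dims_mapping):
        if mesh_dim == -1:
            local_shape.append(size)       # dimension is not sharded
        else:
            n = mesh_shape[mesh_dim]       # dimension is sharded n ways
            assert size % n == 0, "uneven sharding not handled in this sketch"
            local_shape.append(size // n)
    return local_shape

# The worked example from the proto comment: a [2, 6, 12] tensor on a
# [2, 3] process mesh with dims_mapping [-1, 1, 0] yields [2, 2, 6] shards.
assert compute_local_shape([2, 6, 12], [2, 3], [-1, 1, 0]) == [2, 2, 6]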
