Merged
193 commits
77d7638
[DimExpr] DimExpr support hash (#60471)
jiahy0825 Jan 2, 2024
8fcf35b
open warning with `paddle.utils.deprecated` (#60458)
ooooo-create Jan 2, 2024
a08580e
[AutoParallel] Auto Trans PP to VPP (#60467)
zhaoyinglia Jan 2, 2024
b56d140
【PIR OpTest Fix No.23】 fix test_distribute_fpn_proposals_op (#60335)
xingmingyyj Jan 2, 2024
7041276
fix test_lookup_table_v2_bf16_op (#60332)
xingmingyyj Jan 2, 2024
1761931
Fix shape error in combined-indexing setitem (#60447)
zoooo0820 Jan 2, 2024
c0d6d7d
[auto parallel] Add pp lazy init, bug fix for xavier (#60441)
FeixLiu Jan 2, 2024
df4ca85
[PIR] add slice_array_dense api (#60433)
zhangbo9674 Jan 2, 2024
7c7446f
Set value with scalar (#60452)
zoooo0820 Jan 2, 2024
cfad7d2
[PIR]Support custom op in PIR (#59790)
YuanRisheng Jan 2, 2024
5e2a3db
[Prim][PIR] support roll, gather, scatter, scatter_nd_add op backward…
kevincheng2 Jan 2, 2024
8550d4c
[PIR] delete dense_tensor mem_desc_ (#60024)
wanghuancoder Jan 2, 2024
1aa6851
[PIR] Complement op defs (#60475)
kangguangli Jan 2, 2024
617b8ad
[pir]Supporting constant_folding_pass for train (#60355)
zhangyuqin1998 Jan 2, 2024
fea90ed
[Dynamic Shape] Fuse shape ops into generate shape op pass (#60490)
jiahy0825 Jan 2, 2024
58ca933
Fix _hiden_size to _hidden_size (#60485)
co63oc Jan 2, 2024
5376caa
[DimExpr] Add substitute DimExpr util (#60493)
jiahy0825 Jan 2, 2024
290bf41
[xpu]add sine_pos fuse pass and sine_pos xpu kernel (#60025)
NeroLoh Jan 2, 2024
bd29981
add split with variable in factors and rewrite vectorize,unroll,bind …
Courtesy-Xs Jan 2, 2024
8b2b953
[CodeStyle] Fix regression of Ruff in sot (#60483)
SigureMo Jan 2, 2024
48b7279
support cast op from FP32 to low precision (#60385)
heavyrain-lzy Jan 3, 2024
40fa8bc
test=document_fix (#60399)
tianshuo78520a Jan 3, 2024
619ca11
[XPU] refine flash attention ut (#60474)
houj04 Jan 3, 2024
c7a3f63
[Inference] support collect shape in sub block (#60451)
yuanlehome Jan 3, 2024
ddd29d2
fix process mesh incorrect set in converter (#60504)
LiYuRio Jan 3, 2024
be8bc1e
【CMake opt No.13】Remove CINN DEPS in test/cpp/pir/shape_dialect/CMake…
Liyulingyue Jan 3, 2024
deb5397
【pir】 add tensorarray op createarrylike, add_n (#60460)
xiaoguoguo626807 Jan 3, 2024
698bb42
Add align iter space tactic (#60498)
BiynXu Jan 3, 2024
54b95ae
[Dynamic Shape] Add helper function MakeGenerateShapeOpAttribute (#60…
jiahy0825 Jan 3, 2024
99af9f7
[Prim][PIR] Set prim gflag for pure cpp (#60505)
cyber-pioneer Jan 3, 2024
8e280ea
[PIR] Refine and fix pir exe (#60443)
zhangbo9674 Jan 3, 2024
4b22e3e
update 2023 security advisory, test=document_fix (#60527)
VigiZhang Jan 3, 2024
d890019
[Inference] refine common/*.h for inference lib (#60513)
yuanlehome Jan 3, 2024
5d01382
【complex op】No.19 add complex support for triangular_solve (#59529)
zbt78 Jan 3, 2024
2ad9e24
fix reshard dist_attr (#60535)
LiYuRio Jan 3, 2024
353cb27
【auto parallel】Remove the proto dependency from sharding-inference-related headers (#60543)
liuzhenhai93 Jan 4, 2024
6b2d74c
[PIR] Support Operation::Clone Interface (#60536)
Aurelius84 Jan 4, 2024
a05f195
[Dynamic Shape] Add FullyInsertBroadcastPass and Broadcast Op (#60511)
jiahy0825 Jan 4, 2024
488bd17
Fix OpTranslatorTest name (#60518)
xingmingyyj Jan 4, 2024
d307890
[PIR] migrate DataFeeder into pir (#60434)
MarioLulab Jan 4, 2024
e397b29
【PIR API adaptor No.90,92】Migrate some ops into pir (#59801)
longranger2 Jan 4, 2024
1b26966
[DimExpr] Convert Broadcast to BroadcastTree (#60440)
jiahy0825 Jan 4, 2024
aacdc4d
[Dynamic Shape] Erase expand (#60525)
jiahy0825 Jan 4, 2024
193fea3
[inference] Support wint4 groupwise with cutlass gemm (#60422)
freeliuzc Jan 4, 2024
7b616c4
simplify extent of loop after fuse and add corresponding test case (#…
Courtesy-Xs Jan 4, 2024
cd0bd57
Merge remote-tracking branch 'upstream/develop' into auto_mixed_preci…
Xinyu302 Jan 4, 2024
f460723
fix bug of put_along_axis (#60551)
YibinLiu666 Jan 4, 2024
f07846a
remove clearPass to allow custom device use fusion under fp16 (#60541)
engineer1109 Jan 4, 2024
5f1727b
fix fleetutil get_online_pass_interval bug2; test=develop (#60544)
danleifeng Jan 4, 2024
9018371
fix vs2017 limit (#60528)
xuxinyi389 Jan 4, 2024
08ed2a5
【Hackathon 5th No.20】Add Exponential and Gamma APIs to Paddle (#57899)
MayYouBeProsperous Jan 4, 2024
9c24a2a
[CINN] Add IntrinsicOps into ir_codes_collector (#60556)
zhhsplendid Jan 4, 2024
8ed3d18
【auto parallel】custom op spmd rule register (#60509)
liuzhenhai93 Jan 4, 2024
f84fbdd
【AutoParallel】Add master grad in AMP-O2 of AutoParallel (#59987)
heavyrain-lzy Jan 4, 2024
6dfb15e
[Dy2St] Fix `NameloadJstTransformer` missing transform call kwargs (#…
SigureMo Jan 4, 2024
0ebae80
cinn(backends): generate infer shape kernel to infer shape of output …
6clc Jan 4, 2024
a6cd3bd
fix tensor math method inplace converter (#60546)
LiYuRio Jan 4, 2024
51c869e
[xpu]Add vis_decoder_attention_xpu_pass && modify qkv_attention_xpu_k…
TR666 Jan 4, 2024
0033033
[Prim][PIR] support abs, instance_norm op backward in prim pir (#60444)
kevincheng2 Jan 4, 2024
c2dd202
[PIR] remove log simply name mechanism from phi to common. (#60507)
winter-wang Jan 4, 2024
1d59851
[InferSymbolicShape] Delete redundant value_id_to_shapeordata_ (#60554)
jiahy0825 Jan 4, 2024
09544f6
【Hackathon 5th No.25】add gammaln api (#60553)
GreatV Jan 5, 2024
c3106c4
fix (#60570)
zhangbo9674 Jan 5, 2024
bc13117
[CINN] Add tile tactic and bind cuda tactic (#60534)
BiynXu Jan 5, 2024
58689d3
【PIR OpTest Fix No.8】 fix test_shuffle_batch_op (#59631)
xingmingyyj Jan 5, 2024
2033381
【PIR OpTest Fix No.14】 fix test_nce (#60255)
xingmingyyj Jan 5, 2024
1874d1c
【PIR OpTest Fix No.19】 fix test_ftrl_op (#60329)
xingmingyyj Jan 5, 2024
a9712d1
[auto parallel] Lazy init for MP. Add reshard infer shape. (#60563)
FeixLiu Jan 5, 2024
75e62a2
[PIR] Add unittest for Operation::Clone and Group::Clone (#60577)
Aurelius84 Jan 5, 2024
488f367
[PIR] dce pass disable custom op (#60578)
yuanlehome Jan 5, 2024
2b86637
[Inference] Fix bug of RunWithExternalStream API in new executor (#60…
ming1753 Jan 5, 2024
a11aabd
Resubmit PR-58859 (#60310)
eee4017 Jan 5, 2024
116c892
tensor_array slice in PIR (#60503)
zoooo0820 Jan 5, 2024
57feb0a
Set DistModel state_dict keys to structure_names (#60478)
pangengzheng Jan 5, 2024
ed6f32d
fix sm75 build bug (#60583)
freeliuzc Jan 5, 2024
53ca3e8
replace LOG(INFO) with VLOG(6)
Xinyu302 Jan 5, 2024
ee3d2fc
Add CanProveDivisible for symbolic calculation (#60572)
Courtesy-Xs Jan 6, 2024
7c7c5b1
[PIR][DynamicShape] make shape pass default and fix some bugs (#60548)
lanxianghit Jan 6, 2024
5e4f499
Fix words (#60603)
co63oc Jan 8, 2024
0cb8368
【auto parallel】custom op use spmd rule (#60571)
liuzhenhai93 Jan 8, 2024
be98374
[auto parallel] add lazy init ut to llama (#60585)
FeixLiu Jan 8, 2024
1646a83
【pir】modify array_write and array_read vjp, add a simple while with…
xiaoguoguo626807 Jan 8, 2024
385ec43
[Prim][PIR] add leaky_relu, sigmoid, instance_norm op forward prim (…
kevincheng2 Jan 8, 2024
e2b4247
[CINN]Add bucket context (#60549)
BiynXu Jan 8, 2024
41679e4
Add CUDNNv8 max pooling (#59413)
Tom-Zheng Jan 8, 2024
fa1f901
update lbfgs to avoid the randomness caused by paddle.dot() temporari…
lijialin03 Jan 8, 2024
5df9cdf
set_pir_tests_properties for some tests (#60401)
xingmingyyj Jan 8, 2024
5bb661d
Add tests to whitelist (#60522)
xingmingyyj Jan 8, 2024
54ca31c
fix double grad without convert inplace (#60614)
LiYuRio Jan 8, 2024
311c0ea
fix fleetutil get_online_pass_interval bug3 (#60615)
danleifeng Jan 8, 2024
21f0c78
[PIR][DynamicShape] Add an example for broadcast in dynamic shape inf…
lanxianghit Jan 8, 2024
b180800
fix_convert_all_blocks (#60613)
Wangzheee Jan 9, 2024
7a363e7
[Paddle-TRT] support set_value dynamic shape (#60508)
zhink Jan 9, 2024
fbb5801
fix (#60625)
zhangbo9674 Jan 9, 2024
47ecd81
[PIR] Support Region Clone in Operation::Clone (#60590)
huangjiyi Jan 9, 2024
9982819
deg2rad test passed (#60619)
changeyoung98 Jan 9, 2024
640c759
[PIR+CINN]Fix Pool2d Variant Attribute for kernel_size (#60623)
Aurelius84 Jan 9, 2024
33cb1be
[SOT] move_gpu_pinned_to_gpu (#60395)
feifei-111 Jan 9, 2024
fb4f0ef
【PIR API adaptor No.35、40】Migrate paddle.nn.ChannelShuffle/ClipGradBy…
fsczz Jan 9, 2024
6d85e70
add param name for dist_tensor parameter (#60574)
LiYuRio Jan 9, 2024
0dfba1a
Fix (#60631)
co63oc Jan 9, 2024
b80ece9
[PIR] Reify InferSymbolicShapeInterface (#60438)
zhangboSJTU Jan 9, 2024
0e13ae0
[Dynamic Shape] Remove ShapeBroadcastOp redundant codes (#60609)
jiahy0825 Jan 9, 2024
7eb6b0d
[Dy2St] fix `test_grad` in PIR mode (#60621)
SigureMo Jan 9, 2024
620e371
reconstruct llama ci cases (#60637)
haohongxiang Jan 9, 2024
504a590
【AutoParallel】Unify the fp16 and bf16 in auto-parallel (#60514)
heavyrain-lzy Jan 9, 2024
a98f715
[Dynamic Shape] Add SplitGenerateShapeIntoShapeOpsPass (#60624)
jiahy0825 Jan 9, 2024
6935d75
update pdsa-2023-019, test=document_fix (#60646)
VigiZhang Jan 9, 2024
b578350
[SOT] sot export test files (#60547)
feifei-111 Jan 9, 2024
10a11d8
Improve the performance of put_along_axis (#60618)
YibinLiu666 Jan 9, 2024
5e35868
[AutoParallel] Fit vpp for gradient_merge pass (#60560)
AndSonder Jan 9, 2024
44062f5
add test_semi_auto_parallel_hybrid_strategy (#60537)
Liujie0926 Jan 9, 2024
b1daab4
[PIR]Open uts for AdaptiveAvgPool3D (#60636)
0x45f Jan 9, 2024
958e281
test (#60654)
risemeup1 Jan 9, 2024
114f0b6
[CINN] Add OptimizeReductionTactic (#60661)
BiynXu Jan 9, 2024
a159cd1
[Paddle-Trt]update set_value cmakelist (#60664)
bukejiyu Jan 9, 2024
3324c9d
[auto parallel] fix reshape infer shape (#60632)
FeixLiu Jan 10, 2024
ce50c3d
[CINN+PIR]Clean Old GroupScheduler logic and switch into new_group_sc…
Aurelius84 Jan 10, 2024
7114658
[CINN]Fix HasDynamicShape Bug while Type is NULL (#60658)
Aurelius84 Jan 10, 2024
1b5c8f3
[PIR] pir onednn support legacy instruction and lrn (#60502)
wanghuancoder Jan 10, 2024
452fe18
c_softmax_with_cross_entropy support bf16 for xpu (#60472)
zhangyk0314 Jan 10, 2024
8140e6b
enable custom device to use silu_fuse_pass (#60595)
engineer1109 Jan 10, 2024
b8b175c
[XPU] add empty_like op and test, update XHPC to 20240105 (#60617)
dynamicheart Jan 10, 2024
25a7b2b
[XPU] update XHPC date and refine FA ut (#60598)
houj04 Jan 10, 2024
fce77e3
correct adamw bf16 unit test and the way to get data type (#60565)
runzhech Jan 10, 2024
c767519
Fix some PADDLE_THROW error type and change test cases (#60487)
zrr1999 Jan 10, 2024
b1f4d5b
as_complex as_real check_grad (#60666)
changeyoung98 Jan 10, 2024
bba58af
[Fix Bug] Fix Bugs of Two Pass (#60626)
jiahy0825 Jan 10, 2024
b622e96
【Hackathon 5th No.34】Add bitwise_right_shift / bitwise_right_… to Paddle
cocoshe Jan 10, 2024
da5399a
This PR enable offset of generator for custom device. (#60616)
zhaohaixu Jan 10, 2024
3bcff9e
[SOT] Convert dtype to `DataType` in PIR mode (#60627)
SigureMo Jan 10, 2024
35d445b
[PIR] Change output to block_arg from copy to a shared for the execut…
zhangbo9674 Jan 10, 2024
4dcb045
【auto parallel】custom op spmd infer add args check (#60633)
liuzhenhai93 Jan 10, 2024
a49fcba
[PIR] Open PIR flag for test_ifelse (#60685)
chen2016013 Jan 10, 2024
06fdeeb
[CINN+PIR]Fix SplitOpPattern Bug in pd_to_cinn_pass (#60669)
Aurelius84 Jan 10, 2024
da91813
fix uncontiguous tensor resize bug (#60684)
wanghuancoder Jan 10, 2024
97e4bdf
[PIR]Support inplace custom op in pir (#60529)
YuanRisheng Jan 10, 2024
233d3d7
fix (#60634)
zhangbo9674 Jan 10, 2024
a2c1580
[Docs] Update latest release version in README (#60691)
SigureMo Jan 10, 2024
2d9d46a
[CINN] Refine cmake for pass in cinn (#60683)
zyfncg Jan 10, 2024
c9e0afd
[PIR]Open uts for PReLU (#60645)
0x45f Jan 10, 2024
b9f3565
[PIR]Open uts for ReLU6 (#60650)
0x45f Jan 10, 2024
da71db0
[PIR]Open uts for RReLU (#60660)
0x45f Jan 10, 2024
80d7a86
[NPU] fix storage_properties type mismatch with OneDNN and NPU (#60566)
qili93 Jan 10, 2024
3611e26
fix ttfnet_darknet53_1x_coco in pir mode (#60663)
kangguangli Jan 10, 2024
f177fa6
[auto parallel] shard tensor stop gradient support (#60699)
FeixLiu Jan 10, 2024
bcefbaf
[PIR][DynamicShape] Polish some codes (#60651)
lanxianghit Jan 10, 2024
a1ef22c
[PIR] fix onednn double reg (#60720)
wanghuancoder Jan 11, 2024
57fff3a
【pir】modify add_n in while use blockarg instead of input value (#60668)
xiaoguoguo626807 Jan 11, 2024
55558f1
[PIR] Open test_case ut (#60721)
zhangbo9674 Jan 11, 2024
3179fdf
[PIR] rename data_layout (#60678)
wanghuancoder Jan 11, 2024
f178fb4
[xpu]: check op is null (#60656)
gitliuyf Jan 11, 2024
3184c3c
【Hackathon 5th No.1】Add copysign API to Paddle (#57785)
cocoshe Jan 11, 2024
2c56dd4
rms_norm_infer_spmd (#60709)
liuzhenhai93 Jan 11, 2024
bed33c3
[PIR]Open more tests for bernoulli and celu (#60706)
changeyoung98 Jan 11, 2024
78eec98
[PIR]Open uts for scatter_nd_add (#60698)
0x45f Jan 11, 2024
f555263
[PIR]Open uts for sinh (#60714)
0x45f Jan 11, 2024
7c1e662
[PIR]Open uts for Softshrink and Softsign (#60716)
0x45f Jan 11, 2024
04ab9a6
[PIR] polish the ir_mapping implimentation. (#60675)
winter-wang Jan 11, 2024
ec174f3
[PIR] fix onednn layout transform yaml format (#60680)
wanghuancoder Jan 11, 2024
2549c61
【CINN】Complete error handler mechanism of dynamic schedule (#60718)
Courtesy-Xs Jan 11, 2024
ccf7bd4
fix windows C++17 bug (#60736)
risemeup1 Jan 11, 2024
839b682
[XPU] fc pass and delete pass nodes check (#60314)
gitliuyf Jan 11, 2024
92343a0
fix_local_windows_compile (#60682)
xuxinyi389 Jan 11, 2024
f8eff51
[PIR] fix onednn dialect name (#60665)
wanghuancoder Jan 11, 2024
c173503
【pir】add tensor to array kernel etc (#60703)
xiaoguoguo626807 Jan 11, 2024
0ac9c29
Fix defition definition (#60679)
co63oc Jan 11, 2024
aef9d6d
cholesky and cholesky_solve tests (#60726)
changeyoung98 Jan 11, 2024
2a5903f
[PIR]Open uts for searchsorted (#60700)
0x45f Jan 11, 2024
098cb1f
[PIR]Open uts for selu (#60702)
0x45f Jan 11, 2024
0aa416d
[PIR]Open uts for sequence_mask (#60704)
0x45f Jan 11, 2024
fd53653
[PIR] adjust pir pass log printing (#60723)
yuanlehome Jan 11, 2024
9c0f417
Fix Throughtput Throughput (#60741)
co63oc Jan 11, 2024
bbb7497
please last md (#60749)
wenxiaohahaha Jan 11, 2024
bcd5e37
[CINN+PIR]Fix Fetch XShape Variable logic (#60722)
Aurelius84 Jan 11, 2024
a576356
[PIR][DynamicShape] Remove redundant code for shapeAnalysis and shape…
lanxianghit Jan 11, 2024
c62a554
【PIR Dist Op Reg No.1】 reg push_sparse_v2 (#60473)
enkilee Jan 11, 2024
f968050
[Dynamic Shape] Provide operator<< For BroadcastTree (#60730)
jiahy0825 Jan 11, 2024
d604bcd
[PIR] change IR clone to const and support clone operation successors…
huangjiyi Jan 12, 2024
e328cf7
[CINN] Refine fully_insert_broadcast_pass (#60676)
zyfncg Jan 12, 2024
35daff5
[PIR] einsum's inner_cache and xshape set to optional (#60748)
yuanlehome Jan 12, 2024
d000b47
reduce runtime of unit-tests in windows-trt (#60731)
lizexu123 Jan 12, 2024
c526bbb
[Paddle-TRT] upgrade EnqueueV2 to EnqueueV3 (#59950)
lizexu123 Jan 12, 2024
dab5512
【Hackathon 5th No.110】Enhance the sparse.matmul API in Paddle (#59890)
MayYouBeProsperous Jan 12, 2024
823b94e
Fix rank_relatvie rank_relative (#60770)
co63oc Jan 12, 2024
600fc2f
add graph_key to specific graph's varmap (#60567)
GGBond8488 Jan 12, 2024
fcb2137
【Hackathon 5th No.38】Add FractionalMaxPool2d / FractionalMaxP… to Paddle
megemini Jan 12, 2024
125a671
[Prim][PIR] Recover pir bn (#60689)
cyber-pioneer Jan 12, 2024
277fe29
[PIR]fc_with_special_op_fuse_pass bug fix (#60751)
bukejiyu Jan 12, 2024
bc18062
Merge remote-tracking branch 'upstream/develop' into auto_mixed_preci…
Xinyu302 Jan 12, 2024
0ed2eb2
delete all debug message
Xinyu302 Jan 12, 2024
2bc8c08
add code deleted wrong at last commit
Xinyu302 Jan 12, 2024
dfda6ba
delete createAutoMixedPrecisionPass in analysis_predictor.cc
Xinyu302 Jan 15, 2024
2 changes: 1 addition & 1 deletion .gitignore
@@ -108,7 +108,7 @@ paddle/fluid/pir/dialect/operator/ir/pd_api.*
 paddle/fluid/pir/dialect/operator/ir/op_decomp.cc
 paddle/fluid/pir/dialect/operator/ir/pd_op_vjp.cc
 paddle/fluid/pir/dialect/operator/ir/pd_op.*
-paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.*
+paddle/fluid/pir/dialect/operator/ir/onednn_op.*
 paddle/fluid/pir/dialect/operator/ir/pd_onednn_op_info.*
 paddle/fluid/pir/dialect/operator/ir/pd_op_bwd.*
 paddle/fluid/pir/dialect/operator/ir/pd_op_fused.*
4 changes: 2 additions & 2 deletions README.md
@@ -15,11 +15,11 @@ English | [简体中文](./README_cn.md) | [日本語](./README_ja.md)
 Welcome to the PaddlePaddle GitHub.
 
 PaddlePaddle, as the first independent R&D deep learning platform in China, has been officially open-sourced to professional communities since 2016. It is an industrial platform with advanced technologies and rich features that cover core deep learning frameworks, basic model libraries, end-to-end development kits, tools & components as well as service platforms.
-PaddlePaddle is originated from industrial practices with dedication and commitments to industrialization. It has been widely adopted by a wide range of sectors including manufacturing, agriculture, enterprise service, and so on while serving more than 8 million developers, 220,000 companies and generating 800,000 models. With such advantages, PaddlePaddle has helped an increasing number of partners commercialize AI.
+PaddlePaddle is originated from industrial practices with dedication and commitments to industrialization. It has been widely adopted by a wide range of sectors including manufacturing, agriculture, enterprise service, and so on while serving more than 10.7 million developers, 235,000 companies and generating 860,000 models. With such advantages, PaddlePaddle has helped an increasing number of partners commercialize AI.
 
 ## Installation
 
-### Latest PaddlePaddle Release: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
+### Latest PaddlePaddle Release: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
 
 Our vision is to enable deep learning for everyone via PaddlePaddle.
 Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
6 changes: 3 additions & 3 deletions README_cn.md
@@ -14,13 +14,13 @@
 
 欢迎来到 PaddlePaddle GitHub
 
-飞桨(PaddlePaddle)以百度多年的深度学习技术研究和业务应用为基础,是中国首个自主研发、功能完备、 开源开放的产业级深度学习平台,集深度学习核心训练和推理框架、基础模型库、端到端开发套件和丰富的工具组件于一体。目前,飞桨累计开发者800万,服务企业22万家,基于飞桨开源深度学习平台产生了80万个模型。飞桨助力开发者快速实现AI想法,快速上线AI业务。帮助越来越多的行业完成AI赋能,实现产业智能化升级。
+飞桨(PaddlePaddle)以百度多年的深度学习技术研究和业务应用为基础,是中国首个自主研发、功能完备、 开源开放的产业级深度学习平台,集深度学习核心训练和推理框架、基础模型库、端到端开发套件和丰富的工具组件于一体。目前,飞桨累计开发者1070万,服务企业23.5万家,基于飞桨开源深度学习平台产生了86万个模型。飞桨助力开发者快速实现AI想法,快速上线AI业务。帮助越来越多的行业完成AI赋能,实现产业智能化升级。
 
 ## 安装
 
-### PaddlePaddle最新版本: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
+### PaddlePaddle 最新版本: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
 
-跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
+跟进 PaddlePaddle 最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
 
 ### 安装最新稳定版本
 
4 changes: 2 additions & 2 deletions README_ja.md
@@ -15,11 +15,11 @@
 PaddlePaddle GitHub へようこそ。
 
 PaddlePaddle は中国初の独立系 R&D ディープラーニングプラットフォームとして、2016年からプロのコミュニティに正式にオープンソース化されました。コアとなる深層学習フレームワーク、基本モデルライブラリ、エンドツーエンドの開発キット、ツール&コンポーネント、さらにサービスプラットフォームを網羅する、高度な技術と豊富な機能を備えた産業プラットフォームです。
-PaddlePaddle は、工業化に対するコミットメントを持つ工業的実践から生まれたものです。製造業、農業、企業サービスなど幅広い分野で採用され、800万人以上の開発者、22万以上の企業、80万以上のモデルを生み出しています。それにより PaddlePaddle は、ますます多くのパートナーの AI 商用化を支援しています。
+PaddlePaddle は、工業化に対するコミットメントを持つ工業的実践から生まれたものです。製造業、農業、企業サービスなど幅広い分野で採用され、1070万人以上の開発者、23.5万以上の企業、86万以上のモデルを生み出しています。それにより PaddlePaddle は、ますます多くのパートナーの AI 商用化を支援しています。
 
 ## インストール
 
-### PaddlePaddle の最新リリース: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
+### PaddlePaddle の最新リリース: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
 
 私たちのビジョンは、PaddlePaddle を通じて、誰もが深層学習を行えるようにすることです。
 PaddlePaddle の最新機能を追跡するために、私たちの[リリースのお知らせ](https://github.com/PaddlePaddle/Paddle/releases)を参照してください。
4 changes: 2 additions & 2 deletions cmake/external/xpu.cmake
@@ -26,10 +26,10 @@ set(XPU_XBLAS_LIB_NAME "libxpu_blas.so")
 set(XPU_XFA_LIB_NAME "libxpu_flash_attention.so")
 
 if(NOT DEFINED XPU_BASE_DATE)
-  set(XPU_BASE_DATE "20231203")
+  set(XPU_BASE_DATE "20231218")
 endif()
 if(NOT DEFINED XPU_XHPC_BASE_DATE)
-  set(XPU_XHPC_BASE_DATE "20231229")
+  set(XPU_XHPC_BASE_DATE "20240105")
 endif()
 set(XPU_XCCL_BASE_VERSION "1.1.8.1")
 if(NOT DEFINED XPU_XFT_BASE_VERSION)
1 change: 1 addition & 0 deletions cmake/flags.cmake
@@ -37,6 +37,7 @@ checkcompilercxx14flag()
 if(NOT WIN32)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
 else()
+  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++17")
   set(CMAKE_CXX_STANDARD 17)
 endif()
 
13 changes: 11 additions & 2 deletions cmake/inference_lib.cmake
@@ -328,10 +328,19 @@ copy(
   inference_lib_dist
   SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/visit_type.h
   DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/)
+
 copy(
   inference_lib_dist
-  SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/hostdevice.h
-  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/)
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/distributed/type_defs.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/distributed/
+)
+
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/distributed/auto_parallel/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/distributed/auto_parallel/
+)
+
 copy(
   inference_lib_dist
   SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/init_phi.h
10 changes: 10 additions & 0 deletions paddle/cinn/backends/codegen_cuda_host.cc
@@ -198,13 +198,23 @@ llvm::Value* CodeGenCUDA_Host::LowerHostFunc(const ir::_LoweredFunc_* func) {
                  [](auto& arg) { return std::addressof(arg); });
   // @}
 
+  // Set local scope table
+  CHECK_EQ(ll_function_args.size(), func->args.size());
+  for (int i = 0; i < ll_function_args.size(); ++i) {
+    SetVar(func->args[i].name(), ll_function_args[i]);
+  }
   llvm::BasicBlock* entry = llvm::BasicBlock::Create(
       /*Context=*/b_->getContext(),
       /*Name=*/"entry",
       /*Parent=*/f_,
       /*InsertBefore=*/nullptr);
   b_->SetInsertPoint(entry);
   CodeGenLLVM::Visit(&func->body);
+
+  // Reset local scope table
+  for (const ir::Argument& func_arg : func->args) {
+    symbol_table_->Erase(func_arg.name());
+  }
   RetVoid();
 
   return f_;
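The hunk above registers each lowered function's LLVM arguments in the codegen symbol table before visiting the body, then erases them so stale bindings cannot leak into the next function. Below is a minimal C++ sketch of that bind/erase pattern as an RAII guard; SymbolTable and its methods are simplified stand-ins, not CINN's actual API.

#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Stand-in for CINN's symbol table (assumption: a name -> value map).
class SymbolTable {
 public:
  void Set(const std::string& name, void* value) { vars_[name] = value; }
  void Erase(const std::string& name) { vars_.erase(name); }

 private:
  std::unordered_map<std::string, void*> vars_;
};

// Binds argument names on construction and unbinds them on destruction, so
// the "Reset local scope table" step cannot be forgotten on any exit path.
class ScopedArgBindings {
 public:
  ScopedArgBindings(SymbolTable* table,
                    const std::vector<std::pair<std::string, void*>>& args)
      : table_(table) {
    for (const auto& arg : args) {
      table_->Set(arg.first, arg.second);  // mirrors SetVar(...) in the diff
      names_.push_back(arg.first);
    }
  }
  ~ScopedArgBindings() {
    for (const auto& name : names_) table_->Erase(name);  // mirrors Erase(...)
  }

 private:
  SymbolTable* table_;
  std::vector<std::string> names_;
};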
2 changes: 1 addition & 1 deletion paddle/cinn/backends/codegen_cuda_host.h
@@ -53,7 +53,7 @@ class CodeGenCUDA_Host : public CodeGenLLVM {
     } else if (op->name == runtime::intrinsic::call_cuda_kernel) {
       return LowerCUDAKernelCall(op);
     } else {
-      CINN_NOT_IMPLEMENTED;
+      return CodeGenLLVM::Visit(op);
     }
   }
 
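Replacing CINN_NOT_IMPLEMENTED with a call to the base class turns an unhandled intrinsic from a hard abort into a fallback onto CodeGenLLVM's generic lowering. A minimal sketch of that dispatch pattern follows, with simplified stand-in types rather than the real CINN class hierarchy.

#include <string>

struct Op {
  std::string name;
};

struct CodeGenBase {
  virtual ~CodeGenBase() = default;
  virtual int Visit(const Op*) { return 0; }  // generic lowering path
};

struct CodeGenHost : CodeGenBase {
  int Visit(const Op* op) override {
    if (op->name == "call_kernel") return LowerKernelCall(op);
    // Previously a hard failure; now delegate upward so ordinary
    // intrinsics still receive the generic lowering.
    return CodeGenBase::Visit(op);
  }
  int LowerKernelCall(const Op*) { return 1; }
};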
24 changes: 23 additions & 1 deletion paddle/cinn/backends/codegen_cuda_util.h
@@ -31,6 +31,7 @@ namespace backends {
 #define KERNEL_ARGS "kernel_args"
 #define KERNEL_ARGS_NUM "kernel_args_num"
 #define KERNEL_STREAM "kernel_stream"
+#define TENSOR_SHAPE_ARGS "tensor_shape_args"
 
 /**
  * Split a CINN Module into two separate modules, one cantains the host
@@ -150,7 +151,8 @@ struct CollectBucketStrategyHostFunctionVisitor
       : CollectHostFunctionVisitor(module_name),
         kernel_args_(KERNEL_ARGS, type_of<void*>()),
         kernel_args_num_(KERNEL_ARGS_NUM, type_of<int>()),
-        kernel_stream_(KERNEL_STREAM, type_of<void*>()) {}
+        kernel_stream_(KERNEL_STREAM, type_of<void*>()),
+        tensor_shape_args_(TENSOR_SHAPE_ARGS, type_of<int32_t**>()) {}
 
   std::tuple<ir::Module, ir::Module> operator()(Expr* expr) {
     ir::IRMutator<>::Visit(expr, expr);
@@ -181,6 +183,25 @@
                                   {});
     host_module_builder.AddFunctionWithoutOptim(
         host_func.as_lowered_func_ref());
+
+    // Parse LoweredFunc to infer output tensor's shape
+    std::vector<ir::Expr> infer_shape_func_body_stmts(arg_defs_);
+    infer_shape_func_body_stmts.insert(
+        infer_shape_func_body_stmts.end(),
+        op->infer_shape_func.as_lowered_func()->body);
+
+    std::vector<ir::Argument> infer_shape_arguments = {
+        ir::Argument(kernel_args_, ir::Argument::IO::kOutput),
+        ir::Argument(kernel_args_num_, ir::Argument::IO::kInput),
+        ir::Argument(tensor_shape_args_, ir::Argument::IO::kOutput)};
+
+    ir::Expr host_infer_shape_func =
+        ir::_LoweredFunc_::Make(op->infer_shape_func.as_lowered_func()->name,
+                                infer_shape_arguments,
+                                ir::Block::Make(infer_shape_func_body_stmts),
+                                {});
+    host_module_builder.AddFunctionWithoutOptim(
+        host_infer_shape_func.as_lowered_func_ref());
   }
 
   void ProcessLoweredFunc(ir::Expr func, ir::Expr predicate);
@@ -199,6 +220,7 @@ struct CollectBucketStrategyHostFunctionVisitor
   ir::Var kernel_args_;
   ir::Var kernel_args_num_;
   ir::Var kernel_stream_;
+  ir::Var tensor_shape_args_;
 };
 
 }  // namespace detail
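With tensor_shape_args in place, each kernel's host module gains a companion infer-shape function whose three arguments are the packed kernel args, their count, and an int32_t** buffer that receives the inferred output shapes. The C++ sketch below shows the calling convention those ir::Arguments suggest; the function-pointer type and helper are illustrative assumptions, not CINN's actual runtime ABI.

#include <cstdint>

// Assumed signature of a generated host infer-shape function:
//   args     - packed kernel arguments
//   num_args - number of entries in args
//   shapes   - one int32_t row per output tensor, filled with inferred dims
using InferShapeFn = void (*)(void** args, int num_args, int32_t** shapes);

// A runtime would call this before launching the kernel, then size the
// output buffers from the dims written into tensor_shape_args.
inline void InferOutputShapes(InferShapeFn fn,
                              void** kernel_args,
                              int kernel_args_num,
                              int32_t** tensor_shape_args) {
  fn(kernel_args, kernel_args_num, tensor_shape_args);
}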
3 changes: 2 additions & 1 deletion paddle/cinn/backends/llvm/codegen_llvm.cc
@@ -818,7 +818,8 @@ llvm::Value *CodeGenLLVM::Visit(const ir::_Var_ *op) {
   // TODO(fc500110) hard coding
   if (LLVM_WillVarLowerAsPointer(op->name)) {
     result = value;
-  } else if (value->getType()->isPointerTy()) {
+  } else if (value->getType()->isPointerTy() &&
+             !value->getType()->getPointerElementType()->isPointerTy()) {
     result = Load(value, op->name + "_load");
   } else {
     result = value;
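The added condition stops CodeGenLLVM from emitting a load for a variable whose LLVM type is a pointer to a pointer (for example the new int32_t** tensor_shape_args), which must be forwarded as-is rather than dereferenced. A standalone sketch of the predicate, written against the pre-opaque-pointer LLVM API that getPointerElementType belongs to:

#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"

// True only for single-level pointers, whose pointee is a loadable scalar.
// Multi-level pointers (e.g. int32_t**) are passed through unchanged.
bool ShouldEmitLoad(llvm::Value* value) {
  llvm::Type* ty = value->getType();
  return ty->isPointerTy() && !ty->getPointerElementType()->isPointerTy();
}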
6 changes: 5 additions & 1 deletion paddle/cinn/common/CMakeLists.txt
@@ -23,7 +23,9 @@ gather_srcs(
   nvgpu_dev_info.cc
   integer_set.cc
   dim_expr_simplify.cc
-  dim_expr_converter.cc)
+  dim_expr_converter.cc
+  broadcast_tree.cc
+  dim_expr_util.cc)
 
 cinn_cc_test(test_equation_graph_topo_walker SRCS
              equation_graph_topo_walker_test.cc DEPS gtest glog)
@@ -48,8 +50,10 @@ if(WITH_CUDA)
                gtest glog)
 endif()
 if(NOT CINN_ONLY)
+  cinn_cc_test(dim_expr_util_test SRCS dim_expr_util_test.cc DEPS cinncore)
   cinn_cc_test(dim_expr_simplify_test SRCS dim_expr_simplify_test.cc DEPS
                cinncore)
   cinn_cc_test(dim_expr_converter_test SRCS dim_expr_converter_test.cc DEPS
                cinncore)
+  cinn_cc_test(broadcast_tree_test SRCS broadcast_tree_test.cc DEPS cinncore)
 endif()