Commit c2ca2e5

sp default disable
1 parent 79e1a3e commit c2ca2e5

File tree

2 files changed: +10 -1 lines changed


model_zoo/gpt-3/ppfleetx/configs/nlp/gpt/auto/pretrain_gpt_base.yaml

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ Model:
   scale_qk_by_layer_num: True
   fused_softmax_with_triangular: True
   use_flash_attn: False
-
+  sequence_parallel: False  # TODO: make sequence_parallel an independent parallel strategy and set it in Distributed

 Data:
   Train:
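
Sequence parallelism partitions activations along the sequence dimension across the model-parallel group, so the flag is only meaningful when mp_degree >= 2; the yaml therefore keeps it off by default. A minimal sketch of reading the flag from such a config (the CONFIG_TEXT excerpt and the default-to-False lookup are illustrative, not ppfleetx API):

import yaml  # PyYAML

# Illustrative excerpt of the Model section above, not the full ppfleetx config.
CONFIG_TEXT = """
Model:
  use_flash_attn: False
  sequence_parallel: False
"""

config = yaml.safe_load(CONFIG_TEXT)
# Fall back to False when the key is absent, mirroring the new yaml default.
print(config["Model"].get("sequence_parallel", False))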

model_zoo/gpt-3/ppfleetx/utils/auto_config.py

Lines changed: 9 additions & 0 deletions
@@ -42,6 +42,15 @@ def process_dist_configs(config):
     mp_degree = configs.setdefault("mp_degree", 1)
     pp_degree = configs.setdefault("pp_degree", 1)

+    # Disable sequence parallel when mp_degree < 2: with no model-parallel
+    # group there is nothing to split the sequence dimension across.
+    sequence_parallel = config["Model"]["sequence_parallel"]
+    if mp_degree < 2 and sequence_parallel:
+        config["Model"]["sequence_parallel"] = False
+        logger.warning(
+            "sequence_parallel is turned off since mp_degree < 2."
+        )
+
     # sharding default
     sharding_config = configs["sharding"]
     sharding_degree = sharding_config.setdefault("sharding_degree", 1)
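
A self-contained sketch of the guard above, runnable outside ppfleetx (normalize_sequence_parallel, demo_config, and the stdlib logging logger are stand-ins for process_dist_configs and ppfleetx's logger):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

def normalize_sequence_parallel(config):
    # With mp_degree < 2 there is no model-parallel group to split the
    # sequence dimension across, so force sequence_parallel off.
    mp_degree = config["Distributed"].setdefault("mp_degree", 1)
    if mp_degree < 2 and config["Model"].get("sequence_parallel", False):
        config["Model"]["sequence_parallel"] = False
        logger.warning("sequence_parallel is turned off since mp_degree < 2.")
    return config

# Hypothetical config asking for sequence parallel without model parallelism.
demo_config = {
    "Distributed": {"mp_degree": 1},
    "Model": {"sequence_parallel": True},
}
normalize_sequence_parallel(demo_config)
print(demo_config["Model"]["sequence_parallel"])  # -> False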
