
Commit d2ef691 (1 parent: 108c8c3)

Commit message: fix

File tree

3 files changed: +38 / -55 lines


paddlenlp/trainer/trainer.py

Lines changed: 0 additions & 3 deletions
@@ -34,7 +34,6 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union

 import numpy as np
-import hashlib
 import paddle
 import paddle.amp.auto_cast as autocast
 import paddle.distributed as dist
@@ -1273,8 +1272,6 @@ def _maybe_log_save_evaluate(self, tr_loss, model, epoch, ignore_keys_for_eval,
                     seq_length=seq_length,
                 )
             )
-            logs["loss_md5"] = hashlib.md5(
-                np.array(tr_loss_scalar).tobytes()).hexdigest()

             self._total_loss_scalar += tr_loss_scalar
             self._globalstep_last_logged = self.state.global_step
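The two deleted lines were the only consumer of hashlib in the trainer: they logged an MD5 fingerprint of the step loss so that two runs could be compared for bitwise equality. A minimal standalone sketch of that pattern (the loss value below is made up; in the trainer it is the aggregated tr_loss_scalar):

    import hashlib

    import numpy as np

    # Stand-in for the trainer's aggregated scalar loss at a logging step.
    tr_loss_scalar = 10.57664108

    # np.array(...) of a Python float yields a float64 scalar; hashing its
    # raw bytes gives a digest that matches only if the loss is bitwise
    # identical across runs, stricter than comparing printed rounded values.
    loss_md5 = hashlib.md5(np.array(tr_loss_scalar).tobytes()).hexdigest()
    print(loss_md5)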

paddlenlp/transformers/gpt/modeling_auto.py

Lines changed: 0 additions & 14 deletions
@@ -396,8 +396,6 @@ def forward(self,
         out = self.out_proj(out)
         # if sequence_parallel is true, out shape are [bs * seq_len / n, dim]
         # else their shape are [bs, seq_len, dim], n is mp parallelism.
-        out = dist.reshard(out, get_mesh(self.ipp),
-                           [dist.Shard(0), dist.Replicate()])
         outs = [out]
         if output_attentions:
             outs.append(weights)
@@ -672,11 +670,7 @@ def forward(self,
         if not self.config.use_fused_dropout_add:
             act = self.activation(self.linear1(hidden_states),
                                   approximate=True)
-            act = dist.reshard(act, get_mesh(
-                self.ipp), [dist.Shard(0), dist.Shard(2)])
             l_2 = self.linear2(act)
-            l_2 = dist.reshard(l_2, get_mesh(
-                self.ipp), [dist.Shard(0), dist.Replicate()])
             hidden_states = residual + self.dropout2(l_2)
         else:
             hidden_states = self.fused_dropout_add2(
@@ -769,10 +763,6 @@ def forward(self, input_ids, position_ids=None, inputs_embeddings=None):
         # The 'with' block ensures the correct seed context is used
         with seed_guard_context(current_seed):
             embeddings = self.dropout(embeddings)
-            embeddings = dist.reshard(
-                embeddings, get_mesh(),
-                [dist.Shard(0), dist.Replicate()])
-
         return embeddings


@@ -1338,8 +1328,6 @@ def forward(self, hidden_states, tensor_parallel_output=None):
         y = dist.reshard(self.weight, get_mesh(self.ipp),
                          [dist.Replicate(), dist.Shard(0)])
         logits = paddle.matmul(hidden_states, y, transpose_y=self.transpose_y)
-        logits = dist.reshard(logits, get_mesh(self.ipp),
-                              [dist.Shard(0), dist.Replicate()])
         return logits


@@ -1441,8 +1429,6 @@ def forward(
         else:
             hidden_states = outputs[0]
         logits = self.lm_head(hidden_states)
-        logits = dist.reshard(logits, get_mesh(self.ipp),
-                              [dist.Shard(0), dist.Replicate()])
         return logits

         # NOTE: The following code failed to run from dynamic to static mode
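Every deletion in this file is a dist.reshard call that pinned a fresh intermediate to an explicit placement; the commit drops them, presumably because those layouts can be derived by the auto-parallel machinery (the commit message says only "fix"). For reference, a minimal sketch of the reshard API under assumed conditions: a 1-D two-rank mesh and a plain tensor, rather than the model's pipeline-stage meshes from get_mesh(self.ipp):

    import paddle
    import paddle.distributed as dist

    # Illustrative 1-D mesh over two ranks; run under
    #   python -m paddle.distributed.launch --devices=0,1 demo.py
    mesh = dist.ProcessMesh([0, 1], dim_names=["dp"])

    x = paddle.randn([4, 8])
    # Distribute x with its first dimension sharded across the mesh,
    # i.e. each rank holds a [2, 8] shard.
    x_sharded = dist.shard_tensor(x, mesh, [dist.Shard(0)])

    # reshard() converts one placement into another, inserting whatever
    # communication is needed (here an all-gather) so that every rank
    # ends up with a full replicated copy.
    x_replicated = dist.reshard(x_sharded, mesh, [dist.Replicate()])
    print(x_replicated.shape)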

scripts/distribute/ci_case_auto.sh

Lines changed: 38 additions & 38 deletions
@@ -30,42 +30,42 @@ unset CUDA_VISIBLE_DEVICES

 function gpt_case_list_auto() {
     gpt_auto_recompute_bs16_fp32_DP1-MP1-PP1
-    # gpt_auto_recompute_bs16_fp16_o2_DP1-MP1-PP8
-    # gpt_auto_recompute_bs16_fp16_o2_DP1-MP2-PP4
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2
-    # gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage1
-    # gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage2
-    # gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage3
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage1
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage2
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage3
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage1
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage2
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage3
-    # gpt_auto_sp_acc_check
+    gpt_auto_recompute_bs16_fp16_o2_DP1-MP1-PP8
+    gpt_auto_recompute_bs16_fp16_o2_DP1-MP2-PP4
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2
+    gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage1
+    gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage2
+    gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage3
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage1
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage2
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage3
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage1
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage2
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage3
+    gpt_auto_sp_acc_check
 }

 function llama_case_list_auto() {
     llama_dygraph_auto_bs8_fp32_DP2
-    # llama_dygraph_auto_bs8_fp32_DP2-MP2
-    # llama_dygraph_auto_bs8_fp32_DP2-MP2-PP2
-    # llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2
-
-    # llama_static_auto_recompute_bs8_fp32_DP1-MP1-PP1
-    # llama_static_auto_recompute_bs16_fp32_DP2-MP1-PP1
-    # llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP1
-    # llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP2
-    # llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP2-VPP2-Sharding2_stage2
-    # llama_static_auto_recompute_bs16_fp16_DP2-MP2-PP2-VPP2-Sharding2_stage2
+    llama_dygraph_auto_bs8_fp32_DP2-MP2
+    llama_dygraph_auto_bs8_fp32_DP2-MP2-PP2
+    llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2
+
+    llama_static_auto_recompute_bs8_fp32_DP1-MP1-PP1
+    llama_static_auto_recompute_bs16_fp32_DP2-MP1-PP1
+    llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP1
+    llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP2
+    llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP2-VPP2-Sharding2_stage2
+    llama_static_auto_recompute_bs16_fp16_DP2-MP2-PP2-VPP2-Sharding2_stage2
 }

 function gpt_case_list_auto_pir() {
     gpt_auto_recompute_bs16_fp16_o2_DP1-MP1-PP8_pir
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_pir
-    # gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage1_pir
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage1_pir
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage2_pir
-    # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage3_pir
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_pir
+    gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage1_pir
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage1_pir
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage2_pir
+    gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage3_pir
 }

 function llm_gpt_case_list_auto() {
@@ -1578,11 +1578,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2() {
     ips=-1
     mem=-1
     echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5"
-    loss_base=10.57663822
-    loss_md5_base=86a59936c56ae83ce556dec0833ca35e
+    loss_base=10.57664108
+    loss_md5_base=0ebf68698887b33b33a46518621cf412
     ips_base=-1
     mem_base=-1
-    check_result $FUNCNAME ${loss_base} ${loss} ${loss_md5_base} ${loss_md5} ${mem_base} ${mem}
+    check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem}
     echo "=========== $FUNCNAME run end ==========="
 }

@@ -1648,11 +1648,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2() {
     ips=-1
     mem=-1
     echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5"
-    loss_base=10.57694435
-    loss_md5_base=42649563a5ae2af87b9322d33f75deb1
+    loss_base=10.57694054
+    loss_md5_base=6df87d01bd08113a92930f6349514b35
     ips_base=-1
     mem_base=-1
-    check_result $FUNCNAME ${loss_base} ${loss} ${loss_md5_base} ${loss_md5} ${mem_base} ${mem}
+    check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem}
     echo "=========== $FUNCNAME run end ==========="
 }

@@ -1718,11 +1718,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2-PP2() {
     ips=-1
     mem=-1
     echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5"
-    loss_base=10.57580185
-    loss_md5_base=9751dab0842de5905a8c0b87d1f06d67
+    loss_base=10.5758028
+    loss_md5_base=6cb4e151b35f026190df90ab240d9a95
     ips_base=-1
     mem_base=-1
-    check_result $FUNCNAME ${loss_base} ${loss} ${loss_md5_base} ${loss_md5} ${mem_base} ${mem}
+    check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem}
     echo "=========== $FUNCNAME run end ==========="
 }

@@ -1792,7 +1792,7 @@ function llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2() {
     loss_md5_base=e82a1f5668870d18a2d45b3ee0a25386
     ips_base=-1
     mem_base=-1
-    check_result $FUNCNAME ${loss_base} ${loss} ${loss_md5_base} ${loss_md5} ${mem_base} ${mem}
+    check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem}
     echo "=========== $FUNCNAME run end ==========="
 }
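One behavioral note on the last four hunks: since the trainer no longer emits loss_md5, check_result is now handed the ips baseline/measured pair in the positions the md5 pair used to occupy. check_result itself is defined elsewhere in ci_case_auto.sh; a hypothetical Python rendering of the comparison it performs (names and tolerance are assumptions, and -1 conventionally means "not measured"):

    def check_result(case, loss_base, loss, ips_base, ips, mem_base, mem,
                     rtol=1e-6):
        # Hypothetical sketch: compare each observed metric to its baseline,
        # skipping metrics recorded as -1 ("not measured" in the CI script).
        for name, base, got in (("loss", loss_base, loss),
                                ("ips", ips_base, ips),
                                ("mem", mem_base, mem)):
            if float(base) == -1:
                continue
            if abs(float(got) - float(base)) > rtol * max(1.0, abs(float(base))):
                raise AssertionError(f"{case}: {name}={got}, expected {base}")

    # Example mirroring llm_gpt_dygraph_auto_bs8_fp32_DP2 above:
    check_result("llm_gpt_dygraph_auto_bs8_fp32_DP2",
                 10.57664108, 10.57664108, -1, -1, -1, -1)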
