@@ -30,42 +30,42 @@ unset CUDA_VISIBLE_DEVICES
3030
3131function gpt_case_list_auto() {
3232 gpt_auto_recompute_bs16_fp32_DP1-MP1-PP1
33- # gpt_auto_recompute_bs16_fp16_o2_DP1-MP1-PP8
34- # gpt_auto_recompute_bs16_fp16_o2_DP1-MP2-PP4
35- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2
36- # gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage1
37- # gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage2
38- # gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage3
39- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage1
40- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage2
41- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage3
42- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage1
43- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage2
44- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage3
45- # gpt_auto_sp_acc_check
33+ gpt_auto_recompute_bs16_fp16_o2_DP1-MP1-PP8
34+ gpt_auto_recompute_bs16_fp16_o2_DP1-MP2-PP4
35+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2
36+ gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage1
37+ gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage2
38+ gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage3
39+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage1
40+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage2
41+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage3
42+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage1
43+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage2
44+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage3
45+ gpt_auto_sp_acc_check
4646}
4747
4848function llama_case_list_auto() {
4949 llama_dygraph_auto_bs8_fp32_DP2
50- # llama_dygraph_auto_bs8_fp32_DP2-MP2
51- # llama_dygraph_auto_bs8_fp32_DP2-MP2-PP2
52- # llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2
53-
54- # llama_static_auto_recompute_bs8_fp32_DP1-MP1-PP1
55- # llama_static_auto_recompute_bs16_fp32_DP2-MP1-PP1
56- # llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP1
57- # llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP2
58- # llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP2-VPP2-Sharding2_stage2
59- # llama_static_auto_recompute_bs16_fp16_DP2-MP2-PP2-VPP2-Sharding2_stage2
50+ llama_dygraph_auto_bs8_fp32_DP2-MP2
51+ llama_dygraph_auto_bs8_fp32_DP2-MP2-PP2
52+ llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2
53+
54+ llama_static_auto_recompute_bs8_fp32_DP1-MP1-PP1
55+ llama_static_auto_recompute_bs16_fp32_DP2-MP1-PP1
56+ llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP1
57+ llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP2
58+ llama_static_auto_recompute_bs16_fp32_DP2-MP2-PP2-VPP2-Sharding2_stage2
59+ llama_static_auto_recompute_bs16_fp16_DP2-MP2-PP2-VPP2-Sharding2_stage2
6060}
6161
6262function gpt_case_list_auto_pir() {
6363 gpt_auto_recompute_bs16_fp16_o2_DP1-MP1-PP8_pir
64- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_pir
65- # gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage1_pir
66- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage1_pir
67- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage2_pir
68- # gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage3_pir
64+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_pir
65+ gpt_auto_recompute_bs16_fp16_o2_DP4-MP2-Sharding4_stage1_pir
66+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP1-PP4_Sharding2_stage1_pir
67+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage2_pir
68+ gpt_auto_recompute_bs16_fp16_o2_DP2-MP2-PP2_Sharding2_stage3_pir
6969}
7070
7171function llm_gpt_case_list_auto() {
@@ -1578,11 +1578,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2() {
15781578 ips=-1
15791579 mem=-1
15801580 echo " result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5 "
1581- loss_base=10.57663822
1582- loss_md5_base=86a59936c56ae83ce556dec0833ca35e
1581+ loss_base=10.57664108
1582+ loss_md5_base=0ebf68698887b33b33a46518621cf412
15831583 ips_base=-1
15841584 mem_base=-1
1585- check_result $FUNCNAME ${loss_base} ${loss} ${loss_md5_base } ${loss_md5 } ${mem_base} ${mem}
1585+ check_result $FUNCNAME ${loss_base} ${loss} ${ips_base } ${ips } ${mem_base} ${mem}
15861586 echo " =========== $FUNCNAME run end ==========="
15871587}
15881588
@@ -1648,11 +1648,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2() {
16481648 ips=-1
16491649 mem=-1
16501650 echo " result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5 "
1651- loss_base=10.57694435
1652- loss_md5_base=42649563a5ae2af87b9322d33f75deb1
1651+ loss_base=10.57694054
1652+ loss_md5_base=6df87d01bd08113a92930f6349514b35
16531653 ips_base=-1
16541654 mem_base=-1
1655- check_result $FUNCNAME ${loss_base} ${loss} ${loss_md5_base } ${loss_md5 } ${mem_base} ${mem}
1655+ check_result $FUNCNAME ${loss_base} ${loss} ${ips_base } ${ips } ${mem_base} ${mem}
16561656 echo " =========== $FUNCNAME run end ==========="
16571657}
16581658
@@ -1718,11 +1718,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2-PP2() {
17181718 ips=-1
17191719 mem=-1
17201720 echo " result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5 "
1721- loss_base=10.57580185
1722- loss_md5_base=9751dab0842de5905a8c0b87d1f06d67
1721+ loss_base=10.5758028
1722+ loss_md5_base=6cb4e151b35f026190df90ab240d9a95
17231723 ips_base=-1
17241724 mem_base=-1
1725- check_result $FUNCNAME ${loss_base} ${loss} ${loss_md5_base } ${loss_md5 } ${mem_base} ${mem}
1725+ check_result $FUNCNAME ${loss_base} ${loss} ${ips_base } ${ips } ${mem_base} ${mem}
17261726 echo " =========== $FUNCNAME run end ==========="
17271727}
17281728
@@ -1792,7 +1792,7 @@ function llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2() {
17921792 loss_md5_base=e82a1f5668870d18a2d45b3ee0a25386
17931793 ips_base=-1
17941794 mem_base=-1
1795- check_result $FUNCNAME ${loss_base} ${loss} ${loss_md5_base } ${loss_md5 } ${mem_base} ${mem}
1795+ check_result $FUNCNAME ${loss_base} ${loss} ${ips_base } ${ips } ${mem_base} ${mem}
17961796 echo " =========== $FUNCNAME run end ==========="
17971797}
17981798
0 commit comments