@@ -50,7 +50,7 @@ function llama_case_list_auto() {
    llama_dygraph_auto_bs8_fp32_DP2-MP2-PP2
    llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2

-   llama_dygraph2static_auto_bs8_fp16_DP2-MP2-PP2
+   llama_dy2st_auto_bs8_fp16_DP2-MP2-PP2

    llama_static_auto_recompute_bs8_fp32_DP1-MP1-PP1
    llama_static_auto_recompute_bs16_fp32_DP2-MP1-PP1
@@ -1512,13 +1512,13 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2() {
    echo "=========== $FUNCNAME run end ==========="
}

- function llama_dygraph2static_auto_bs8_fp16_DP2-MP2-PP2() {
+ function llama_dy2st_auto_bs8_fp16_DP2-MP2-PP2() {
    echo "=========== $FUNCNAME run begin ==========="
    export PYTHONPATH=$root_path/:$PYTHONPATH
    export FLAGS_call_stack_level=3
    export NVIDIA_TF32_OVERRIDE=0

-   task_name="llama_auto_bs8_fp16_dp2mp2pp2"
+   task_name="llama_dy2st_auto_bs8_fp16_dp2mp2pp2"
    case_out_dir="output/$task_name"
    case_log_dir="output/$task_name""_log"
    rm -rf $case_out_dir
@@ -1548,6 +1548,11 @@ function llama_dygraph2static_auto_bs8_fp16_DP2-MP2-PP2() {
    --pipeline_parallel_degree 2 \
    --tensor_parallel_degree 2 \
    --sharding_parallel_degree 1 \
+   --use_flash_attention 0 \
+   --fuse_attention_qkv 1 \
+   --fuse_attention_ffn 0 \
+   --use_fused_rope 0 \
+   --use_fused_rms_norm 1 \
    --learning_rate 0.0001 \
    --min_learning_rate 0.00001 \
    --max_steps 10 \