Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 100 additions & 28 deletions tools/auto_parallel/ci_auto_parallel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ mkdir -p /workspace/case_logs
export log_path=/workspace/case_logs
export case_list=()

global_total_count=0
global_success_count=0
global_exit_250_arr=()
global_runtime_fail_arr=()
global_verification_fail_arr=()

install_paddle(){
echo -e "\033[31m ---- Install paddlepaddle-gpu \033"
if [ -n "$paddle" ];then
Expand Down Expand Up @@ -70,17 +76,68 @@ if [[ $1 -ne 0 ]] && [[ $1 -ne 250 ]];then
EXCODE=2
if [ ! -f ${log_path}/$2 ];then
echo -e "\033[31m run $2 CI FAIL \033"
else
mv ${log_path}/$2 ${log_path}/$2_FAIL.log
echo -e "\033[31m ${log_path}/$2_FAIL \033"
tail -70 ${log_path}/$2_FAIL.log
else
mv ${log_path}/$2 ${log_path}/$2_FAIL.log
echo -e "\033[31m ${log_path}/$2_FAIL \033"
tail -70 ${log_path}/$2_FAIL.log
fi
exit $EXCODE
else
echo -e "\033[32m run $3 CI SUCCESS \033"
echo -e "\033[32m The $3 CI has completed \033"
fi
}

function execute_func_list(){
cd ${log_path} || { echo "Failed to enter log_path: $log_path"; return 1; }
total_count=0
success_count=0
runtime_fail_count=0
verification_fail_count=0
exit_250_count=0
while IFS= read -r func_name; do
let total_count++
let global_total_count++
execute_num=1
while true; do
bash $1 exec_case $func_name $FLAGS_install_deps $FLAGS_download_data
result=$?
if [ $result -eq 0 ]; then
echo -e "\033[32m test success!"
let success_count++
let global_success_count++
elif [ $result -eq 2 ]; then
echo -e "\033[31m verification failed!"
let verification_fail_count++
global_verification_fail_arr+=("$func_name")
elif [ $result -eq 250 ]; then
if [ $execute_num -eq 1 ]; then
echo -e "\033[31m fist time execute failed, try again!"
let execute_num++
continue
else
echo -e "\033[31m second time execute failed, exit!"
let exit_250_count++
global_exit_250_arr+=("$func_name")
fi
else
echo "test failed!"
mv ${log_path}/$func_name ${log_path}/${func_name}_FAIL.log
echo -e "\033[31m ${log_path}/$func_name_FAIL \033"
tail -15 ${log_path}/${func_name}_FAIL.log
let runtime_fail_count++
global_runtime_fail_arr+=("$func_name")
fi
break
done
done < functions.txt
echo -e "\033[31m $2 test case has complicated \033"
echo -e "\033[31m $(printf '\t') total tests : $total_count \033"
echo -e "\033[31m $(printf '\t') success tests : $success_count \033"
echo -e "\033[31m $(printf '\t') runtime fail tests : $runtime_fail_count \033"
echo -e "\033[31m $(printf '\t') verification fail tests : $verification_fail_count \033"
echo -e "\033[31m $(printf '\t') exit 250 tests(intermittent issue) : $exit_250_count \033"
}

# Get the list of pending cases
get_diff_TO_case
# Remove duplicates and store the results back to the original list
Expand All @@ -101,24 +158,10 @@ if [[ ${#case_list[*]} -ne 0 ]];then
export FLAGS_install_deps=0
for case in ${case_list[*]};do
echo -e "\033[31m ---- running case $case_num/${#case_list[*]}: ${case} \033"
if [[ ${case} == "llama_auto" ]];then
bash /workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh llama_case_list_auto $FLAGS_install_deps $FLAGS_download_data
print_info $? `ls -lt ${log_path} | grep "llama" | head -n 1 | awk '{print $9}'` ${case}
export FLAGS_install_deps=1
export FLAGS_download_data="llama ""$FLAGS_download_data"
let case_num++
elif [[ ${case} == "auto_unit_test" ]];then
if [[ ${case} == "auto_unit_test" ]];then
bash /workspace/Paddle/tools/auto_parallel/ci_case_unit.sh auto_unit_test
print_info $? `ls -lt ${log_path} | grep "test" | head -n 1 | awk '{print $9}'` ${case}
let case_num++
elif [[ ${case} == "gpt-3_auto" ]];then
bash /workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh llm_gpt_case_list_auto $FLAGS_install_deps $FLAGS_download_data
print_info $? `ls -lt ${log_path} | grep "llm_gpt_dygraph_auto_" | head -n 1 | awk '{print $9}'` ${case}
let case_num++
elif [[ ${case} == "gpt-3_dygraph" ]];then
bash /workspace/PaddleNLP/scripts/distribute/ci_case_dy.sh llm_gpt_case_list_dygraph $FLAGS_install_deps $FLAGS_download_data
print_info $? `ls -lt ${log_path} | grep "llm_gpt" | head -n 1 | awk '{print $9}'` ${case}
let case_num++
elif [[ ${case} == "dygraph_unit_test" ]];then
bash /workspace/Paddle/tools/auto_parallel/ci_case_unit.sh dygraph_unit_test
print_info $? `ls -lt ${log_path} | grep "test" | head -n 1 | awk '{print $9}'` ${case}
Expand All @@ -127,22 +170,51 @@ if [[ ${#case_list[*]} -ne 0 ]];then
bash /workspace/Paddle/tools/auto_parallel/ci_case_unit.sh llama_auto_unit_test
print_info $? `ls -lt ${log_path} | grep "test" | head -n 1 | awk '{print $9}'` ${case}
let case_num++
elif [[ ${case} == "llama_auto" ]];then
cmd=/workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh
bash $cmd prepare_case llama_case_list_auto $FLAGS_install_deps $FLAGS_download_data
execute_func_list $cmd llama_auto
export FLAGS_install_deps=1
export FLAGS_download_data="llama ""$FLAGS_download_data"
let case_num++
elif [[ ${case} == "gpt-3_auto" ]];then
cmd=/workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh
bash $cmd prepare_case llm_gpt_case_list_auto $FLAGS_install_deps $FLAGS_download_data
execute_func_list $cmd gpt-3_auto
let case_num++
elif [[ ${case} == "gpt-3_dygraph" ]];then
cmd=/workspace/PaddleNLP/scripts/distribute/ci_case_dy.sh
bash $cmd prepare_case llm_gpt_case_list_dygraph $FLAGS_install_deps $FLAGS_download_data
execute_func_list $cmd gpt-3_dygraph
let case_num++
else
echo -e "\033[31m ---- no ${case} \033"
let case_num++
fi
done
echo -e "\033[31m ---- end run case \033"
cd ${log_path}
if [ ! -f *FAIL* ];then
FF=0

echo -e "\033[31m ---- total tests : $global_total_count \033"
if [ ${#global_exit_250_arr[@]} -ne 0 ]; then
echo -e "\033[32m ---- exit 250 test : ${#global_exit_250_arr[@]} \033"
for case in "${global_exit_250_arr[@]}"; do
echo -e "\t$case(exit 250)"
done
fi

if [ ${#global_runtime_fail_arr[@]} -eq 0 ] && [ ${#global_verification_fail_arr[@]} -eq 0 ]; then
echo -e "\033[32m ---- all cases Success \033"
EXCODE=0
echo -e "\033[32m ---- all case Success \033"
else
FF=`ls *FAIL*|wc -l`
EXCODE=2
echo -e "\033[31m ---- case Failed number: ${FF} \033"
ls *_FAIL*
echo -e "\033[32m ---- runtime failed test : ${#global_runtime_fail_arr[@]} \033"
for case in "${global_runtime_fail_arr[@]}"; do
echo -e "\t$case(failed)"
done
echo -e "\033[32m ---- verification failed test : ${#global_verification_fail_arr[@]} \033"
for case in "${global_verification_fail_arr[@]}"; do
echo -e "\t$case(failed)"
done
EXCODE=1
fi
else
echo -e "\033[32m Changed Not CI case, Skips \033"
Expand Down