Skip to content

Commit 2b8c08e

Browse files
authored
Merge pull request #1673 from Jackwaterveg/CER
[asr] Add new cer tools
2 parents f39de8d + 8d1ee82 commit 2b8c08e

File tree

6 files changed

+667
-566
lines changed

6 files changed

+667
-566
lines changed

examples/aishell/asr0/local/test.sh

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ if [ $# != 4 ];then
55
exit -1
66
fi
77

8+
stage=0
9+
stop_stage=100
810
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
911
echo "using $ngpu gpus..."
1012

@@ -19,18 +21,45 @@ if [ $? -ne 0 ]; then
1921
exit 1
2022
fi
2123

22-
python3 -u ${BIN_DIR}/test.py \
23-
--ngpu ${ngpu} \
24-
--config ${config_path} \
25-
--decode_cfg ${decode_config_path} \
26-
--result_file ${ckpt_prefix}.rsl \
27-
--checkpoint_path ${ckpt_prefix} \
28-
--model_type ${model_type}
24+
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
25+
# format the reference test file
26+
python utils/format_rsl.py \
27+
--origin_ref data/manifest.test.raw \
28+
--trans_ref data/manifest.test.text
2929

30-
if [ $? -ne 0 ]; then
31-
echo "Failed in evaluation!"
32-
exit 1
30+
python3 -u ${BIN_DIR}/test.py \
31+
--ngpu ${ngpu} \
32+
--config ${config_path} \
33+
--decode_cfg ${decode_config_path} \
34+
--result_file ${ckpt_prefix}.rsl \
35+
--checkpoint_path ${ckpt_prefix} \
36+
--model_type ${model_type}
37+
38+
if [ $? -ne 0 ]; then
39+
echo "Failed in evaluation!"
40+
exit 1
41+
fi
42+
43+
# format the hyp file
44+
python utils/format_rsl.py \
45+
--origin_hyp ${ckpt_prefix}.rsl \
46+
--trans_hyp ${ckpt_prefix}.rsl.text
47+
48+
python utils/compute-wer.py --char=1 --v=1 \
49+
data/manifest.test.text ${ckpt_prefix}.rsl.text > ${ckpt_prefix}.error
3350
fi
3451

52+
if [ ${stage} -le 101 ] && [ ${stop_stage} -ge 101 ]; then
53+
python utils/format_rsl.py \
54+
--origin_ref data/manifest.test.raw \
55+
--trans_ref_sclite data/manifest.test.text.sclite
56+
57+
python utils/format_rsl.py \
58+
--origin_hyp ${ckpt_prefix}.rsl \
59+
--trans_hyp_sclite ${ckpt_prefix}.rsl.text.sclite
60+
61+
mkdir -p ${ckpt_prefix}_sclite
62+
sclite -i wsj -r data/manifest.test.text.sclite -h ${ckpt_prefix}.rsl.text.sclite -e utf-8 -o all -O ${ckpt_prefix}_sclite -c NOASCII
63+
fi
3564

3665
exit 0

examples/aishell/asr1/local/test.sh

Lines changed: 81 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ if [ $# != 3 ];then
55
exit -1
66
fi
77

8+
stage=0
9+
stop_stage=100
810
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
911
echo "using $ngpu gpus..."
1012

@@ -24,49 +26,86 @@ fi
2426
#fi
2527

2628

27-
for type in attention ctc_greedy_search; do
28-
echo "decoding ${type}"
29-
if [ ${chunk_mode} == true ];then
30-
# stream decoding only support batchsize=1
29+
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
30+
# format the reference test file
31+
python utils/format_rsl.py \
32+
--origin_ref data/manifest.test.raw \
33+
--trans_ref data/manifest.test.text
34+
35+
for type in attention ctc_greedy_search; do
36+
echo "decoding ${type}"
37+
if [ ${chunk_mode} == true ];then
38+
# stream decoding only support batchsize=1
39+
batch_size=1
40+
else
41+
batch_size=64
42+
fi
43+
output_dir=${ckpt_prefix}
44+
mkdir -p ${output_dir}
45+
python3 -u ${BIN_DIR}/test.py \
46+
--ngpu ${ngpu} \
47+
--config ${config_path} \
48+
--decode_cfg ${decode_config_path} \
49+
--result_file ${output_dir}/${type}.rsl \
50+
--checkpoint_path ${ckpt_prefix} \
51+
--opts decode.decoding_method ${type} \
52+
--opts decode.decode_batch_size ${batch_size}
53+
54+
if [ $? -ne 0 ]; then
55+
echo "Failed in evaluation!"
56+
exit 1
57+
58+
fi
59+
# format the hyp file
60+
python utils/format_rsl.py \
61+
--origin_hyp ${output_dir}/${type}.rsl \
62+
--trans_hyp ${output_dir}/${type}.rsl.text
63+
python utils/compute-wer.py --char=1 --v=1 \
64+
data/manifest.test.text ${output_dir}/${type}.rsl.text > ${output_dir}/${type}.error
65+
66+
done
67+
68+
for type in ctc_prefix_beam_search attention_rescoring; do
69+
echo "decoding ${type}"
3170
batch_size=1
32-
else
33-
batch_size=64
34-
fi
35-
output_dir=${ckpt_prefix}
36-
mkdir -p ${output_dir}
37-
python3 -u ${BIN_DIR}/test.py \
38-
--ngpu ${ngpu} \
39-
--config ${config_path} \
40-
--decode_cfg ${decode_config_path} \
41-
--result_file ${output_dir}/${type}.rsl \
42-
--checkpoint_path ${ckpt_prefix} \
43-
--opts decode.decoding_method ${type} \
44-
--opts decode.decode_batch_size ${batch_size}
45-
46-
if [ $? -ne 0 ]; then
47-
echo "Failed in evaluation!"
48-
exit 1
49-
fi
50-
done
51-
52-
for type in ctc_prefix_beam_search attention_rescoring; do
53-
echo "decoding ${type}"
54-
batch_size=1
71+
output_dir=${ckpt_prefix}
72+
mkdir -p ${output_dir}
73+
python3 -u ${BIN_DIR}/test.py \
74+
--ngpu ${ngpu} \
75+
--config ${config_path} \
76+
--decode_cfg ${decode_config_path} \
77+
--result_file ${output_dir}/${type}.rsl \
78+
--checkpoint_path ${ckpt_prefix} \
79+
--opts decode.decoding_method ${type} \
80+
--opts decode.decode_batch_size ${batch_size}
81+
82+
if [ $? -ne 0 ]; then
83+
echo "Failed in evaluation!"
84+
exit 1
85+
fi
86+
# format the hyp file (every continued line must end with a backslash)
python utils/format_rsl.py \
87+
--origin_hyp ${output_dir}/${type}.rsl \
88+
--trans_hyp ${output_dir}/${type}.rsl.text
89+
python utils/compute-wer.py --char=1 --v=1 \
90+
data/manifest.test.text ${output_dir}/${type}.rsl.text > ${output_dir}/${type}.error
91+
done
92+
fi
93+
94+
if [ ${stage} -le 101 ] && [ ${stop_stage} -ge 101 ]; then
95+
# format the reference test file for sclite
96+
python utils/format_rsl.py \
97+
--origin_ref data/manifest.test.raw \
98+
--trans_ref_sclite data/manifest.test.text.sclite
99+
55100
output_dir=${ckpt_prefix}
56-
mkdir -p ${output_dir}
57-
python3 -u ${BIN_DIR}/test.py \
58-
--ngpu ${ngpu} \
59-
--config ${config_path} \
60-
--decode_cfg ${decode_config_path} \
61-
--result_file ${output_dir}/${type}.rsl \
62-
--checkpoint_path ${ckpt_prefix} \
63-
--opts decode.decoding_method ${type} \
64-
--opts decode.decode_batch_size ${batch_size}
65-
66-
if [ $? -ne 0 ]; then
67-
echo "Failed in evaluation!"
68-
exit 1
69-
fi
70-
done
101+
for type in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
102+
# format the hyp file for sclite (every continued line must end with a backslash)
python utils/format_rsl.py \
103+
--origin_hyp ${output_dir}/${type}.rsl \
104+
--trans_hyp_sclite ${output_dir}/${type}.rsl.text.sclite
105+
106+
mkdir -p ${output_dir}/${type}_sclite
107+
sclite -i wsj -r data/manifest.test.text.sclite -h ${output_dir}/${type}.rsl.text.sclite -e utf-8 -o all -O ${output_dir}/${type}_sclite -c NOASCII
108+
done
109+
fi
71110

72111
exit 0

examples/aishell/asr1/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ stage=0
77
stop_stage=50
88
conf_path=conf/conformer.yaml
99
decode_conf_path=conf/tuning/decode.yaml
10-
avg_num=20
10+
avg_num=30
1111
audio_file=data/demo_01_03.wav
1212

1313
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

paddlespeech/s2t/exps/deepspeech2/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def compute_metrics(self,
278278
len_refs += len_ref
279279
num_ins += 1
280280
if fout:
281-
fout.write({"utt": utt, "ref": target, "hyp": result})
281+
fout.write({"utt": utt, "refs": [target], "hyps": [result]})
282282
logger.info(f"Utt: {utt}")
283283
logger.info(f"Ref: {target}")
284284
logger.info(f"Hyp: {result}")

0 commit comments

Comments
 (0)