File size: 2,319 Bytes
62e9ca6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#####################################
# SpeechUT ASR model #
#####################################
[ $# -lt 2 ] && echo "Usage: $0 <model_path> <data_dir> [gen-set=dev_other] [beam_size=10] [ctc_weight=0.2] [nj=32] [ngpu=8] [--normalize]" && exit 1
[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1

model_path=$1
DATA_DIR=$2
gen_set=$3
beam_size=$4
ctc_weight=$5
nj=$6
ngpu=$7
extra=$8
[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..."
[ -z $gen_set ] && gen_set="dev_other"
[ -z $beam_size ] && beam_size=10
[ -z $ctc_weight ] && ctc_weight=0.2
[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 as no ctc-decoding used..." && beam_size=1
[ $ctc_weight != 0 ] && extra="$extra --batch-size 1"
[ -z $nj ] && nj=32
[ -z $ngpu ] && ngpu=8

src_dir=${model_path%/*}
cpt=${model_path##*/}
cpt=${cpt%.*}

CODE_ROOT=${PWD}

world_size=$nj
for rank in $(seq 0 $((nj - 1))); do
    export CUDA_VISIBLE_DEVICES=$((rank % $ngpu))
    for subset in ${gen_set//,/ }; do
        results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}/${subset}_${world_size}_${rank}
        [ ! -d $results_path ] && mkdir -p $results_path

        python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \
        --user-dir $CODE_ROOT/speechut \
        --label-dir ${DATA_DIR} \
        --labels '["ltr"]' \
        --single-target \
        --post-process letter \
        --gen-subset ${subset} \
        --max-tokens 2000000 \
        \
        --task joint_sc2t_pretraining \
        --add-decoder-target \
        --fine-tuning \
        --pad-audio \
        --random-crop \
        \
        --ctc-weight ${ctc_weight} $extra \
        --beam ${beam_size} \
        \
        --path ${model_path} \
        --results-path $results_path \
        \
        --scoring wer --max-len-a 0.00078125 --max-len-b 200 \
        --distributed-world-size ${world_size} --distributed-rank ${rank} \
        &
    done
done
wait


for subset in ${gen_set//,/ }; do
    results_dir=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}
    cat $results_dir/${subset}_${world_size}_*/generate-${subset}.txt | grep -v "^Generate" > $results_dir/generate-${subset}.all.txt
done