File size: 1,321 Bytes
62e9ca6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# ####################################
# SpeechUT Base model #
# ####################################
[ $# -lt 3 ] && echo "Usage: $0 <model_path> <data_dir> <lang> [gen-set=dev] [beam_size=10] [lenpen=1.0]" && exit 0
[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1

model_path=$1
DATA_DIR=$2
lang=$3
gen_set=$4
beam_size=$5
lenpen=$6
[ -z $gen_set ] && gen_set="dev"
[ -z $beam_size ] && beam_size=10
[ -z $lenpen ] && lenpen=1
src_dir=${model_path%/*}
cpt=${model_path##*/}
cpt=${cpt%.*}

CODE_ROOT=${PWD}
results_path=$src_dir/decode_${cpt}_beam${beam_size}/${gen_set}
[ ! -d $results_path ] && mkdir -p $results_path

python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \
    --gen-subset ${gen_set}_st \
    --max-tokens 2000000 \
    --max-source-positions 2000000 \
    --num-workers 0 \
    \
    --user-dir $CODE_ROOT/speechut \
    --task speech_to_text \
    --config-yaml config_en${lang}.yaml \
    \
    --path ${model_path} \
    --results-path $results_path \
    \
    --scoring sacrebleu --max-len-a 0 --max-len-b 512 \
    --beam ${beam_size} \
    --lenpen $lenpen \
    # --model-overrides "{'model':{'w2v_path':'/path/to/your/pretrained/model.pt'}}" \

    echo $results_path
    tail -n 1 $results_path/generate-*.txt
    sleep 1s