####################################
#          HuBERT ED model         #
####################################
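# Fine-tunes a pretrained HuBERT encoder-decoder model for es-en speech
# translation through fairseq's Hydra entry point. Paths below assume the
# authors' cluster layout under /mnt/output and /mnt/default.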
#source /mnt/default/v-ziqzhang/.bashrc_sing
[ $# -lt 4 ] && echo "Usage: $0 <world_size> <update_freq> <w2v_path> <cpt> [mount]" && exit 1
world_size=$1
update_freq=$2
w2v_path=$3
cpt=$4
Mount=$5
[ -z "$world_size" ] && world_size=1
[ -z "$update_freq" ] && update_freq=1
[ -z "$w2v_path" ] && echo "you must specify a w2v_path!" && exit 1
[ -z "$cpt" ] && cpt=030.pt
[ -z "$Mount" ] && Mount=/mnt/default
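# Example invocation (illustrative values; the w2v_path below is hypothetical):
#   bash <this_script> 8 4 /path/to/pretrained/hubert_ed_base.pt 030.pt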
FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku
CONFIG_DIR=/mnt/output/users/v-kunwei/code/stpretrain_scripts/config
DATA_DIR="/mnt/output/users/v-kunwei/data/s2s_data/fin_esen"
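# DATA_DIR is expected to hold {train,valid}.tsv manifests; with
# task.labels='["spm"]' below, the task also looks for {train,valid}.spm
# target files in task.label_dir.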
MODEL_DIR="/mnt/output/users/v-kunwei/data/s2s_data/finetune/tune_ST_from_esen"
exp_name="tune_esen_lr5e-5_from_$cpt"
MODEL_DIR=$MODEL_DIR/$exp_name
mkdir -p "$MODEL_DIR"
max_tokens=4900
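# Launch fine-tuning. The effective batch size scales as
# max_tokens x world_size x update_freq. All flags below are Hydra
# overrides of the base_100h config; a "+" prefix adds a key that is
# absent from the base config.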
python $FAIRSEQ_ROOT/fairseq_cli/hydra_train.py \
--config-dir $CONFIG_DIR/finetune_asr \
--config-name base_100h \
\
+task.store_labels=true \
task.labels='["spm"]' \
task.data=$DATA_DIR \
task.label_dir=$DATA_DIR \
task.add_decoder=true \
+task.max_keep_size=4900 \
\
+model.reuse_text_emb=true \
model._name="stbert_st" \
model.w2v_path=${w2v_path} \
model.add_decoder=true \
\
criterion._name="label_smoothed_cross_entropy" \
+criterion.label_smoothing=0.2 \
+criterion.report_accuracy=true \
\
lr_scheduler._name="polynomial_decay" \
+lr_scheduler.warmup_updates=20000 \
\
optimization.lr=[0.0002] \
optimization.max_update=100000 \
checkpoint.best_checkpoint_metric="accuracy" \
checkpoint.maximize_best_checkpoint_metric=true \
checkpoint.save_interval=1 \
\
dataset.train_subset="train" \
dataset.valid_subset="valid" \
dataset.max_tokens=$max_tokens \
optimization.update_freq=[${update_freq}] \
\
distributed_training.distributed_world_size=${world_size} \
distributed_training.distributed_port=-1 \
\
common.log_interval=100 \
common.tensorboard_logdir=$MODEL_DIR \
checkpoint.save_dir=$MODEL_DIR \
hydra.run.dir=$MODEL_DIR \
hydra.job.name=${exp_name}
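# Brief pause before exiting, presumably to let log writers flush.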
sleep 20s
# Alternative warmup setting and reference checkpoint, kept for reference:
# lr_scheduler._name="polynomial_decay" \
# +lr_scheduler.warmup_updates=5000 \
# /mnt/default/v-ziqzhang/data/stbert-ed/exp/ST_enes/sc2t_base_ende_32gpu_1accum/checkpoint_204_400000.pt