# amupd's picture
# SpeechT5 upload
# 62e9ca6
# ####################################
# Hubert SC2T ED model #
# ####################################
# Positional arguments (all optional):
#   $1  world_size   value passed to distributed_training.distributed_world_size
#                    (default: 8)
#   $2  update_freq  value passed to optimization.update_freq (default: 1)
#   $3  exp_name     experiment name, used for the output directory and for
#                    hydra.job.name (default: built from the two values above)
# An argument that is omitted or passed as an empty string falls back to its
# default. ${N:-default} is used instead of `[ -z $var ]` so that values
# containing whitespace do not break the test.
world_size=${1:-8}
update_freq=${2:-1}
exp_name=${3:-sc2t_base_enes_${world_size}gpu_${update_freq}accum6666}
# Location of the fairseq checkout whose hydra_train.py is launched, and of
# the config tree handed to it via --config-dir.
FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku
CONFIG_DIR=/mnt/output/users/v-kunwei/code/stpretrain_scripts/config
# DATA_DIR is used both as task.data and as task.label_dir;
# TEXT_DATA_DIR is used as task.text_cfg.text_data.
DATA_DIR="/mnt/output/users/v-kunwei/data/s2s_data/speech_enes"
TEXT_DATA_DIR="/mnt/output/users/v-kunwei/data/s2s_data/text_enes/bin-idx"
# Per-experiment output directory (checkpoints, tensorboard logs, hydra run dir).
# NOTE(review): unlike the paths above, this one has no "users/" component
# after /mnt/output -- confirm that this is intended.
MODEL_DIR="/mnt/output/v-kunwei/data/s2s_data/exp/S2S_enes/$exp_name"
# mkdir -p succeeds silently when the directory already exists, so no separate
# -d test is needed. The path is quoted so that an exp_name containing
# whitespace or glob characters still names exactly one directory.
mkdir -p -- "$MODEL_DIR"
# Launch fairseq's Hydra training entry point with the sc2t_base_librispeech
# config from $CONFIG_DIR/pretrain, overriding data locations, model/criterion
# options, dataset subsets, optimization settings and output directories.
#
# Notes on the overrides:
#   * A leading "+" is Hydra's syntax for adding a key that the base config
#     does not already define.
#   * Every argument containing a shell variable is double-quoted so paths or
#     names with whitespace stay a single argument, and so the square brackets
#     in optimization.update_freq=[...] are never treated as a glob pattern.
#   * train_subset / valid_subset are passed with literal double quotes around
#     the value (presumably so Hydra reads the "+"-containing names as plain
#     strings -- confirm against the Hydra override grammar).
#   * The bare "\" lines only group related overrides visually.
python "$FAIRSEQ_ROOT/fairseq_cli/hydra_train.py" \
  --config-dir "$CONFIG_DIR/pretrain" \
  --config-name sc2t_base_librispeech \
  \
  +task.store_labels=true \
  task.labels='["km"]' \
  model.label_rate=50 \
  "task.data=$DATA_DIR" \
  "task.label_dir=$DATA_DIR" \
  "task.text_cfg.text_data=$TEXT_DATA_DIR" \
  +task.text_cfg.data_config=config.yaml \
  task.text_cfg.text_maxtokens_ratio=3.0 \
  \
  +criterion.dec_loss_type="ce" \
  \
  criterion.text_weight=1.0 \
  \
  model.use_rel_pos_enc=true \
  +model.code_use_rel_pos_enc=true \
  +model.pad_with_code=true \
  model.text_transformer.no_scale_embedding=true \
  model.text_transformer.layernorm_embedding=true \
  +model.share_decoder_input_output_embed=true \
  \
  'dataset.train_subset="train_all+en.kmu-spm"' \
  'dataset.valid_subset="valid+en_valid.kmu-spm"' \
  dataset.num_workers=0 \
  dataset.max_tokens=1000000 \
  "optimization.update_freq=[${update_freq}]" \
  optimization.max_update=400000 \
  \
  "distributed_training.distributed_world_size=${world_size}" \
  \
  "common.tensorboard_logdir=$MODEL_DIR" \
  "checkpoint.save_dir=$MODEL_DIR" \
  "hydra.run.dir=$MODEL_DIR" \
  "hydra.job.name=${exp_name}"
# Wait five minutes once the training command has returned, then report
# completion. The message is printed whatever the training command's exit
# status was; that status is not checked here.
sleep 5m
printf '%s\n' "All finished"