# amupd's picture
# SpeechT5 upload
# 62e9ca6
# ####################################
# SpeechLM-P Base model #
# ####################################
# Launches fairseq/fairseq_cli/hydra_train.py with the
# speechlm_base_librispeech pre-training config.
# Must be run from a directory whose basename is SpeechLM.
#
# Positional arguments:
#   $1 data_dir       passed as both task.data and task.label_dir
#   $2 text_data_dir  passed as task.text_cfg.text_data
#   $3 mount          root under which exp/pretrain/... is created (default: ${PWD})
#   $4 world_size     distributed_training.distributed_world_size (default: 32)
#   $5 update_freq    optimization.update_freq (default: 1)
#
# Exit status: 1 on bad usage, wrong working directory, or when the output
# directory cannot be created; otherwise the status of the python process.
#
# All expansions are quoted so that values containing whitespace or glob
# characters reach each command as a single, unmodified argument.

if [ "$#" -lt 2 ]; then
  echo "Usage: $0 <data_dir> <text_data_dir> [mount=${PWD}] [world_size=32] [update_freq=1]" >&2
  exit 1
fi

if [ "${PWD##*/}" != "SpeechLM" ]; then
  echo "Error: dir not match! Switch to SpeechLM/ and run it again!" >&2
  exit 1
fi

DATA_DIR=$1
TEXT_DATA_DIR=$2
# ${N:-default} applies the default for both an omitted and an empty argument.
mount=${3:-${PWD}}
world_size=${4:-32}
update_freq=${5:-1}

CODE_ROOT=${PWD}
MODEL_DIR="${mount}/exp/pretrain/base_speechlmp_${world_size}gpu_${update_freq}accum"

# mkdir -p is a no-op when the directory already exists; stop here if it
# cannot be created rather than starting a run that has nowhere to write.
mkdir -p -- "$MODEL_DIR" || {
  echo "Error: cannot create $MODEL_DIR" >&2
  exit 1
}

# The escaped double quotes in the subset overrides are passed through
# literally; the bracketed update_freq value is quoted so the shell does not
# treat it as a glob pattern.
python "$CODE_ROOT/fairseq/fairseq_cli/hydra_train.py" \
  --config-dir "$CODE_ROOT/speechlm/config/pretrain" \
  --config-name speechlm_base_librispeech \
  common.user_dir="$CODE_ROOT/speechlm" \
  \
  task.labels='["phn"]' \
  model.label_rate=100 \
  task.data="$DATA_DIR" \
  task.label_dir="$DATA_DIR" \
  task.text_cfg.text_data="$TEXT_DATA_DIR" \
  \
  dataset.train_subset=\"train_960+train_text.phn-ltr\" \
  dataset.valid_subset=\"dev_clean+dev_clean.phn-ltr\" \
  dataset.num_workers=0 \
  dataset.max_tokens=1400000 \
  distributed_training.distributed_world_size="${world_size}" \
  "optimization.update_freq=[${update_freq}]" \
  \
  common.tensorboard_logdir="$MODEL_DIR" \
  checkpoint.save_dir="$MODEL_DIR" \
  hydra.run.dir="$MODEL_DIR" \
  hydra.job.name=pretrain
# data_dir="/stdblob/users/v-ziqzhang/dataset/LibriLM/phn2char_sanych/tri4b_mono_label"
# text_data_dir="/stdblob/users/v-ziqzhang/dataset/LibriLM/phn2char_sanych/filt2k_sil025_m5std25_sil14_spn32/bin-idx"