HoneyTian's picture
update
69ad385
raw
history blame
5.09 kB
#!/usr/bin/env bash
: <<'END'
sh run.sh --stage 3 --stop_stage 3 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification8 \
--filename_patterns "E:/programmer/asr_datasets/voicemail/wav_finished/en-US/wav_finished/*/*.wav \
E:/programmer/asr_datasets/voicemail/wav_finished/id-ID/wav_finished/*/*.wav"

sh run.sh --stage 3 --stop_stage 3 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification8 \
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"

sh run.sh --stage 4 --stop_stage 4 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification8 \
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"

sh run.sh --stage 4 --stop_stage 4 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification8 \
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
END
# sh run.sh --stage -1 --stop_stage 9
# sh run.sh --stage -1 --stop_stage 5 --system_version centos --file_folder_name task_cnn_voicemail_id_id --final_model_name cnn_voicemail_id_id
# sh run.sh --stage 3 --stop_stage 4
# sh run.sh --stage 4 --stop_stage 4
# sh run.sh --stage 3 --stop_stage 3 --system_version centos --file_folder_name task_cnn_voicemail_id_id
# ---------------------------------------------------------------------------
# Default parameters.
# ---------------------------------------------------------------------------

# Platform name; the script compares it against "windows", "centos", "ubuntu".
system_version='windows'
# Executed as a command (`$verbose && ...`), so it must be "true" or "false".
verbose='true'

# First and last pipeline stage to run (inclusive).
# Use stage 0 when the run has to begin with data preparation.
stage='0'
stop_stage='9'

# Directory the script was started from.
work_dir=$(pwd)

# Placeholder names for the working folder and the final model.
file_folder_name='file_folder_name'
final_model_name='final_model_name'

# Pattern(s) used to locate the input wav files.
filename_patterns='/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav'

nohup_name='nohup.out'
country='en-US'

# ---------------------------------------------------------------------------
# Model parameters.
# ---------------------------------------------------------------------------
batch_size='64'
max_epochs='200'
save_top_k='10'
patience='5'
# parse options
# Every "--name value" pair overwrites the already-defined variable `name`
# (dashes in the option name are mapped to underscores). Parsing stops at the
# first empty argument or at the first argument that does not start with "--".
# Exits with status 1 on an unknown option, a missing value, or a non-Boolean
# value given to a variable whose current value is "true"/"false".
while true; do
  [ -z "${1:-}" ] && break # break if there are no arguments
  case "$1" in
    --*)
      name=${1#--}
      name=${name//-/_}
      # Accept only plain identifiers that already hold a value; this also
      # keeps arbitrary text out of the indirect expansions below.
      if [[ ! "${name}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || [ -z "${!name+xxx}" ]; then
        echo "$0: invalid option $1" 1>&2
        exit 1
      fi
      # Without this guard `shift 2` fails on a trailing option and the loop
      # would spin forever on the same argument.
      if [ "$#" -lt 2 ]; then
        echo "$0: option $1 requires a value" 1>&2
        exit 1
      fi
      # Remember whether the current value is Boolean so the new value can be
      # validated against it.
      old_value=${!name}
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true
      else
        was_bool=false
      fi
      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1
      fi
      # Assign the value verbatim (no eval), so spaces, quotes, globs and
      # "$(...)" inside the value are stored literally.
      printf -v "${name}" '%s' "$2"
      shift 2
      ;;
    *)
      break
      ;;
  esac
done
# Paths derived from the working directory and the selected folder/model names.
file_dir="${work_dir}/${file_folder_name}"
vocabulary_dir="${file_dir}/vocabulary"
train_dataset="${file_dir}/train.xlsx"
valid_dataset="${file_dir}/valid.xlsx"
final_model_dir="${work_dir}/../../trained_models/${final_model_name}"

# Print the active configuration when verbose output is enabled.
if $verbose; then
  echo "system_version: ${system_version}"
  echo "file_folder_name: ${file_folder_name}"
fi
# Select the Python interpreter for the chosen platform.
# A shell function is used instead of an alias: bash does not expand aliases
# in non-interactive shells unless POSIX mode or `shopt -s expand_aliases` is
# active, so `alias python3=...` is silently ignored under `bash run.sh`.
# For any other system_version no function is defined and `python3` is
# resolved through PATH.
case "${system_version}" in
  windows)
    python3() {
      'D:/Users/tianx/PycharmProjects/virtualenv/vm_sound_classification/Scripts/python.exe' "$@"
    }
    ;;
  centos | ubuntu)
    #source /data/local/bin/vm_sound_classification/bin/activate
    python3() {
      '/data/local/bin/vm_sound_classification/bin/python3' "$@"
    }
    ;;
esac
# Stage 0: prepare data.
# Runs when stage <= 0 <= stop_stage. Invokes step_1_prepare_data.py from
# ${work_dir} with the data directory, the filename pattern(s) and the
# train/valid dataset paths.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
  $verbose && echo "stage 0: prepare data"
  cd "${work_dir}" || exit 1
  # The last argument must not end with a line-continuation backslash,
  # otherwise the closing `fi` is passed to python as an extra argument and
  # the `if` is left unterminated.
  python3 step_1_prepare_data.py \
    --file_dir "${file_dir}" \
    --filename_patterns "${filename_patterns}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}"
fi
# Stage 1: make vocabulary.
# Runs when stage <= 1 <= stop_stage. Invokes step_2_make_vocabulary.py from
# ${work_dir} with the vocabulary directory and the train/valid dataset paths.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
  $verbose && echo "stage 1: make vocabulary"
  cd "${work_dir}" || exit 1
  # No trailing backslash on the last argument: it would swallow the `fi`
  # below into the python command line.
  python3 step_2_make_vocabulary.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}"
fi
# Stage 2: train global model.
# Runs when stage <= 2 <= stop_stage. Invokes step_3_train_global_model.py
# from ${work_dir}; output is directed to ${file_dir}/global_model via
# --serialization_dir.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
  $verbose && echo "stage 2: train global model"
  cd "${work_dir}" || exit 1
  # No trailing backslash on the last argument: it would swallow the `fi`
  # below into the python command line.
  python3 step_3_train_global_model.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --serialization_dir "${file_dir}/global_model"
fi
# Stage 3: train country model.
# Runs when stage <= 3 <= stop_stage. Invokes step_4_train_country_model.py
# from ${work_dir} for the configured ${country}; output is directed to
# ${file_dir}/country_model via --serialization_dir.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
  $verbose && echo "stage 3: train country model"
  cd "${work_dir}" || exit 1
  # No trailing backslash on the last argument: it would swallow the `fi`
  # below into the python command line.
  python3 step_4_train_country_model.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --country "${country}" \
    --serialization_dir "${file_dir}/country_model"
fi
# Stage 4: train union model.
# Runs when stage <= 4 <= stop_stage. Invokes step_5_train_union.py from
# ${work_dir}; output is directed to ${file_dir}/union via --serialization_dir.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
  $verbose && echo "stage 4: train union model"
  cd "${work_dir}" || exit 1
  # No trailing backslash on the last argument: it would swallow the `fi`
  # below into the python command line.
  python3 step_5_train_union.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --serialization_dir "${file_dir}/union"
fi