#!/usr/bin/env bash

# Usage examples. They live in a quoted here-document fed to the no-op ':'
# builtin, so the shell reads past them without expanding or running anything.
: <<'END'

sh run.sh --stage 3 --stop_stage 3 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification8 \
--filename_patterns "E:/programmer/asr_datasets/voicemail/wav_finished/en-US/wav_finished/*/*.wav \
E:/programmer/asr_datasets/voicemail/wav_finished/id-ID/wav_finished/*/*.wav"

sh run.sh --stage 3 --stop_stage 3 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification8 \
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"

sh run.sh --stage 4 --stop_stage 4 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification8 \
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"

sh run.sh --stage 4 --stop_stage 4 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification8 \
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"

END

# sh run.sh --stage -1 --stop_stage 9
# sh run.sh --stage -1 --stop_stage 5 --system_version centos --file_folder_name task_cnn_voicemail_id_id --final_model_name cnn_voicemail_id_id
# sh run.sh --stage 3 --stop_stage 4
# sh run.sh --stage 4 --stop_stage 4
# sh run.sh --stage 3 --stop_stage 3 --system_version centos --file_folder_name task_cnn_voicemail_id_id

# params
# Defaults. Any variable set here can be replaced from the command line with
# "--<name> <value>", because the option parser accepts every already-set
# variable name.

# Picks the Python interpreter; "windows", "centos" and "ubuntu" are recognised.
system_version="windows"
# "true"/"false": gates the progress messages.
verbose=true
stage=0 # start from 0 if you need to start from data preparation
# Last stage to run (inclusive): a stage N runs when stage <= N <= stop_stage.
stop_stage=9

work_dir="$(pwd)"
# Sub-directory of work_dir that receives datasets, vocabulary and model output.
file_folder_name=file_folder_name
# Directory name used under ../../trained_models for final_model_dir.
final_model_name=final_model_name
# Passed verbatim (unexpanded) to the data preparation step.
filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
# NOTE(review): not referenced by any stage visible in this part of the script.
nohup_name=nohup.out

# Forwarded to the country model training step as --country.
country=en-US

# model params
# NOTE(review): none of these four are passed to the stages visible here;
# confirm whether the training steps are meant to receive them.
batch_size=64
max_epochs=200
save_top_k=10
patience=5

# parse options
# Consumes leading "--name value" pairs from the positional parameters.
#   * "name" has its "--" prefix removed and every "-" turned into "_"; it must
#     then be the name of a variable that is already set (normally one of the
#     defaults above), otherwise the script exits 1 with "invalid option".
#   * If that variable currently holds "true" or "false", the new value must
#     also be "true" or "false", otherwise the script exits 1.
#   * Parsing stops at the first empty argument or the first argument that does
#     not start with "--"; remaining arguments are left in "$@".
while true; do
  [ -z "${1:-}" ] && break # break if there are no arguments
  case "$1" in
    --*)
      name="${1#--}"
      name="${name//-/_}"

      # Only plain identifiers naming an existing variable are options. The
      # identifier check runs first so the indirect expansion is always valid.
      if ! [[ "${name}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || [ -z "${!name+xxx}" ]; then
        echo "$0: invalid option $1" 1>&2
        exit 1
      fi

      # Without a value "shift 2" would fail without shifting and this loop
      # would spin forever on the same argument.
      if [ "$#" -lt 2 ]; then
        echo "$0: option $1 requires a value" 1>&2
        exit 1
      fi

      # Current value of the target variable decides whether it is Boolean.
      old_value="${!name}"
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true
      else
        was_bool=false
      fi

      # Check that Boolean-valued arguments are really Boolean.
      if ${was_bool} && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1
      fi

      # Store the value literally: spaces, quotes, "$" and backticks in the
      # argument are kept as data and never re-evaluated by the shell.
      printf -v "${name}" '%s' "$2"
      shift 2
      ;;

    *)
      break
      ;;
  esac
done

# Locations derived from the parameters after command-line overrides.
# file_dir holds everything the stages read and write for this run.
file_dir="${work_dir}/${file_folder_name}"
# NOTE(review): final_model_dir is not used by the stages visible in this part
# of the script.
final_model_dir="${work_dir}/../../trained_models/${final_model_name}"

train_dataset="${file_dir}/train.xlsx"
valid_dataset="${file_dir}/valid.xlsx"
vocabulary_dir="${file_dir}/vocabulary"

# ${verbose} holds "true" or "false" and is run as a command to gate the echo.
${verbose} && echo "system_version: ${system_version}"
${verbose} && echo "file_folder_name: ${file_folder_name}"

# Bind the command name "python3" to the project's virtualenv interpreter.
# A shell function is used instead of an alias: bash only expands aliases in
# non-interactive scripts when running in POSIX mode, so an alias works for
# "sh run.sh" but is silently ignored for "bash run.sh" or "./run.sh".
# Any other system_version leaves "python3" to be resolved through PATH.
case "${system_version}" in
  windows)
    python3() {
      "D:/Users/tianx/PycharmProjects/virtualenv/vm_sound_classification/Scripts/python.exe" "$@"
    }
    ;;
  centos | ubuntu)
    #source /data/local/bin/vm_sound_classification/bin/activate
    python3() {
      "/data/local/bin/vm_sound_classification/bin/python3" "$@"
    }
    ;;
esac

# Stage 0: run step_1_prepare_data.py from work_dir, handing it the output
# directory, the file patterns and the train/valid dataset paths.
# Runs only when stage <= 0 <= stop_stage.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
  ${verbose} && echo "stage 0: prepare data"
  cd "${work_dir}" || exit 1
  # Abort on failure so later stages do not run after a failed preparation;
  # they are handed the same dataset paths.
  python3 step_1_prepare_data.py \
    --file_dir "${file_dir}" \
    --filename_patterns "${filename_patterns}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" || exit 1
fi

# Stage 1: run step_2_make_vocabulary.py from work_dir with the vocabulary
# directory and the train/valid dataset paths.
# Runs only when stage <= 1 <= stop_stage.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
  ${verbose} && echo "stage 1: make vocabulary"
  cd "${work_dir}" || exit 1
  # Abort on failure so the training stages, which are handed the same
  # vocabulary directory, do not run after a failed vocabulary step.
  python3 step_2_make_vocabulary.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" || exit 1
fi

# Stage 2: run step_3_train_global_model.py from work_dir; its serialization
# directory is ${file_dir}/global_model.
# Runs only when stage <= 2 <= stop_stage.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
  ${verbose} && echo "stage 2: train global model"
  cd "${work_dir}" || exit 1
  # Abort on failure instead of continuing with the following stages.
  python3 step_3_train_global_model.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --serialization_dir "${file_dir}/global_model" || exit 1
fi

# Stage 3: run step_4_train_country_model.py from work_dir for ${country};
# its serialization directory is ${file_dir}/country_model.
# Runs only when stage <= 3 <= stop_stage.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
  ${verbose} && echo "stage 3: train country model"
  cd "${work_dir}" || exit 1
  # Abort on failure instead of continuing with the following stages.
  python3 step_4_train_country_model.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --country "${country}" \
    --serialization_dir "${file_dir}/country_model" || exit 1
fi

# Stage 4: run step_5_train_union.py from work_dir; its serialization
# directory is ${file_dir}/union.
# Runs only when stage <= 4 <= stop_stage.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
  ${verbose} && echo "stage 4: train union model"
  cd "${work_dir}" || exit 1
  # Report a failed training run through a non-zero exit status.
  python3 step_5_train_union.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --serialization_dir "${file_dir}/union" || exit 1
fi