File size: 3,199 Bytes
35a4689
 
 
 
 
 
 
 
bd3d872
35a4689
169d6d6
35a4689
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c6c36a
 
35a4689
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd3d872
35a4689
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env bash

: <<'END'

sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir

sh run.sh --stage 3 --stop_stage 3 --system_version windows --file_folder_name file_dir

sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name rnnoise-nx-dns3 \
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
--speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train" \


END


# params
# Default value for every option. The option parser further down only accepts
# "--<name> <value>" for variables that are already set when it runs, so this
# list is also the list of recognised options.

# Platform key used later to choose the Python interpreter.
system_version="windows"
# "true" or "false"; when true, progress messages are echoed.
verbose=true
# First and last stage to execute.
# Start from 0 if you need to start from data preparation.
stage=0
stop_stage=9

# Working directory plus the names from which output locations are derived.
work_dir=$(pwd)
file_folder_name="file_folder_name"
final_model_name="final_model_name"
config_file="yaml/config.yaml"
limit=10

# Input corpora handed to the data preparation step.
noise_dir="/data/tianxing/HuggingDatasets/nx_noise/data/noise"
speech_dir="/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train"

nohup_name="nohup.out"

# model params (not referenced by the stages visible in this script)
batch_size=64
max_epochs=200
save_top_k=10
patience=5


# parse options
#######################################
# Consume leading "--name value" pairs and store each value in the shell
# variable of the same name (dashes in the option name become underscores).
# Parsing stops at the first empty argument or the first argument that does
# not start with "--".
#
# Globals:
#   <name>            (written) one variable per parsed option; the variable
#                     must already be set, otherwise the option is rejected.
#   parsed_arg_count  (written) number of arguments consumed.
# Arguments:
#   the script's command-line arguments
# Outputs:
#   diagnostics on stderr
# Returns:
#   0 on success; exits the shell with status 1 on an unknown option, a
#   missing value, or a non-boolean value for a boolean option.
#######################################
parse_options() {
  local opt_name opt_old was_bool
  parsed_arg_count=0

  while [ -n "${1:-}" ]; do  # stop when there are no (non-empty) arguments
    case "$1" in
      --*)
        opt_name="${1#--}"
        opt_name="${opt_name//-/_}"

        # Only names of already-defined variables are valid options. The
        # identifier check comes first so the indirect expansion below never
        # sees a malformed name.
        if ! [[ "${opt_name}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] \
          || [ -z "${!opt_name+xxx}" ]; then
          echo "$0: invalid option $1" 1>&2
          exit 1
        fi

        # Without this check "shift 2" fails on a trailing option and the
        # loop would see the same argument forever.
        if [ "$#" -lt 2 ]; then
          echo "$0: option $1 requires a value" 1>&2
          exit 1
        fi

        # A variable whose current value is true/false is treated as Boolean.
        opt_old="${!opt_name}"
        if [ "${opt_old}" == "true" ] || [ "${opt_old}" == "false" ]; then
          was_bool=true
        else
          was_bool=false
        fi

        # Check that Boolean-valued arguments are really Boolean.
        if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
          echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
          exit 1
        fi

        # Store the value verbatim (no eval), so spaces, quotes and "$" in
        # the value are kept literally, e.g. --cmd "queue.pl -sync y".
        printf -v "${opt_name}" '%s' "$2"

        shift 2
        parsed_arg_count=$((parsed_arg_count + 2))
        ;;

      *)
        break
        ;;
    esac
  done
}

parse_options "$@"
# Drop the consumed options so that only the remaining arguments are left.
shift "${parsed_arg_count}"

# Locations derived from the (possibly overridden) options.
# Everything produced by a run lives under file_dir.
file_dir="${work_dir}/${file_folder_name}"
train_dataset="${file_dir}/train.jsonl"
valid_dataset="${file_dir}/valid.jsonl"
evaluation_audio_dir="${file_dir}/evaluation_audio"
final_model_dir="${work_dir}/../../trained_models/${final_model_name}"

# Report the effective settings when verbose output is enabled.
if $verbose; then
  echo "system_version: ${system_version}"
  echo "file_folder_name: ${file_folder_name}"
fi

#######################################
# Route every later "python3 ..." call to the interpreter that belongs to the
# selected platform.
#
# A shell function is used instead of an alias: bash does not expand aliases
# in non-interactive scripts unless expand_aliases (or POSIX mode) is active,
# so an alias is silently ignored when the script is started through bash.
#
# Globals:
#   system_version  (read)    "windows", "centos" or "ubuntu"
#   python_bin      (written) path of the selected interpreter
# Returns:
#   0 always. For any other system_version nothing is defined and the
#   python3 found on PATH keeps being used.
#######################################
configure_python() {
  case "${system_version}" in
    windows)
      python_bin='D:/Users/tianx/PycharmProjects/virtualenv/nx_denoise/Scripts/python.exe'
      ;;
    centos | ubuntu)
      #source /data/local/bin/nx_denoise/bin/activate
      python_bin='/data/local/bin/nx_denoise/bin/python3'
      ;;
    *)
      return 0
      ;;
  esac

  # Forward all arguments unchanged to the selected interpreter.
  python3() { "${python_bin}" "$@"; }
}

configure_python


#######################################
# Stage 1: run step_1_prepare_data.py from work_dir, passing it the noise and
# speech directories and the paths of the train/valid dataset files.
#
# Globals (read):
#   verbose, work_dir, file_dir, noise_dir, speech_dir,
#   train_dataset, valid_dataset
# Outputs:
#   a progress line on stdout when verbose is true
# Returns:
#   the exit status of step_1_prepare_data.py; exits the shell with status 1
#   if work_dir cannot be entered.
#######################################
run_stage_1() {
  $verbose && echo "stage 1: prepare data"
  cd "${work_dir}" || exit 1
  python3 step_1_prepare_data.py \
    --file_dir "${file_dir}" \
    --noise_dir "${noise_dir}" \
    --speech_dir "${speech_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}"
}

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
  # Abort instead of carrying on to later stages with missing or stale data.
  run_stage_1 || exit $?
fi


# Stage 2: run step_2_train_model.py from work_dir. It receives the two
# dataset files, file_dir as its serialization directory, the config file
# and the --sparse flag.
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
  if $verbose; then
    echo "stage 2: train model"
  fi

  cd "${work_dir}" || exit 1

  train_args=(
    --train_dataset "${train_dataset}"
    --valid_dataset "${valid_dataset}"
    --serialization_dir "${file_dir}"
    --config_file "${config_file}"
    --sparse
  )
  python3 step_2_train_model.py "${train_args[@]}"
fi