Spaces:
Running
Running
update
Browse files
examples/nx_mpnet/run.sh
CHANGED
@@ -3,24 +3,10 @@
|
|
3 |
: <<'END'
|
4 |
|
5 |
|
6 |
-
sh run.sh --stage
|
7 |
-
--noise_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/noise" \
|
8 |
-
--speech_dir "E:/programmer/asr_datasets/aishell/data_aishell/wav/train"
|
9 |
-
|
10 |
-
|
11 |
-
sh run.sh --stage 3 --stop_stage 3 --system_version centos --file_folder_name file_dir --final_model_name mpnet-aishell-20250224 \
|
12 |
-
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
|
13 |
-
--speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train"
|
14 |
-
|
15 |
-
sh run.sh --stage 5 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name mpnet-aishell-20250224 \
|
16 |
-
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
|
17 |
-
--speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train"
|
18 |
-
|
19 |
-
|
20 |
-
sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name mpnet-nx-speech-20250224 \
|
21 |
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
|
22 |
-
--speech_dir "/data/tianxing/HuggingDatasets/
|
23 |
-
--max_epochs
|
24 |
|
25 |
|
26 |
END
|
|
|
3 |
: <<'END'
|
4 |
|
5 |
|
6 |
+
sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name nx-mpnet-aishell-20250224 \
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
|
8 |
+
--speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train" \
|
9 |
+
--max_epochs 100
|
10 |
|
11 |
|
12 |
END
|
examples/nx_mpnet/yaml/config.yaml
CHANGED
@@ -1,27 +1,38 @@
|
|
1 |
-
model_name: "
|
2 |
-
|
3 |
-
num_gpus: 0
|
4 |
-
batch_size: 3
|
5 |
-
learning_rate: 0.0005
|
6 |
-
adam_b1: 0.8
|
7 |
-
adam_b2: 0.99
|
8 |
-
lr_decay: 0.99
|
9 |
-
seed: 1234
|
10 |
-
|
11 |
-
dense_channel: 64
|
12 |
-
compress_factor: 0.3
|
13 |
-
num_tsconformers: 4
|
14 |
-
beta: 2.0
|
15 |
|
16 |
sample_rate: 8000
|
17 |
segment_size: 16000
|
18 |
n_fft: 512
|
19 |
-
hop_size: 80
|
20 |
win_size: 200
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
|
|
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model_name: "nx_denoise"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
sample_rate: 8000
|
4 |
segment_size: 16000
|
5 |
n_fft: 512
|
|
|
6 |
win_size: 200
|
7 |
+
hop_size: 80
|
8 |
+
|
9 |
+
dense_num_blocks: 4
|
10 |
+
dense_hidden_size: 64
|
11 |
|
12 |
+
mask_num_blocks: 4
|
13 |
+
mask_hidden_size: 64
|
14 |
|
15 |
+
phase_num_blocks: 4
|
16 |
+
phase_hidden_size: 64
|
17 |
+
|
18 |
+
tsfm_hidden_size: 64
|
19 |
+
tsfm_attention_heads: 4
|
20 |
+
tsfm_num_blocks: 4
|
21 |
+
tsfm_dropout_rate: 0.0
|
22 |
+
tsfm_max_time_relative_position: 1024
|
23 |
+
tsfm_max_freq_relative_position: 128
|
24 |
+
tsfm_chunk_size: 1
|
25 |
+
tsfm_num_left_chunks: 64
|
26 |
+
tsfm_num_right_chunks: 2
|
27 |
+
|
28 |
+
discriminator_dim: 32
|
29 |
+
discriminator_in_channel: 2
|
30 |
+
|
31 |
+
compress_factor: 0.3
|
32 |
+
|
33 |
+
batch_size: 4
|
34 |
+
learning_rate: 0.0005
|
35 |
+
adam_b1: 0.8
|
36 |
+
adam_b2: 0.99
|
37 |
+
lr_decay: 0.99
|
38 |
+
seed: 1234
|
toolbox/torchaudio/models/nx_mpnet/yaml/config.yaml
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model_name: "nx_denoise"
|
2 |
+
|
3 |
+
sample_rate: 8000
|
4 |
+
segment_size: 16000
|
5 |
+
n_fft: 512
|
6 |
+
win_size: 200
|
7 |
+
hop_size: 80
|
8 |
+
|
9 |
+
dense_num_blocks: 4
|
10 |
+
dense_hidden_size: 64
|
11 |
+
|
12 |
+
mask_num_blocks: 4
|
13 |
+
mask_hidden_size: 64
|
14 |
+
|
15 |
+
phase_num_blocks: 4
|
16 |
+
phase_hidden_size: 64
|
17 |
+
|
18 |
+
tsfm_hidden_size: 64
|
19 |
+
tsfm_attention_heads: 4
|
20 |
+
tsfm_num_blocks: 4
|
21 |
+
tsfm_dropout_rate: 0.0
|
22 |
+
tsfm_max_time_relative_position: 1024
|
23 |
+
tsfm_max_freq_relative_position: 128
|
24 |
+
tsfm_chunk_size: 1
|
25 |
+
tsfm_num_left_chunks: 64
|
26 |
+
tsfm_num_right_chunks: 2
|
27 |
+
|
28 |
+
discriminator_dim: 32
|
29 |
+
discriminator_in_channel: 2
|
30 |
+
|
31 |
+
compress_factor: 0.3
|
32 |
+
|
33 |
+
batch_size: 4
|
34 |
+
learning_rate: 0.0005
|
35 |
+
adam_b1: 0.8
|
36 |
+
adam_b2: 0.99
|
37 |
+
lr_decay: 0.99
|
38 |
+
seed: 1234
|