Spaces:
Running
Running
update
Browse files
- examples/dtln/run.sh +7 -1
- examples/dtln/yaml/{config.yaml → config-256.yaml} +1 -1
- examples/dtln/yaml/config-512.yaml +29 -0
- toolbox/torchaudio/models/dtln/modeling_dtln.py +4 -2
- toolbox/torchaudio/models/dtln/yaml/config-160.yaml +1 -1
- toolbox/torchaudio/models/dtln/yaml/config-256.yaml +1 -1
- toolbox/torchaudio/models/dtln/yaml/config-512.yaml +29 -0
examples/dtln/run.sh
CHANGED
@@ -6,7 +6,13 @@ sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name f
|
|
6 |
--noise_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/noise" \
|
7 |
--speech_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/speech"
|
8 |
|
9 |
-
sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name dtln-nx-dns3 \
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/dns3-noise" \
|
11 |
--speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
|
12 |
|
|
|
6 |
--noise_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/noise" \
|
7 |
--speech_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/speech"
|
8 |
|
9 |
+
sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir-256 --final_model_name dtln-256-nx-dns3 \
|
10 |
+
--config_file "yaml/config-256.yaml" \
|
11 |
+
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/dns3-noise" \
|
12 |
+
--speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
|
13 |
+
|
14 |
+
sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir-512 --final_model_name dtln-512-nx-dns3 \
|
15 |
+
--config_file "yaml/config-512.yaml" \
|
16 |
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/dns3-noise" \
|
17 |
--speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
|
18 |
|
examples/dtln/yaml/{config.yaml → config-256.yaml}
RENAMED
@@ -24,6 +24,6 @@ max_epochs: 100
|
|
24 |
clip_grad_norm: 10.0
|
25 |
seed: 1234
|
26 |
|
27 |
-
batch_size: 64
|
28 |
num_workers: 4
|
|
|
29 |
eval_steps: 15000
|
|
|
24 |
clip_grad_norm: 10.0
|
25 |
seed: 1234
|
26 |
|
|
|
27 |
num_workers: 4
|
28 |
+
batch_size: 64
|
29 |
eval_steps: 15000
|
examples/dtln/yaml/config-512.yaml
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model_name: "DTLN"
|
2 |
+
|
3 |
+
# spec
|
4 |
+
sample_rate: 8000
|
5 |
+
fft_size: 512
|
6 |
+
hop_size: 128
|
7 |
+
win_type: hann
|
8 |
+
|
9 |
+
# data
|
10 |
+
max_snr_db: 20
|
11 |
+
min_snr_db: -10
|
12 |
+
|
13 |
+
# model
|
14 |
+
encoder_size: 512
|
15 |
+
|
16 |
+
# train
|
17 |
+
lr: 0.001
|
18 |
+
lr_scheduler: "CosineAnnealingLR"
|
19 |
+
lr_scheduler_kwargs:
|
20 |
+
T_max: 250000
|
21 |
+
eta_min: 0.0001
|
22 |
+
|
23 |
+
max_epochs: 100
|
24 |
+
clip_grad_norm: 10.0
|
25 |
+
seed: 1234
|
26 |
+
|
27 |
+
num_workers: 4
|
28 |
+
batch_size: 64
|
29 |
+
eval_steps: 15000
|
toolbox/torchaudio/models/dtln/modeling_dtln.py
CHANGED
@@ -133,7 +133,8 @@ class DTLNModel(nn.Module):
|
|
133 |
)
|
134 |
|
135 |
self.sep1 = SeperationBlock(input_size=(fft_size // 2 + 1),
|
136 |
-
hidden_size=128,
|
|
|
137 |
dropout=0.25,
|
138 |
)
|
139 |
|
@@ -148,7 +149,8 @@ class DTLNModel(nn.Module):
|
|
148 |
self.encoder_norm1 = InstantLayerNormalization(channels=self.encoder_size)
|
149 |
|
150 |
self.sep2 = SeperationBlock(input_size=self.encoder_size,
|
151 |
-
hidden_size=128,
|
|
|
152 |
dropout=0.25,
|
153 |
)
|
154 |
|
|
|
133 |
)
|
134 |
|
135 |
self.sep1 = SeperationBlock(input_size=(fft_size // 2 + 1),
|
136 |
+
# hidden_size=128,
|
137 |
+
hidden_size=self.encoder_size // 2,
|
138 |
dropout=0.25,
|
139 |
)
|
140 |
|
|
|
149 |
self.encoder_norm1 = InstantLayerNormalization(channels=self.encoder_size)
|
150 |
|
151 |
self.sep2 = SeperationBlock(input_size=self.encoder_size,
|
152 |
+
# hidden_size=128,
|
153 |
+
hidden_size=self.encoder_size // 2,
|
154 |
dropout=0.25,
|
155 |
)
|
156 |
|
toolbox/torchaudio/models/dtln/yaml/config-160.yaml
CHANGED
@@ -11,7 +11,7 @@ min_snr_db: -10
|
|
11 |
encoder_size: 256
|
12 |
|
13 |
max_epochs: 100
|
14 |
-
batch_size:
|
15 |
num_workers: 4
|
16 |
seed: 1234
|
17 |
eval_steps: 25000
|
|
|
11 |
encoder_size: 256
|
12 |
|
13 |
max_epochs: 100
|
14 |
+
batch_size: 64
|
15 |
num_workers: 4
|
16 |
seed: 1234
|
17 |
eval_steps: 25000
|
toolbox/torchaudio/models/dtln/yaml/config-256.yaml
CHANGED
@@ -11,7 +11,7 @@ min_snr_db: -10
|
|
11 |
encoder_size: 256
|
12 |
|
13 |
max_epochs: 100
|
14 |
-
batch_size:
|
15 |
num_workers: 4
|
16 |
seed: 1234
|
17 |
eval_steps: 25000
|
|
|
11 |
encoder_size: 256
|
12 |
|
13 |
max_epochs: 100
|
14 |
+
batch_size: 64
|
15 |
num_workers: 4
|
16 |
seed: 1234
|
17 |
eval_steps: 25000
|
toolbox/torchaudio/models/dtln/yaml/config-512.yaml
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model_name: "DTLN"
|
2 |
+
|
3 |
+
# spec
|
4 |
+
sample_rate: 8000
|
5 |
+
fft_size: 512
|
6 |
+
hop_size: 128
|
7 |
+
win_type: hann
|
8 |
+
|
9 |
+
# data
|
10 |
+
max_snr_db: 20
|
11 |
+
min_snr_db: -10
|
12 |
+
|
13 |
+
# model
|
14 |
+
encoder_size: 512
|
15 |
+
|
16 |
+
# train
|
17 |
+
lr: 0.001
|
18 |
+
lr_scheduler: "CosineAnnealingLR"
|
19 |
+
lr_scheduler_kwargs:
|
20 |
+
T_max: 250000
|
21 |
+
eta_min: 0.0001
|
22 |
+
|
23 |
+
max_epochs: 100
|
24 |
+
clip_grad_norm: 10.0
|
25 |
+
seed: 1234
|
26 |
+
|
27 |
+
num_workers: 4
|
28 |
+
batch_size: 64
|
29 |
+
eval_steps: 15000
|