HoneyTian committed on
Commit
da40843
·
1 Parent(s): 64f179c
examples/dtln/run.sh CHANGED
@@ -6,7 +6,13 @@ sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name f
6
  --noise_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/noise" \
7
  --speech_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/speech"
8
 
9
- sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name dtln-nx-dns3 \
 
 
 
 
 
 
10
  --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/dns3-noise" \
11
  --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
12
 
 
6
  --noise_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/noise" \
7
  --speech_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/speech"
8
 
9
+ sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir-256 --final_model_name dtln-256-nx-dns3 \
10
+ --config_file "yaml/config-256.yaml" \
11
+ --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/dns3-noise" \
12
+ --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
13
+
14
+ sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir-512 --final_model_name dtln-512-nx-dns3 \
15
+ --config_file "yaml/config-512.yaml" \
16
  --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/dns3-noise" \
17
  --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
18
 
examples/dtln/yaml/{config.yaml → config-256.yaml} RENAMED
@@ -24,6 +24,6 @@ max_epochs: 100
24
  clip_grad_norm: 10.0
25
  seed: 1234
26
 
27
- batch_size: 64
28
  num_workers: 4
 
29
  eval_steps: 15000
 
24
  clip_grad_norm: 10.0
25
  seed: 1234
26
 
 
27
  num_workers: 4
28
+ batch_size: 64
29
  eval_steps: 15000
examples/dtln/yaml/config-512.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "DTLN"
2
+
3
+ # spec
4
+ sample_rate: 8000
5
+ fft_size: 512
6
+ hop_size: 128
7
+ win_type: hann
8
+
9
+ # data
10
+ max_snr_db: 20
11
+ min_snr_db: -10
12
+
13
+ # model
14
+ encoder_size: 512
15
+
16
+ # train
17
+ lr: 0.001
18
+ lr_scheduler: "CosineAnnealingLR"
19
+ lr_scheduler_kwargs:
20
+ T_max: 250000
21
+ eta_min: 0.0001
22
+
23
+ max_epochs: 100
24
+ clip_grad_norm: 10.0
25
+ seed: 1234
26
+
27
+ num_workers: 4
28
+ batch_size: 64
29
+ eval_steps: 15000
toolbox/torchaudio/models/dtln/modeling_dtln.py CHANGED
@@ -133,7 +133,8 @@ class DTLNModel(nn.Module):
133
  )
134
 
135
  self.sep1 = SeperationBlock(input_size=(fft_size // 2 + 1),
136
- hidden_size=128,
 
137
  dropout=0.25,
138
  )
139
 
@@ -148,7 +149,8 @@ class DTLNModel(nn.Module):
148
  self.encoder_norm1 = InstantLayerNormalization(channels=self.encoder_size)
149
 
150
  self.sep2 = SeperationBlock(input_size=self.encoder_size,
151
- hidden_size=128,
 
152
  dropout=0.25,
153
  )
154
 
 
133
  )
134
 
135
  self.sep1 = SeperationBlock(input_size=(fft_size // 2 + 1),
136
+ # hidden_size=128,
137
+ hidden_size=self.encoder_size // 2,
138
  dropout=0.25,
139
  )
140
 
 
149
  self.encoder_norm1 = InstantLayerNormalization(channels=self.encoder_size)
150
 
151
  self.sep2 = SeperationBlock(input_size=self.encoder_size,
152
+ # hidden_size=128,
153
+ hidden_size=self.encoder_size // 2,
154
  dropout=0.25,
155
  )
156
 
toolbox/torchaudio/models/dtln/yaml/config-160.yaml CHANGED
@@ -11,7 +11,7 @@ min_snr_db: -10
11
  encoder_size: 256
12
 
13
  max_epochs: 100
14
- batch_size: 4
15
  num_workers: 4
16
  seed: 1234
17
  eval_steps: 25000
 
11
  encoder_size: 256
12
 
13
  max_epochs: 100
14
+ batch_size: 64
15
  num_workers: 4
16
  seed: 1234
17
  eval_steps: 25000
toolbox/torchaudio/models/dtln/yaml/config-256.yaml CHANGED
@@ -11,7 +11,7 @@ min_snr_db: -10
11
  encoder_size: 256
12
 
13
  max_epochs: 100
14
- batch_size: 4
15
  num_workers: 4
16
  seed: 1234
17
  eval_steps: 25000
 
11
  encoder_size: 256
12
 
13
  max_epochs: 100
14
+ batch_size: 64
15
  num_workers: 4
16
  seed: 1234
17
  eval_steps: 25000
toolbox/torchaudio/models/dtln/yaml/config-512.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "DTLN"
2
+
3
+ # spec
4
+ sample_rate: 8000
5
+ fft_size: 512
6
+ hop_size: 128
7
+ win_type: hann
8
+
9
+ # data
10
+ max_snr_db: 20
11
+ min_snr_db: -10
12
+
13
+ # model
14
+ encoder_size: 512
15
+
16
+ # train
17
+ lr: 0.001
18
+ lr_scheduler: "CosineAnnealingLR"
19
+ lr_scheduler_kwargs:
20
+ T_max: 250000
21
+ eta_min: 0.0001
22
+
23
+ max_epochs: 100
24
+ clip_grad_norm: 10.0
25
+ seed: 1234
26
+
27
+ num_workers: 4
28
+ batch_size: 64
29
+ eval_steps: 15000