HoneyTian committed on
Commit
a8c2bc7
·
1 Parent(s): e8fafc5
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
 
 
2
  .git/
3
  .idea/
4
 
 
1
 
2
+ .gradio/
3
  .git/
4
  .idea/
5
 
README.md CHANGED
@@ -12,7 +12,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
12
  ## NX Denoise
13
 
14
 
15
- ### speech datasets
16
 
17
  ```text
18
 
@@ -22,5 +22,8 @@ https://openslr.trmal.net/resources/33/
22
  AISHELL-3 (19G)
23
  http://www.openslr.org/93/
24
 
 
 
 
25
  ```
26
 
 
12
  ## NX Denoise
13
 
14
 
15
+ ### datasets
16
 
17
  ```text
18
 
 
22
  AISHELL-3 (19G)
23
  http://www.openslr.org/93/
24
 
25
+ DNS3
26
+ https://github.com/microsoft/DNS-Challenge/blob/master/download-dns-challenge-3.sh
27
+
28
  ```
29
 
examples/frcrn/run.sh CHANGED
@@ -4,7 +4,8 @@
4
 
5
 
6
  sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name frcrn \
7
- --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/dns3-noise" \
 
8
  --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
9
 
10
 
 
4
 
5
 
6
  sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name frcrn \
7
+ --config_file "yaml/config-20-512.yaml" \
8
+ --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
9
  --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
10
 
11
 
examples/frcrn/yaml/{config.yaml → config-10-512.yaml} RENAMED
@@ -1,24 +1,10 @@
1
-
2
  model_name: "frcrn"
3
 
4
- num_gpus: -1
5
-
6
- lr: 0.001
7
- lr_scheduler: "CosineAnnealingLR"
8
- lr_scheduler_kwargs:
9
- T_max: 250000
10
- eta_min: 0.0001
11
-
12
- max_epochs: 100
13
- weight_decay: 1.0e-05
14
- clip_grad_norm: 10.0
15
- seed: 1234
16
-
17
  sample_rate: 8000
18
  segment_size: 32000
19
- nfft: 128
20
- win_size: 128
21
- hop_size: 64
22
  win_type: hann
23
 
24
  use_complex_networks: true
@@ -31,3 +17,15 @@ max_snr_db: 20
31
  num_workers: 8
32
  batch_size: 32
33
  eval_steps: 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  model_name: "frcrn"
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  sample_rate: 8000
4
  segment_size: 32000
5
+ nfft: 512
6
+ win_size: 512
7
+ hop_size: 128
8
  win_type: hann
9
 
10
  use_complex_networks: true
 
17
  num_workers: 8
18
  batch_size: 32
19
  eval_steps: 10000
20
+
21
+ lr: 0.001
22
+ lr_scheduler: "CosineAnnealingLR"
23
+ lr_scheduler_kwargs:
24
+ T_max: 250000
25
+ eta_min: 0.0001
26
+
27
+ max_epochs: 100
28
+ weight_decay: 1.0e-05
29
+ clip_grad_norm: 10.0
30
+ seed: 1234
31
+ num_gpus: -1
examples/frcrn/yaml/config-14-512.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "frcrn"
2
+
3
+ sample_rate: 8000
4
+ segment_size: 32000
5
+ nfft: 512
6
+ win_size: 512
7
+ hop_size: 128
8
+ win_type: hann
9
+
10
+ use_complex_networks: true
11
+ model_depth: 14
12
+ model_complexity: -1
13
+
14
+ min_snr_db: -10
15
+ max_snr_db: 20
16
+
17
+ num_workers: 8
18
+ batch_size: 32
19
+ eval_steps: 10000
20
+
21
+ lr: 0.001
22
+ lr_scheduler: "CosineAnnealingLR"
23
+ lr_scheduler_kwargs:
24
+ T_max: 250000
25
+ eta_min: 0.0001
26
+
27
+ max_epochs: 100
28
+ weight_decay: 1.0e-05
29
+ clip_grad_norm: 10.0
30
+ seed: 1234
31
+ num_gpus: -1
examples/frcrn/yaml/config-20-512.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "frcrn"
2
+
3
+ sample_rate: 8000
4
+ segment_size: 32000
5
+ nfft: 512
6
+ win_size: 512
7
+ hop_size: 128
8
+ win_type: hann
9
+
10
+ use_complex_networks: true
11
+ model_depth: 20
12
+ model_complexity: 45
13
+
14
+ min_snr_db: -10
15
+ max_snr_db: 20
16
+
17
+ num_workers: 8
18
+ batch_size: 32
19
+ eval_steps: 10000
20
+
21
+ lr: 0.001
22
+ lr_scheduler: "CosineAnnealingLR"
23
+ lr_scheduler_kwargs:
24
+ T_max: 250000
25
+ eta_min: 0.0001
26
+
27
+ max_epochs: 100
28
+ weight_decay: 1.0e-05
29
+ clip_grad_norm: 10.0
30
+ seed: 1234
31
+ num_gpus: -1
main.py CHANGED
@@ -1,14 +1,16 @@
1
  #!/usr/bin/python3
2
  # -*- coding: utf-8 -*-
3
  """
4
- docker build -t denoise:v20250609_1536 .
5
  docker stop denoise_7865 && docker rm denoise_7865
6
  docker run -itd \
7
  --name denoise_7865 \
8
  --restart=always \
9
  --network host \
10
- -e port=7865 \
11
- denoise:v20250609_1536
 
 
12
  """
13
  import argparse
14
  import json
@@ -273,7 +275,9 @@ def main():
273
  )
274
 
275
  # http://127.0.0.1:7865/
 
276
  blocks.queue().launch(
 
277
  share=False if platform.system() == "Windows" else False,
278
  server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",
279
  server_port=args.server_port
 
1
  #!/usr/bin/python3
2
  # -*- coding: utf-8 -*-
3
  """
4
+ docker build -t denoise:v20250609_1919 .
5
  docker stop denoise_7865 && docker rm denoise_7865
6
  docker run -itd \
7
  --name denoise_7865 \
8
  --restart=always \
9
  --network host \
10
+ -e server_port=7865 \
11
+ -e hf_token="${HF_TOKEN}" \
12
+ denoise:v20250609_1919 /bin/bash
13
+
14
  """
15
  import argparse
16
  import json
 
275
  )
276
 
277
  # http://127.0.0.1:7865/
278
+ # http://10.75.27.247:7865/
279
  blocks.queue().launch(
280
+ # share=True,
281
  share=False if platform.system() == "Windows" else False,
282
  server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",
283
  server_port=args.server_port
toolbox/torchaudio/models/frcrn/configuration_frcrn.py CHANGED
@@ -10,22 +10,11 @@ from toolbox.torchaudio.configuration_utils import PretrainedConfig
10
 
11
  class FRCRNConfig(PretrainedConfig):
12
  def __init__(self,
13
- num_gpus: int = -1,
14
-
15
- lr: float = 0.001,
16
- lr_scheduler: str = "CosineAnnealingLR",
17
- lr_scheduler_kwargs: dict = None,
18
-
19
- max_epochs: int = 100,
20
- weight_decay: float = 0.00001,
21
- clip_grad_norm: float = 10.,
22
- seed: int = 1234,
23
-
24
  sample_rate: int = 8000,
25
  segment_size: int = 32000,
26
  nfft: int = 512,
27
  win_size: int = 512,
28
- hop_size: int = 256,
29
  win_type: str = "hann",
30
 
31
  use_complex_networks: bool = True,
@@ -39,20 +28,19 @@ class FRCRNConfig(PretrainedConfig):
39
  batch_size: int = 4,
40
  eval_steps: int = 25000,
41
 
 
 
 
 
 
 
 
 
 
 
42
  **kwargs
43
  ):
44
  super(FRCRNConfig, self).__init__(**kwargs)
45
- self.num_gpus = num_gpus
46
-
47
- self.lr = lr
48
- self.lr_scheduler = lr_scheduler
49
- self.lr_scheduler_kwargs = lr_scheduler_kwargs or dict()
50
-
51
- self.max_epochs = max_epochs
52
- self.weight_decay = weight_decay
53
- self.clip_grad_norm = clip_grad_norm
54
- self.seed = seed
55
-
56
  self.sample_rate = sample_rate
57
  self.segment_size = segment_size
58
  self.nfft = nfft
@@ -71,6 +59,16 @@ class FRCRNConfig(PretrainedConfig):
71
  self.batch_size = batch_size
72
  self.eval_steps = eval_steps
73
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  def main():
76
  config = FRCRNConfig()
 
10
 
11
  class FRCRNConfig(PretrainedConfig):
12
  def __init__(self,
 
 
 
 
 
 
 
 
 
 
 
13
  sample_rate: int = 8000,
14
  segment_size: int = 32000,
15
  nfft: int = 512,
16
  win_size: int = 512,
17
+ hop_size: int = 128,
18
  win_type: str = "hann",
19
 
20
  use_complex_networks: bool = True,
 
28
  batch_size: int = 4,
29
  eval_steps: int = 25000,
30
 
31
+ lr: float = 0.001,
32
+ lr_scheduler: str = "CosineAnnealingLR",
33
+ lr_scheduler_kwargs: dict = None,
34
+
35
+ max_epochs: int = 100,
36
+ weight_decay: float = 0.00001,
37
+ clip_grad_norm: float = 10.,
38
+ seed: int = 1234,
39
+ num_gpus: int = -1,
40
+
41
  **kwargs
42
  ):
43
  super(FRCRNConfig, self).__init__(**kwargs)
 
 
 
 
 
 
 
 
 
 
 
44
  self.sample_rate = sample_rate
45
  self.segment_size = segment_size
46
  self.nfft = nfft
 
59
  self.batch_size = batch_size
60
  self.eval_steps = eval_steps
61
 
62
+ self.lr = lr
63
+ self.lr_scheduler = lr_scheduler
64
+ self.lr_scheduler_kwargs = lr_scheduler_kwargs or dict()
65
+
66
+ self.max_epochs = max_epochs
67
+ self.weight_decay = weight_decay
68
+ self.clip_grad_norm = clip_grad_norm
69
+ self.seed = seed
70
+ self.num_gpus = num_gpus
71
+
72
 
73
  def main():
74
  config = FRCRNConfig()
toolbox/torchaudio/models/frcrn/yaml/config-10-512.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "frcrn"
2
+
3
+ sample_rate: 8000
4
+ segment_size: 32000
5
+ nfft: 512
6
+ win_size: 512
7
+ hop_size: 128
8
+ win_type: hann
9
+
10
+ use_complex_networks: true
11
+ model_depth: 10
12
+ model_complexity: -1
13
+
14
+ min_snr_db: -10
15
+ max_snr_db: 20
16
+
17
+ num_workers: 8
18
+ batch_size: 32
19
+ eval_steps: 10000
20
+
21
+ lr: 0.001
22
+ lr_scheduler: "CosineAnnealingLR"
23
+ lr_scheduler_kwargs:
24
+ T_max: 250000
25
+ eta_min: 0.0001
26
+
27
+ max_epochs: 100
28
+ weight_decay: 1.0e-05
29
+ clip_grad_norm: 10.0
30
+ seed: 1234
31
+ num_gpus: -1
toolbox/torchaudio/models/frcrn/yaml/config-14-512.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "frcrn"
2
+
3
+ sample_rate: 8000
4
+ segment_size: 32000
5
+ nfft: 512
6
+ win_size: 512
7
+ hop_size: 128
8
+ win_type: hann
9
+
10
+ use_complex_networks: true
11
+ model_depth: 14
12
+ model_complexity: -1
13
+
14
+ min_snr_db: -10
15
+ max_snr_db: 20
16
+
17
+ num_workers: 8
18
+ batch_size: 32
19
+ eval_steps: 10000
20
+
21
+ lr: 0.001
22
+ lr_scheduler: "CosineAnnealingLR"
23
+ lr_scheduler_kwargs:
24
+ T_max: 250000
25
+ eta_min: 0.0001
26
+
27
+ max_epochs: 100
28
+ weight_decay: 1.0e-05
29
+ clip_grad_norm: 10.0
30
+ seed: 1234
31
+ num_gpus: -1
toolbox/torchaudio/models/frcrn/yaml/config-20-512.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "frcrn"
2
+
3
+ sample_rate: 8000
4
+ segment_size: 32000
5
+ nfft: 512
6
+ win_size: 512
7
+ hop_size: 128
8
+ win_type: hann
9
+
10
+ use_complex_networks: true
11
+ model_depth: 20
12
+ model_complexity: 45
13
+
14
+ min_snr_db: -10
15
+ max_snr_db: 20
16
+
17
+ num_workers: 8
18
+ batch_size: 32
19
+ eval_steps: 10000
20
+
21
+ lr: 0.001
22
+ lr_scheduler: "CosineAnnealingLR"
23
+ lr_scheduler_kwargs:
24
+ T_max: 250000
25
+ eta_min: 0.0001
26
+
27
+ max_epochs: 100
28
+ weight_decay: 1.0e-05
29
+ clip_grad_norm: 10.0
30
+ seed: 1234
31
+ num_gpus: -1
toolbox/torchaudio/models/tcnn/modeling_tcnn.py CHANGED
@@ -233,12 +233,13 @@ class TCNN(nn.Module):
233
  if remainder > 0:
234
  n_samples_pad = self.hop_size - remainder
235
  signal = F.pad(signal, pad=(0, n_samples_pad), mode="constant", value=0)
236
- return signal, n_samples
237
 
238
  def forward(self,
239
  noisy: torch.Tensor,
240
  ):
241
- noisy, num_samples = self.signal_prepare(noisy)
 
242
  batch_size, _, num_samples_pad = noisy.shape
243
 
244
  # n_frame = (num_samples_pad - self.win_size) / self.hop_size + 1
@@ -268,6 +269,8 @@ class TCNN(nn.Module):
268
 
269
  denoise = denoise[:, :num_samples]
270
  # denoise shape: [b, num_samples]
 
 
271
  return denoise
272
 
273
  def forward_chunk(self, inputs: torch.Tensor):
@@ -332,6 +335,7 @@ class TCNN(nn.Module):
332
 
333
  def main():
334
  model = TCNN()
 
335
 
336
  x = torch.randn(64, 1, 5, 320)
337
  # x = torch.randn(64, 1, 5, 160)
 
233
  if remainder > 0:
234
  n_samples_pad = self.hop_size - remainder
235
  signal = F.pad(signal, pad=(0, n_samples_pad), mode="constant", value=0)
236
+ return signal
237
 
238
  def forward(self,
239
  noisy: torch.Tensor,
240
  ):
241
+ num_samples = noisy.shape[-1]
242
+ noisy = self.signal_prepare(noisy)
243
  batch_size, _, num_samples_pad = noisy.shape
244
 
245
  # n_frame = (num_samples_pad - self.win_size) / self.hop_size + 1
 
269
 
270
  denoise = denoise[:, :num_samples]
271
  # denoise shape: [b, num_samples]
272
+ denoise = torch.unsqueeze(denoise, dim=1)
273
+ # denoise shape: [b, 1, num_samples]
274
  return denoise
275
 
276
  def forward_chunk(self, inputs: torch.Tensor):
 
335
 
336
  def main():
337
  model = TCNN()
338
+ model.eval()
339
 
340
  x = torch.randn(64, 1, 5, 320)
341
  # x = torch.randn(64, 1, 5, 160)