HoneyTian committed on
Commit
05cf4cb
·
1 Parent(s): 87129e4
README.md CHANGED
@@ -24,6 +24,25 @@ http://www.openslr.org/93/
24
 
25
  DNS3
26
  https://github.com/microsoft/DNS-Challenge/blob/master/download-dns-challenge-3.sh
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  ```
29
 
 
24
 
25
  DNS3
26
  https://github.com/microsoft/DNS-Challenge/blob/master/download-dns-challenge-3.sh
27
+ 噪音数据来源于 DEMAND, FreeSound, AudioSet.
28
 
29
+ MS-SNSD
30
+ https://github.com/microsoft/MS-SNSD
31
+ 噪音数据来源于 DEMAND, FreeSound.
32
+
33
+ MUSAN
34
+ https://www.openslr.org/17/
35
+ 其中包含 music, noise, speech.
36
+ music 是一些纯音乐, noise 包含 free-sound, sound-bible, sound-bible部分也许可以作为补充部分.
37
+ 总的来说, 有用的部分不多, 可能噪音数据仍然需要自己收集为主, 更加可靠.
38
+
39
+ CHiME-4
40
+ https://www.chimechallenge.org/challenges/chime4/download.html
41
+
42
+ freesound
43
+ https://freesound.org/
44
+
45
+ AudioSet
46
+ https://research.google.com/audioset/index.html
47
  ```
48
 
examples/data_preprocess/ms_snsd_to_8k/process_ms_snsd.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ MS-SNSD
5
+ https://github.com/microsoft/MS-SNSD
6
+ """
7
+ import argparse
8
+ import os
9
+ from pathlib import Path
10
+ import sys
11
+
12
+ import numpy as np
13
+ from tqdm import tqdm
14
+
15
+ pwd = os.path.abspath(os.path.dirname(__file__))
16
+ sys.path.append(os.path.join(pwd, "../../"))
17
+
18
+ import librosa
19
+ from scipy.io import wavfile
20
+
21
+
22
def get_args():
    """Parse the command-line options of the MS-SNSD conversion script.

    Options:
        --data_dir: root directory of the MS-SNSD dataset.
        --output_dir: directory that receives the converted wav files.
        --sample_rate: target sample rate in Hz (default 8000).

    Returns:
        argparse.Namespace with ``data_dir``, ``output_dir`` and
        ``sample_rate`` attributes.
    """
    arg_parser = argparse.ArgumentParser()

    # Path options share the same type; only the flag and default differ.
    arg_parser.add_argument("--data_dir", type=str, default=r"E:\programmer\asr_datasets\MS-SNSD")
    arg_parser.add_argument("--output_dir", type=str, default=r"E:\programmer\asr_datasets\denoise\ms-snsd-noise-8k")
    arg_parser.add_argument("--sample_rate", type=int, default=8000)

    return arg_parser.parse_args()
38
+
39
+
40
def main():
    """Resample MS-SNSD wav files and store them as 16-bit PCM wav files.

    Recursively walks ``--data_dir`` and keeps only the wav files whose parent
    directory is one of ``noise_train``, ``noise_test``, ``clean_train`` or
    ``clean_test``. Each kept file is loaded with librosa at ``--sample_rate``
    and written to ``--output_dir/<parent dir>/<file stem>.wav``.
    """
    args = get_args()

    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Built once; membership is checked for every file found.
    wanted_labels = {"noise_train", "noise_test", "clean_train", "clean_test"}
    int16_info = np.iinfo(np.int16)

    for filename in tqdm(data_dir.glob("**/*.wav")):
        # The parent directory name identifies the dataset split.
        label = filename.parts[-2]
        name = filename.stem

        if label not in wanted_labels:
            continue

        signal, _ = librosa.load(filename.as_posix(), sr=args.sample_rate)

        # Scale the float waveform to the int16 range. A sample at or above
        # +1.0 (full-scale input or resampling overshoot) maps to >= 32768,
        # which does not fit in int16 and would wrap around on the cast, so
        # clip to the representable range first.
        signal = signal * (1 << 15)
        signal = np.clip(signal, int16_info.min, int16_info.max)
        signal = np.array(signal, dtype=np.int16)

        to_file = output_dir / f"{label}/{name}.wav"
        to_file.parent.mkdir(parents=True, exist_ok=True)
        wavfile.write(
            to_file.as_posix(),
            rate=args.sample_rate,
            data=signal,
        )
    return
67
+
68
+
69
+ if __name__ == "__main__":
70
+ main()
examples/dtln/run.sh CHANGED
@@ -20,10 +20,10 @@ sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name dt
20
  --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2"
21
 
22
 
23
- sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir-1024 --final_model_name dtln-1024-nx-devoice \
24
- --config_file "yaml/config-1024.yaml" \
25
- --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2" \
26
- --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/nx-noise"
27
 
28
 
29
  END
 
20
  --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2"
21
 
22
 
23
+ sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name dtnl-256-nx2 --final_model_name dtln-256-nx2 \
24
+ --config_file "yaml/config-256.yaml" \
25
+ --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
26
+ --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech"
27
 
28
 
29
  END
examples/dtln/yaml/config-256.yaml CHANGED
@@ -8,7 +8,7 @@ win_type: hann
8
 
9
  # data
10
  max_snr_db: 20
11
- min_snr_db: -10
12
 
13
  # model
14
  encoder_size: 256
 
8
 
9
  # data
10
  max_snr_db: 20
11
+ min_snr_db: -20
12
 
13
  # model
14
  encoder_size: 256
toolbox/torchaudio/models/dtln/modeling_dtln.py CHANGED
@@ -8,8 +8,12 @@ https://github.com/AkenoSyuRi/DTLNPytorch
8
  https://github.com/breizhn/DTLN
9
 
10
  数据集: DNS3 DNS-Challenge
11
- 信噪比 -5 25 dB
12
- 5 30 dB
 
 
 
 
13
  窗长 32ms, 窗移 8ms
14
 
15
  在 dns3 500个小时的数据上训练, 在 dns3 的测试集上达到了 pesq 3.04 的水平。
 
8
  https://github.com/breizhn/DTLN
9
 
10
  数据集: DNS3 DNS-Challenge
11
+ 信噪比从 DNS3 的 [0, 40] dB 调整为 [-5, 25] dB
12
+ 信噪比级别从 5 个改到 30 个。 即:
13
+ [0dB, 10dB, 20dB, 30dB, 40dB]
14
+ 改到:
15
+ [-5dB, -4dB, -3dB, ..., 22dB, 23dB, 24dB, 25dB]
16
+
17
  窗长 32ms, 窗移 8ms
18
 
19
  在 dns3 500个小时的数据上训练, 在 dns3 的测试集上达到了 pesq 3.04 的水平。