Spaces:
Running
Running
update
Browse files
README.md
CHANGED
@@ -24,6 +24,25 @@ http://www.openslr.org/93/
|
|
24 |
|
25 |
DNS3
|
26 |
https://github.com/microsoft/DNS-Challenge/blob/master/download-dns-challenge-3.sh
|
|
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
```
|
29 |
|
|
|
24 |
|
25 |
DNS3
|
26 |
https://github.com/microsoft/DNS-Challenge/blob/master/download-dns-challenge-3.sh
|
27 |
+
噪音数据来源于 DEMAND, FreeSound, AudioSet.
|
28 |
|
29 |
+
MS-SNSD
|
30 |
+
https://github.com/microsoft/MS-SNSD
|
31 |
+
噪音数据来源于 DEMAND, FreeSound.
|
32 |
+
|
33 |
+
MUSAN
|
34 |
+
https://www.openslr.org/17/
|
35 |
+
其中包含 music, noise, speech.
|
36 |
+
music 是一些纯音乐, noise 包含 free-sound 和 sound-bible, 其中 sound-bible 部分也许可以作为补充.
|
37 |
+
总的来说, 有用的部分不多, 噪音数据可能仍然需要以自己收集为主, 更加可靠.
|
38 |
+
|
39 |
+
CHiME-4
|
40 |
+
https://www.chimechallenge.org/challenges/chime4/download.html
|
41 |
+
|
42 |
+
freesound
|
43 |
+
https://freesound.org/
|
44 |
+
|
45 |
+
AudioSet
|
46 |
+
https://research.google.com/audioset/index.html
|
47 |
```
|
48 |
|
examples/data_preprocess/ms_snsd_to_8k/process_ms_snsd.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
"""
|
4 |
+
MS-SNSD
|
5 |
+
https://github.com/microsoft/MS-SNSD
|
6 |
+
"""
|
7 |
+
import argparse
|
8 |
+
import os
|
9 |
+
from pathlib import Path
|
10 |
+
import sys
|
11 |
+
|
12 |
+
import numpy as np
|
13 |
+
from tqdm import tqdm
|
14 |
+
|
15 |
+
pwd = os.path.abspath(os.path.dirname(__file__))
|
16 |
+
sys.path.append(os.path.join(pwd, "../../"))
|
17 |
+
|
18 |
+
import librosa
|
19 |
+
from scipy.io import wavfile
|
20 |
+
|
21 |
+
|
22 |
+
def get_args(argv=None):
    """
    Parse the command-line options of the MS-SNSD conversion script.

    :param argv: optional list of argument strings. When None (the default),
        argparse falls back to sys.argv[1:], which is the original behavior.
    :return: argparse.Namespace with the attributes
        data_dir (str), output_dir (str) and sample_rate (int).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--data_dir",
        default=r"E:\programmer\asr_datasets\MS-SNSD",
        type=str,
        help="directory that is searched recursively for *.wav files",
    )
    parser.add_argument(
        "--output_dir",
        default=r"E:\programmer\asr_datasets\denoise\ms-snsd-noise-8k",
        type=str,
        help="directory the converted wav files are written to",
    )
    parser.add_argument(
        "--sample_rate",
        default=8000,
        type=int,
        help="sample rate of the written wav files",
    )

    # passing argv through lets callers (and tests) parse an explicit list
    # instead of always reading the process arguments.
    args = parser.parse_args(argv)
    return args
|
38 |
+
|
39 |
+
|
40 |
+
def main():
    """
    Resample the MS-SNSD wav files and store them as 16-bit PCM wav files.

    Every *.wav file below args.data_dir whose direct parent folder is one of
    noise_train / noise_test / clean_train / clean_test is loaded at
    args.sample_rate and written to <output_dir>/<parent folder>/<stem>.wav.
    Files in any other folder are skipped.
    """
    args = get_args()

    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # only files that sit directly inside one of these folders are converted.
    valid_labels = ("noise_train", "noise_test", "clean_train", "clean_test")

    # value range of a signed 16-bit sample.
    int16_min = -(1 << 15)
    int16_max = (1 << 15) - 1

    for filename in tqdm(data_dir.glob("**/*.wav")):
        # parent.name never raises, unlike parts[-2] on a one-component path.
        label = filename.parent.name
        if label not in valid_labels:
            continue
        name = filename.stem

        signal, _ = librosa.load(filename.as_posix(), sr=args.sample_rate)

        # scale to the int16 range, then clip before casting: a sample at
        # +1.0 (or above it) scales to >= 32768, which does not fit in int16
        # and would otherwise wrap around to a large negative value.
        signal = np.clip(signal * (1 << 15), int16_min, int16_max)
        signal = np.array(signal, dtype=np.int16)

        to_file = output_dir / label / f"{name}.wav"
        to_file.parent.mkdir(parents=True, exist_ok=True)
        wavfile.write(
            to_file.as_posix(),
            rate=args.sample_rate,
            data=signal,
        )
    return
|
67 |
+
|
68 |
+
|
69 |
+
if __name__ == "__main__":
|
70 |
+
main()
|
examples/dtln/run.sh
CHANGED
@@ -20,10 +20,10 @@ sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name dt
|
|
20 |
--speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2"
|
21 |
|
22 |
|
23 |
-
sh run.sh --stage
|
24 |
-
--config_file "yaml/config-
|
25 |
-
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/
|
26 |
-
--speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/
|
27 |
|
28 |
|
29 |
END
|
|
|
20 |
--speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2"
|
21 |
|
22 |
|
23 |
+
sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name dtnl-256-nx2 --final_model_name dtln-256-nx2 \
|
24 |
+
--config_file "yaml/config-256.yaml" \
|
25 |
+
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
|
26 |
+
--speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech"
|
27 |
|
28 |
|
29 |
END
|
examples/dtln/yaml/config-256.yaml
CHANGED
@@ -8,7 +8,7 @@ win_type: hann
|
|
8 |
|
9 |
# data
|
10 |
max_snr_db: 20
|
11 |
-
min_snr_db: -
|
12 |
|
13 |
# model
|
14 |
encoder_size: 256
|
|
|
8 |
|
9 |
# data
|
10 |
max_snr_db: 20
|
11 |
+
min_snr_db: -20
|
12 |
|
13 |
# model
|
14 |
encoder_size: 256
|
toolbox/torchaudio/models/dtln/modeling_dtln.py
CHANGED
@@ -8,8 +8,12 @@ https://github.com/AkenoSyuRi/DTLNPytorch
|
|
8 |
https://github.com/breizhn/DTLN
|
9 |
|
10 |
数据集: DNS3 DNS-Challenge
|
11 |
-
|
12 |
-
5
|
|
|
|
|
|
|
|
|
13 |
窗长 32ms, 窗移 8ms
|
14 |
|
15 |
在 dns3 500个小时的数据上训练, 在 dns3 的测试集上达到了 pesq 3.04 的水平。
|
|
|
8 |
https://github.com/breizhn/DTLN
|
9 |
|
10 |
数据集: DNS3 DNS-Challenge
|
11 |
+
信噪比从 DNS3 的 [0, 40] dB 调整为 [-5, 25] dB
|
12 |
+
信噪比级别从 5 个改到 31 个。 即:
|
13 |
+
[0dB, 10dB, 20dB, 30dB, 40dB]
|
14 |
+
改到:
|
15 |
+
[-5dB, -4dB, -3dB, ..., 22dB, 23dB, 24dB, 25dB]
|
16 |
+
|
17 |
窗长 32ms, 窗移 8ms
|
18 |
|
19 |
在 dns3 500个小时的数据上训练, 在 dns3 的测试集上达到了 pesq 3.04 的水平。
|