Spaces:

qgyd2021
/

nx_denoise

Running

App Files Files Community

HoneyTian committed on 3 days ago

Commit

05cf4cb

1 Parent(s): 87129e4

update

Browse files

Files changed (5) hide show

README.md +19 -0
examples/data_preprocess/ms_snsd_to_8k/process_ms_snsd.py +70 -0
examples/dtln/run.sh +4 -4
examples/dtln/yaml/config-256.yaml +1 -1
toolbox/torchaudio/models/dtln/modeling_dtln.py +6 -2

README.md CHANGED Viewed

@@ -24,6 +24,25 @@ http://www.openslr.org/93/
 DNS3
 https://github.com/microsoft/DNS-Challenge/blob/master/download-dns-challenge-3.sh
 ```

 DNS3
 https://github.com/microsoft/DNS-Challenge/blob/master/download-dns-challenge-3.sh
+噪音数据来源于 DEMAND, FreeSound, AudioSet.
+MS-SNSD
+https://github.com/microsoft/MS-SNSD
+噪音数据来源于 DEMAND, FreeSound.
+MUSAN
+https://www.openslr.org/17/
+其中包含 music, noise, speech.
+music 是一些纯音乐, noise 包含 free-sound, sound-bible, sound-bible 部分也许可以作为补充部分.
+总的来说, 有用的部分不多, 可能噪音数据仍然需要以自己收集为主, 更加可靠.
+CHiME-4
+https://www.chimechallenge.org/challenges/chime4/download.html
+freesound
+https://freesound.org/
+AudioSet
+https://research.google.com/audioset/index.html
 ```

examples/data_preprocess/ms_snsd_to_8k/process_ms_snsd.py ADDED Viewed

	@@ -0,0 +1,70 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+"""
+MS-SNSD
+https://github.com/microsoft/MS-SNSD
+"""
+import argparse
+import os
+from pathlib import Path
+import sys
+import numpy as np
+from tqdm import tqdm
+pwd = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(os.path.join(pwd, "../../"))
+import librosa
+from scipy.io import wavfile
+def get_args():
+    """
+    Parse the command-line options of this script.
+
+    Options:
+        --data_dir: directory that main() searches recursively for wav files (str).
+        --output_dir: directory that main() writes the converted wav files to (str).
+        --sample_rate: sample rate passed to the loader and the writer (int, default 8000).
+
+    :return: the namespace object returned by ``parser.parse_args()``.
+    """
+    parser = argparse.ArgumentParser()
+    # NOTE: both path defaults below are absolute Windows paths;
+    # pass --data_dir / --output_dir explicitly on any other machine.
+    parser.add_argument(
+        "--data_dir",
+        default=r"E:\programmer\asr_datasets\MS-SNSD",
+        type=str
+    )
+    parser.add_argument(
+        "--output_dir",
+        default=r"E:\programmer\asr_datasets\denoise\ms-snsd-noise-8k",
+        type=str
+    )
+    parser.add_argument("--sample_rate", default=8000, type=int)
+    args = parser.parse_args()
+    return args
+def main():
+    args = get_args()
+    data_dir = Path(args.data_dir)
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    for filename in tqdm(data_dir.glob("**/*.wav")):
+        label = filename.parts[-2]
+        name = filename.stem
+        if label not in ["noise_train", "noise_test", "clean_train", "clean_test"]:
+            continue
+        signal, _ = librosa.load(filename.as_posix(), sr=args.sample_rate)
+        signal = signal * (1 << 15)
+        signal = np.array(signal, dtype=np.int16)
+        to_file = output_dir / f"{label}/{name}.wav"
+        to_file.parent.mkdir(parents=True, exist_ok=True)
+        wavfile.write(
+            to_file.as_posix(),
+            rate=args.sample_rate,
+            data=signal,
+        )
+    return
+if __name__ == "__main__":
+    main()

examples/dtln/run.sh CHANGED Viewed

@@ -20,10 +20,10 @@ sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name dt
 --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2"
-sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir-1024 --final_model_name dtln-1024-nx-devoice \
---config_file "yaml/config-1024.yaml" \
---noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2" \
---speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/nx-noise"
 END

 --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2"
+sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name dtnl-256-nx2 --final_model_name dtln-256-nx2 \
+--config_file "yaml/config-256.yaml" \
+--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
+--speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech"
 END

examples/dtln/yaml/config-256.yaml CHANGED Viewed

@@ -8,7 +8,7 @@ win_type: hann
 # data
 max_snr_db: 20
-min_snr_db: -10
 # model
 encoder_size: 256

 # data
 max_snr_db: 20
+min_snr_db: -20
 # model
 encoder_size: 256

toolbox/torchaudio/models/dtln/modeling_dtln.py CHANGED Viewed

@@ -8,8 +8,12 @@ https://github.com/AkenoSyuRi/DTLNPytorch
 https://github.com/breizhn/DTLN
 数据集： DNS3 DNS-Challenge
-信噪比 -5 到 25 dB
-5 到 30 dB
 窗长 32ms, 窗移 8ms
 在 dns3 500个小时的数据上训练, 在 dns3 的测试集上达到了 pesq 3.04 的水平。

 https://github.com/breizhn/DTLN
 数据集： DNS3 DNS-Challenge
+信噪比从 DNS3 的 [0, 40] dB 调整为 [-5, 25] dB
+信噪比级别从 5 个改到 30 个。 即：
+[0dB, 10dB, 20dB, 30dB, 40dB]
+改到：
+[-5dB, -4dB, -3dB, ..., 22dB, 23dB, 24dB, 25dB]
 窗长 32ms, 窗移 8ms
 在 dns3 500个小时的数据上训练, 在 dns3 的测试集上达到了 pesq 3.04 的水平。