update
examples/data_preprocess/nx_speech_denoise/nx_speech_denoise.py
ADDED
@@ -0,0 +1,85 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import argparse
+import os
+from pathlib import Path
+import sys
+
+from gradio_client import Client, handle_file
+import numpy as np
+from tqdm import tqdm
+import shutil
+
+pwd = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(os.path.join(pwd, "../../"))
+
+import librosa
+from scipy.io import wavfile
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--src_dir",
+        default=r"E:\Users\tianx\HuggingDatasets\nx_noise\data\speech\en-PH",
+        # default=r"/data/tianxing/HuggingDatasets/nx_noise/data/speech/en-PH",
+        type=str
+    )
+    parser.add_argument(
+        "--tgt_dir",
+        default=r"E:\Users\tianx\HuggingDatasets\nx_noise\data\speech-denoise\en-PH",
+        # default=r"/data/tianxing/HuggingDatasets/nx_noise/data/speech-denoise/en-PH",
+        type=str
+    )
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = get_args()
+
+    # client = Client(src="http://10.75.27.247:7865/")
+    client = Client(src="http://127.0.0.1:7865/")
+
+    src_dir = Path(args.src_dir)
+    tgt_dir = Path(args.tgt_dir)
+    tgt_dir.mkdir(parents=True, exist_ok=True)
+
+    tgt_date_list = list(sorted([date.name for date in src_dir.glob("*") if not date.name.endswith(".zip")]))
+    finished_date_set = set(tgt_date_list[:-1])
+    current_date = tgt_date_list[-1]
+
+    print(f"finished_date_set: {finished_date_set}")
+    print(f"current_date: {current_date}")
+
+    finished_set = set()
+    for filename in (tgt_dir / current_date).glob("*.wav"):
+        name = filename.name
+        finished_set.add(name)
+
+    src_date_list = list(sorted([date.name for date in src_dir.glob("*")]))
+    for date in src_date_list:
+        if date in finished_date_set:
+            continue
+        for filename in (src_dir / date).glob("**/*.wav"):
+            # skip wav files that were already denoised in a previous run
+            if filename.name in finished_set:
+                continue
+            result = client.predict(
+                noisy_audio_file_t=handle_file(filename.as_posix()),
+                noisy_audio_microphone_t=None,
+                engine="frcrn-dns3",
+                api_name="/when_click_denoise_button"
+            )
+            denoise_file = result[0]
+            tgt_file = tgt_dir / date / f"{filename.name}"
+            tgt_file.parent.mkdir(parents=True, exist_ok=True)
+
+            shutil.move(denoise_file, tgt_file)
+            print(denoise_file)
+
+    return
+
+
+if __name__ == "__main__":
+    main()
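
The script above drives a locally running gradio denoise app through gradio_client. Below is a minimal single-file sketch of the call it batches: the endpoint name (/when_click_denoise_button), engine id (frcrn-dns3), parameter names, and server URL come from the script itself; the input filename and the treatment of result[0] as the denoised wav path are illustrative assumptions.

# Minimal single-file version of the call the script above batches. The endpoint
# name, engine id, parameter names, and server URL come from the diff; the input
# file and the handling of result[0] as the denoised wav path are assumptions.
import shutil

from gradio_client import Client, handle_file

client = Client(src="http://127.0.0.1:7865/")  # assumes the denoise gradio app is already running

result = client.predict(
    noisy_audio_file_t=handle_file("noisy.wav"),  # hypothetical input file
    noisy_audio_microphone_t=None,
    engine="frcrn-dns3",
    api_name="/when_click_denoise_button",
)

denoise_file = result[0]  # the batch script treats this as the path of the denoised wav
shutil.move(denoise_file, "noisy_denoised.wav")
print(denoise_file)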
examples/frcrn/run.sh
CHANGED
@@ -3,10 +3,10 @@
 : <<'END'


-sh run.sh --stage
+sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name frcrn-20-512-nx-dns3 \
 --config_file "yaml/config-10.yaml" \
 --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
---speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech
+--speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech"


 END
examples/frcrn/step_1_prepare_data.py
CHANGED
@@ -33,13 +33,13 @@ def get_args():
     parser.add_argument("--train_dataset", default="train.jsonl", type=str)
     parser.add_argument("--valid_dataset", default="valid.jsonl", type=str)

-    parser.add_argument("--duration", default=
+    parser.add_argument("--duration", default=2.0, type=float)
     parser.add_argument("--min_snr_db", default=-10, type=float)
     parser.add_argument("--max_snr_db", default=20, type=float)

     parser.add_argument("--target_sample_rate", default=8000, type=int)

-    parser.add_argument("--
+    parser.add_argument("--max_count", default=-1, type=int)

     args = parser.parse_args()
     return args
@@ -107,9 +107,8 @@ def main():
     process_bar = tqdm(desc="build dataset jsonl")
     with open(args.train_dataset, "w", encoding="utf-8") as ftrain, open(args.valid_dataset, "w", encoding="utf-8") as fvalid:
         for noise, speech in zip(noise_generator, speech_generator):
-
-
-            continue
+            if count >= args.max_count > 0:
+                break

             noise_filename = noise["filename"]
             noise_raw_duration = noise["raw_duration"]
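
The new --max_count option caps how many noise/speech pairs are written to the jsonl files, and the early exit relies on Python's chained comparison, so the default of -1 leaves the dataset size unlimited. A small self-contained sketch of that predicate (the helper name is made up for illustration):

# Illustration of the chained comparison used above: the loop stops early only
# when max_count is positive and count has reached it, so the default of -1
# means "no limit". The helper name is hypothetical.
def should_stop(count: int, max_count: int) -> bool:
    # equivalent to: max_count > 0 and count >= max_count
    return count >= max_count > 0

assert should_stop(10, 5)           # cap of 5 reached
assert not should_stop(3, 5)        # still under the cap
assert not should_stop(10_000, -1)  # -1 disables the cap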
requirements-python-3-9-9.txt
CHANGED
@@ -1,4 +1,5 @@
-gradio
+gradio
+gradio_client
 datasets==3.2.0
 python-dotenv==1.0.1
 scipy==1.13.1
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
-gradio
+gradio==5.33.0
+gradio_client==1.10.2
 datasets==3.2.0
 python-dotenv==1.0.1
 scipy==1.15.1
toolbox/torchaudio/models/frcrn/modeling_frcrn.py
CHANGED
@@ -298,16 +298,16 @@ class FRCRNPretrainedModel(FRCRN):


 def main():
-
-
-
-
-
-
-
-
-
-
+    model = FRCRN(
+        use_complex_networks=True,
+        model_complexity=-1,
+        model_depth=10,
+        padding_mode="zeros",
+        nfft=128,
+        win_size=128,
+        hop_size=64,
+        win_type="hann",
+    )

     # model = FRCRN(
     #     use_complex_networks=True,
@@ -320,16 +320,16 @@ def main():
     #     win_type="hann",
     # )

-    model = FRCRN(
-
-
-
-
-
-
-
-
-    )
+    # model = FRCRN(
+    #     use_complex_networks=True,
+    #     model_complexity=45,
+    #     model_depth=20,
+    #     padding_mode="zeros",
+    #     nfft=512,
+    #     win_size=512,
+    #     hop_size=256,
+    #     win_type="hann",
+    # )

     mixture = torch.rand(size=(1, 32000), dtype=torch.float32)

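
This hunk switches the smoke test in main() from the depth-20 / 512-point-FFT configuration to the depth-10 / 128-point-FFT one, leaving the larger variant commented out. As a rough illustration of what the swap means for time-frequency resolution on the same 32000-sample test mixture, the bin and frame counts can be compared directly; torch.stft is used here only for the arithmetic and is not a claim about how FRCRN computes its transform internally.

# Rough comparison of the two STFT settings toggled in main() above, using the
# same 32000-sample test mixture; torch.stft here is only for illustration.
import torch

mixture = torch.rand(size=(1, 32000), dtype=torch.float32)

for nfft, win_size, hop_size in [(128, 128, 64), (512, 512, 256)]:
    spec = torch.stft(
        mixture,
        n_fft=nfft,
        hop_length=hop_size,
        win_length=win_size,
        window=torch.hann_window(win_size),
        return_complex=True,
    )
    # spec shape: (batch, nfft // 2 + 1 frequency bins, num_frames)
    print(nfft, tuple(spec.shape))

# With centered framing this prints roughly (1, 65, 501) for the 128-point
# setting and (1, 257, 126) for the 512-point setting: fewer frequency bins
# but finer time resolution for the newly activated small configuration.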