HoneyTian committed on
Commit
b4e8cd2
·
1 Parent(s): 7335f6f
examples/data_preprocess/nx_speech_denoise/nx_speech_denoise.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import os
5
+ from pathlib import Path
6
+ import sys
7
+
8
+ from gradio_client import Client, handle_file
9
+ import numpy as np
10
+ from tqdm import tqdm
11
+ import shutil
12
+
13
+ pwd = os.path.abspath(os.path.dirname(__file__))
14
+ sys.path.append(os.path.join(pwd, "../../"))
15
+
16
+ import librosa
17
+ from scipy.io import wavfile
18
+
19
+
20
def get_args():
    """Build and parse the command-line arguments for the denoise job.

    Returns:
        argparse.Namespace with ``src_dir`` (input speech root) and
        ``tgt_dir`` (output root for denoised speech).
    """
    parser = argparse.ArgumentParser()
    # Windows defaults; the commented server paths below were the Linux ones:
    #   /data/tianxing/HuggingDatasets/nx_noise/data/speech/en-PH
    #   /data/tianxing/HuggingDatasets/nx_noise/data/speech-denoise/en-PH
    directory_defaults = {
        "--src_dir": r"E:\Users\tianx\HuggingDatasets\nx_noise\data\speech\en-PH",
        "--tgt_dir": r"E:\Users\tianx\HuggingDatasets\nx_noise\data\speech-denoise\en-PH",
    }
    for flag, default_path in directory_defaults.items():
        parser.add_argument(flag, default=default_path, type=str)
    return parser.parse_args()
36
+
37
+
38
def main():
    """Denoise every speech wav under ``src_dir`` via a running gradio
    denoise service and mirror the results into ``tgt_dir``, one folder
    per date.

    Date folders other than the newest are assumed fully processed and
    skipped wholesale; within the current date, file names already present
    in the target folder are skipped so an interrupted job can resume.
    """
    args = get_args()

    # client = Client(src="http://10.75.27.247:7865/")
    client = Client(src="http://127.0.0.1:7865/")

    src_dir = Path(args.src_dir)
    tgt_dir = Path(args.tgt_dir)
    tgt_dir.mkdir(parents=True, exist_ok=True)

    # Date folders, excluding archived zips; all but the newest are treated as done.
    tgt_date_list = list(sorted([date.name for date in src_dir.glob("*") if not date.name.endswith(".zip")]))
    finished_date_set = set(tgt_date_list[:-1])
    current_date = tgt_date_list[-1]

    print(f"finished_date_set: {finished_date_set}")
    print(f"current_date: {current_date}")

    # File names already denoised for the current date (for resume support).
    finished_set = set()
    for filename in (tgt_dir / current_date).glob("*.wav"):
        finished_set.add(filename.name)

    src_date_list = list(sorted([date.name for date in src_dir.glob("*")]))
    for date in src_date_list:
        if date in finished_date_set:
            continue
        # BUGFIX: glob the date being processed; the original always globbed
        # `current_date`, making the outer date loop a no-op.
        for filename in (src_dir / date).glob("**/*.wav"):
            # BUGFIX: `finished_set` was built but never consulted, so a
            # restarted job re-denoised everything. Skip already-done files.
            if date == current_date and filename.name in finished_set:
                continue
            result = client.predict(
                noisy_audio_file_t=handle_file(filename.as_posix()),
                noisy_audio_microphone_t=None,
                engine="frcrn-dns3",
                api_name="/when_click_denoise_button"
            )
            # result[0] is the path of the denoised wav produced by the service.
            denoise_file = result[0]
            # Flatten nested source folders into one folder per date,
            # keyed by the original file name.
            tgt_file = tgt_dir / date / f"{filename.name}"
            tgt_file.parent.mkdir(parents=True, exist_ok=True)

            # BUGFIX: removed leftover debug `exit(0)` that aborted the whole
            # job after moving the first file.
            shutil.move(denoise_file, tgt_file)
            print(denoise_file)

    return
80
+
81
+
82
+ if __name__ == "__main__":
83
+ main()
examples/frcrn/run.sh CHANGED
@@ -3,10 +3,10 @@
3
  : <<'END'
4
 
5
 
6
- sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name frcrn-20-512-nx-dns3 \
7
  --config_file "yaml/config-10.yaml" \
8
  --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
9
- --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
10
 
11
 
12
  END
 
3
  : <<'END'
4
 
5
 
6
+ sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name frcrn-20-512-nx-dns3 \
7
  --config_file "yaml/config-10.yaml" \
8
  --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
9
+ --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech"
10
 
11
 
12
  END
examples/frcrn/step_1_prepare_data.py CHANGED
@@ -33,13 +33,13 @@ def get_args():
33
  parser.add_argument("--train_dataset", default="train.jsonl", type=str)
34
  parser.add_argument("--valid_dataset", default="valid.jsonl", type=str)
35
 
36
- parser.add_argument("--duration", default=4.0, type=float)
37
  parser.add_argument("--min_snr_db", default=-10, type=float)
38
  parser.add_argument("--max_snr_db", default=20, type=float)
39
 
40
  parser.add_argument("--target_sample_rate", default=8000, type=int)
41
 
42
- parser.add_argument("--scale", default=1, type=float)
43
 
44
  args = parser.parse_args()
45
  return args
@@ -107,9 +107,8 @@ def main():
107
  process_bar = tqdm(desc="build dataset jsonl")
108
  with open(args.train_dataset, "w", encoding="utf-8") as ftrain, open(args.valid_dataset, "w", encoding="utf-8") as fvalid:
109
  for noise, speech in zip(noise_generator, speech_generator):
110
- flag = random.random()
111
- if flag > args.scale:
112
- continue
113
 
114
  noise_filename = noise["filename"]
115
  noise_raw_duration = noise["raw_duration"]
 
33
  parser.add_argument("--train_dataset", default="train.jsonl", type=str)
34
  parser.add_argument("--valid_dataset", default="valid.jsonl", type=str)
35
 
36
+ parser.add_argument("--duration", default=2.0, type=float)
37
  parser.add_argument("--min_snr_db", default=-10, type=float)
38
  parser.add_argument("--max_snr_db", default=20, type=float)
39
 
40
  parser.add_argument("--target_sample_rate", default=8000, type=int)
41
 
42
+ parser.add_argument("--max_count", default=-1, type=int)
43
 
44
  args = parser.parse_args()
45
  return args
 
107
  process_bar = tqdm(desc="build dataset jsonl")
108
  with open(args.train_dataset, "w", encoding="utf-8") as ftrain, open(args.valid_dataset, "w", encoding="utf-8") as fvalid:
109
  for noise, speech in zip(noise_generator, speech_generator):
110
+ if count >= args.max_count > 0:
111
+ break
 
112
 
113
  noise_filename = noise["filename"]
114
  noise_raw_duration = noise["raw_duration"]
requirements-python-3-9-9.txt CHANGED
@@ -1,4 +1,5 @@
1
- gradio==4.44.1
 
2
  datasets==3.2.0
3
  python-dotenv==1.0.1
4
  scipy==1.13.1
 
1
+ gradio
2
+ gradio_client
3
  datasets==3.2.0
4
  python-dotenv==1.0.1
5
  scipy==1.13.1
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
- gradio
 
2
  datasets==3.2.0
3
  python-dotenv==1.0.1
4
  scipy==1.15.1
 
1
+ gradio==5.33.0
2
+ gradio_client==1.10.2
3
  datasets==3.2.0
4
  python-dotenv==1.0.1
5
  scipy==1.15.1
toolbox/torchaudio/models/frcrn/modeling_frcrn.py CHANGED
@@ -298,16 +298,16 @@ class FRCRNPretrainedModel(FRCRN):
298
 
299
 
300
  def main():
301
- # model = FRCRN(
302
- # use_complex_networks=True,
303
- # model_complexity=-1,
304
- # model_depth=10,
305
- # padding_mode="zeros",
306
- # nfft=128,
307
- # win_size=128,
308
- # hop_size=64,
309
- # win_type="hann",
310
- # )
311
 
312
  # model = FRCRN(
313
  # use_complex_networks=True,
@@ -320,16 +320,16 @@ def main():
320
  # win_type="hann",
321
  # )
322
 
323
- model = FRCRN(
324
- use_complex_networks=True,
325
- model_complexity=20,
326
- model_depth=20,
327
- padding_mode="zeros",
328
- nfft=512,
329
- win_size=512,
330
- hop_size=256,
331
- win_type="hann",
332
- )
333
 
334
  mixture = torch.rand(size=(1, 32000), dtype=torch.float32)
335
 
 
298
 
299
 
300
  def main():
301
+ model = FRCRN(
302
+ use_complex_networks=True,
303
+ model_complexity=-1,
304
+ model_depth=10,
305
+ padding_mode="zeros",
306
+ nfft=128,
307
+ win_size=128,
308
+ hop_size=64,
309
+ win_type="hann",
310
+ )
311
 
312
  # model = FRCRN(
313
  # use_complex_networks=True,
 
320
  # win_type="hann",
321
  # )
322
 
323
+ # model = FRCRN(
324
+ # use_complex_networks=True,
325
+ # model_complexity=45,
326
+ # model_depth=20,
327
+ # padding_mode="zeros",
328
+ # nfft=512,
329
+ # win_size=512,
330
+ # hop_size=256,
331
+ # win_type="hann",
332
+ # )
333
 
334
  mixture = torch.rand(size=(1, 32000), dtype=torch.float32)
335