HoneyTian committed on
Commit
115aca3
·
1 Parent(s): da40843
examples/dtln/run.sh CHANGED
@@ -2,18 +2,14 @@
2
 
3
  : <<'END'
4
 
5
- sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name dfnet-nx-speech \
6
- --noise_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/noise" \
7
- --speech_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/speech"
8
-
9
  sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir-256 --final_model_name dtln-256-nx-dns3 \
10
  --config_file "yaml/config-256.yaml" \
11
- --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/dns3-noise" \
12
  --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
13
 
14
- sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir-512 --final_model_name dtln-512-nx-dns3 \
15
  --config_file "yaml/config-512.yaml" \
16
- --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/dns3-noise" \
17
  --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
18
 
19
 
 
2
 
3
  : <<'END'
4
 
 
 
 
 
5
  sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir-256 --final_model_name dtln-256-nx-dns3 \
6
  --config_file "yaml/config-256.yaml" \
7
+ --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
8
  --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
9
 
10
+ sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name file_dir-512 --final_model_name dtln-512-nx-dns3 \
11
  --config_file "yaml/config-512.yaml" \
12
+ --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
13
  --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
14
 
15
 
main.py CHANGED
@@ -7,12 +7,16 @@ import logging
7
  from pathlib import Path
8
  import platform
9
  import shutil
 
 
10
  from typing import Tuple
11
  import zipfile
12
- import time
13
 
14
  import gradio as gr
15
  from huggingface_hub import snapshot_download
 
 
 
16
  import numpy as np
17
 
18
  import log
@@ -108,6 +112,23 @@ def load_denoise_model(infer_cls, **kwargs):
108
  return infer_engine
109
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  def when_click_denoise_button(noisy_audio_file_t = None, noisy_audio_microphone_t = None, engine: str = None):
112
  if noisy_audio_file_t is None and noisy_audio_microphone_t is None:
113
  raise gr.Error(f"audio file and microphone is null.")
@@ -137,6 +158,9 @@ def when_click_denoise_button(noisy_audio_file_t = None, noisy_audio_microphone_
137
  enhanced_audio = infer_engine.enhancement_by_ndarray(noisy_audio)
138
  time_cost = time.time() - begin
139
 
 
 
 
140
  fpr = time_cost / audio_duration
141
 
142
  info = {
@@ -151,7 +175,7 @@ def when_click_denoise_button(noisy_audio_file_t = None, noisy_audio_microphone_
151
  raise gr.Error(f"enhancement failed, error type: {type(e)}, error text: {str(e)}.")
152
 
153
  enhanced_audio_t = (sample_rate, enhanced_audio)
154
- return enhanced_audio_t, message
155
 
156
 
157
  def main():
@@ -207,18 +231,23 @@ def main():
207
  dn_engine = gr.Dropdown(choices=denoise_engine_choices, value=denoise_engine_choices[0], label="engine")
208
  dn_button = gr.Button(variant="primary")
209
  with gr.Column(variant="panel", scale=5):
210
- dn_enhanced_audio = gr.Audio(label="enhanced_audio")
211
- dn_message = gr.Textbox(lines=1, max_lines=20, label="message")
 
 
 
 
 
212
 
213
  dn_button.click(
214
  when_click_denoise_button,
215
  inputs=[dn_noisy_audio_file, dn_noisy_audio_microphone, dn_engine],
216
- outputs=[dn_enhanced_audio, dn_message]
217
  )
218
  gr.Examples(
219
  examples=examples,
220
  inputs=[dn_noisy_audio_file, dn_noisy_audio_microphone, dn_engine],
221
- outputs=[dn_enhanced_audio, dn_message],
222
  fn=when_click_denoise_button,
223
  # cache_examples=True,
224
  # cache_mode="lazy",
 
7
  from pathlib import Path
8
  import platform
9
  import shutil
10
+ import tempfile
11
+ import time
12
  from typing import Tuple
13
  import zipfile
 
14
 
15
  import gradio as gr
16
  from huggingface_hub import snapshot_download
17
+ import librosa
18
+ import librosa.display
19
+ import matplotlib.pyplot as plt
20
  import numpy as np
21
 
22
  import log
 
112
  return infer_engine
113
 
114
 
115
+ def generate_spectrogram(signal: np.ndarray, sample_rate: int = 8000, title: str = "Spectrogram"):
116
+ mag = np.abs(librosa.stft(signal))
117
+ mag_db = librosa.amplitude_to_db(mag, ref=np.max)
118
+
119
+ # 保存为临时图片文件
120
+ plt.figure(figsize=(10, 3))
121
+ librosa.display.specshow(mag_db, sr=sample_rate)
122
+ # librosa.display.specshow(mag_db, sr=sample_rate, x_axis='time', y_axis='log')
123
+ # plt.colorbar(format='%+2.0f dB')
124
+ plt.title(title)
125
+
126
+ temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
127
+ plt.savefig(temp_file.name, bbox_inches="tight")
128
+ plt.close()
129
+ return temp_file.name
130
+
131
+
132
  def when_click_denoise_button(noisy_audio_file_t = None, noisy_audio_microphone_t = None, engine: str = None):
133
  if noisy_audio_file_t is None and noisy_audio_microphone_t is None:
134
  raise gr.Error(f"audio file and microphone is null.")
 
158
  enhanced_audio = infer_engine.enhancement_by_ndarray(noisy_audio)
159
  time_cost = time.time() - begin
160
 
161
+ noisy_mag_db = generate_spectrogram(noisy_audio, title="noisy")
162
+ denoise_mag_db = generate_spectrogram(enhanced_audio, title="denoise")
163
+
164
  fpr = time_cost / audio_duration
165
 
166
  info = {
 
175
  raise gr.Error(f"enhancement failed, error type: {type(e)}, error text: {str(e)}.")
176
 
177
  enhanced_audio_t = (sample_rate, enhanced_audio)
178
+ return enhanced_audio_t, message, noisy_mag_db, denoise_mag_db
179
 
180
 
181
  def main():
 
231
  dn_engine = gr.Dropdown(choices=denoise_engine_choices, value=denoise_engine_choices[0], label="engine")
232
  dn_button = gr.Button(variant="primary")
233
  with gr.Column(variant="panel", scale=5):
234
+ with gr.Tabs():
235
+ with gr.TabItem("audio"):
236
+ dn_enhanced_audio = gr.Audio(label="enhanced_audio")
237
+ dn_message = gr.Textbox(lines=1, max_lines=20, label="message")
238
+ with gr.TabItem("mag_db"):
239
+ dn_noisy_mag_db = gr.Image(label="noisy_mag_db")
240
+ dn_denoise_mag_db = gr.Image(label="denoise_mag_db")
241
 
242
  dn_button.click(
243
  when_click_denoise_button,
244
  inputs=[dn_noisy_audio_file, dn_noisy_audio_microphone, dn_engine],
245
+ outputs=[dn_enhanced_audio, dn_message, dn_noisy_mag_db, dn_denoise_mag_db]
246
  )
247
  gr.Examples(
248
  examples=examples,
249
  inputs=[dn_noisy_audio_file, dn_noisy_audio_microphone, dn_engine],
250
+ outputs=[dn_enhanced_audio, dn_message, dn_noisy_mag_db, dn_denoise_mag_db],
251
  fn=when_click_denoise_button,
252
  # cache_examples=True,
253
  # cache_mode="lazy",
toolbox/torchaudio/models/dtln/modeling_dtln.py CHANGED
@@ -344,7 +344,9 @@ class DTLNPretrainedModel(DTLNModel):
344
 
345
 
346
  def main():
347
- config = DTLNConfig()
 
 
348
  model = DTLNPretrainedModel(config)
349
  model.eval()
350
 
@@ -354,6 +356,8 @@ def main():
354
  denoise = model.forward(noisy)
355
  print(f"denoise.shape: {denoise.shape}")
356
  print(denoise[:, :, 300: 302])
 
 
357
  print(denoise[:, :, 15680: 15682])
358
  print(denoise[:, :, 15760: 15762])
359
  print(denoise[:, :, 15840: 15842])
@@ -362,6 +366,8 @@ def main():
362
  print(f"denoise.shape: {denoise.shape}")
363
  # denoise = denoise[:, :, (config.fft_size - config.hop_size):]
364
  print(denoise[:, :, 300: 302])
 
 
365
  print(denoise[:, :, 15680: 15682])
366
  print(denoise[:, :, 15760: 15762])
367
  print(denoise[:, :, 15840: 15842])
 
344
 
345
 
346
  def main():
347
+ config = DTLNConfig(fft_size=512,
348
+ hop_size=128,
349
+ )
350
  model = DTLNPretrainedModel(config)
351
  model.eval()
352
 
 
356
  denoise = model.forward(noisy)
357
  print(f"denoise.shape: {denoise.shape}")
358
  print(denoise[:, :, 300: 302])
359
+ print(denoise[:, :, 8000: 8002])
360
+ print(denoise[:, :, 15600: 15602])
361
  print(denoise[:, :, 15680: 15682])
362
  print(denoise[:, :, 15760: 15762])
363
  print(denoise[:, :, 15840: 15842])
 
366
  print(f"denoise.shape: {denoise.shape}")
367
  # denoise = denoise[:, :, (config.fft_size - config.hop_size):]
368
  print(denoise[:, :, 300: 302])
369
+ print(denoise[:, :, 8000: 8002])
370
+ print(denoise[:, :, 15600: 15602])
371
  print(denoise[:, :, 15680: 15682])
372
  print(denoise[:, :, 15760: 15762])
373
  print(denoise[:, :, 15840: 15842])