update
- examples/dtln/run.sh +3 -7
- main.py +35 -6
- toolbox/torchaudio/models/dtln/modeling_dtln.py +7 -1
examples/dtln/run.sh
CHANGED
@@ -2,18 +2,14 @@
 
 : <<'END'
 
-sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name dfnet-nx-speech \
---noise_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/noise" \
---speech_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/speech"
-
 sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir-256 --final_model_name dtln-256-nx-dns3 \
 --config_file "yaml/config-256.yaml" \
---noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise
+--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
 --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
 
-sh run.sh --stage
+sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name file_dir-512 --final_model_name dtln-512-nx-dns3 \
 --config_file "yaml/config-512.yaml" \
---noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise
+--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
 --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech"
 
 
main.py
CHANGED
@@ -7,12 +7,16 @@ import logging
 from pathlib import Path
 import platform
 import shutil
+import tempfile
+import time
 from typing import Tuple
 import zipfile
-import time
 
 import gradio as gr
 from huggingface_hub import snapshot_download
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
 import numpy as np
 
 import log
@@ -108,6 +112,23 @@ def load_denoise_model(infer_cls, **kwargs):
     return infer_engine
 
 
+def generate_spectrogram(signal: np.ndarray, sample_rate: int = 8000, title: str = "Spectrogram"):
+    mag = np.abs(librosa.stft(signal))
+    mag_db = librosa.amplitude_to_db(mag, ref=np.max)
+
+    # save the spectrogram as a temporary image file
+    plt.figure(figsize=(10, 3))
+    librosa.display.specshow(mag_db, sr=sample_rate)
+    # librosa.display.specshow(mag_db, sr=sample_rate, x_axis='time', y_axis='log')
+    # plt.colorbar(format='%+2.0f dB')
+    plt.title(title)
+
+    temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+    plt.savefig(temp_file.name, bbox_inches="tight")
+    plt.close()
+    return temp_file.name
+
+
 def when_click_denoise_button(noisy_audio_file_t = None, noisy_audio_microphone_t = None, engine: str = None):
     if noisy_audio_file_t is None and noisy_audio_microphone_t is None:
         raise gr.Error(f"audio file and microphone is null.")
@@ -137,6 +158,9 @@ def when_click_denoise_button(noisy_audio_file_t = None, noisy_audio_microphone_
         enhanced_audio = infer_engine.enhancement_by_ndarray(noisy_audio)
         time_cost = time.time() - begin
 
+        noisy_mag_db = generate_spectrogram(noisy_audio, title="noisy")
+        denoise_mag_db = generate_spectrogram(enhanced_audio, title="denoise")
+
         fpr = time_cost / audio_duration
 
         info = {
@@ -151,7 +175,7 @@ def when_click_denoise_button(noisy_audio_file_t = None, noisy_audio_microphone_
         raise gr.Error(f"enhancement failed, error type: {type(e)}, error text: {str(e)}.")
 
     enhanced_audio_t = (sample_rate, enhanced_audio)
-    return enhanced_audio_t, message
+    return enhanced_audio_t, message, noisy_mag_db, denoise_mag_db
 
 
 def main():
@@ -207,18 +231,23 @@ def main():
            dn_engine = gr.Dropdown(choices=denoise_engine_choices, value=denoise_engine_choices[0], label="engine")
            dn_button = gr.Button(variant="primary")
        with gr.Column(variant="panel", scale=5):
-           dn_enhanced_audio = gr.Audio(label="enhanced_audio")
-           dn_message = gr.Textbox(lines=1, max_lines=20, label="message")
+           with gr.Tabs():
+               with gr.TabItem("audio"):
+                   dn_enhanced_audio = gr.Audio(label="enhanced_audio")
+                   dn_message = gr.Textbox(lines=1, max_lines=20, label="message")
+               with gr.TabItem("mag_db"):
+                   dn_noisy_mag_db = gr.Image(label="noisy_mag_db")
+                   dn_denoise_mag_db = gr.Image(label="denoise_mag_db")
 
        dn_button.click(
            when_click_denoise_button,
            inputs=[dn_noisy_audio_file, dn_noisy_audio_microphone, dn_engine],
-           outputs=[dn_enhanced_audio, dn_message]
+           outputs=[dn_enhanced_audio, dn_message, dn_noisy_mag_db, dn_denoise_mag_db]
        )
        gr.Examples(
            examples=examples,
            inputs=[dn_noisy_audio_file, dn_noisy_audio_microphone, dn_engine],
-           outputs=[dn_enhanced_audio, dn_message],
+           outputs=[dn_enhanced_audio, dn_message, dn_noisy_mag_db, dn_denoise_mag_db],
            fn=when_click_denoise_button,
            # cache_examples=True,
            # cache_mode="lazy",
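
For reference, a minimal standalone sketch of the spectrogram-to-PNG flow that generate_spectrogram() and the new mag_db tab implement. The save_spectrogram_png name and the 440 Hz test tone are illustrative assumptions; the librosa, matplotlib and tempfile calls mirror the ones added above.

import tempfile

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np


def save_spectrogram_png(signal: np.ndarray, sample_rate: int = 8000, title: str = "Spectrogram") -> str:
    # magnitude spectrogram in dB, same recipe as generate_spectrogram() in main.py
    mag_db = librosa.amplitude_to_db(np.abs(librosa.stft(signal)), ref=np.max)

    plt.figure(figsize=(10, 3))
    librosa.display.specshow(mag_db, sr=sample_rate)
    plt.title(title)

    # write to a temporary PNG; a gr.Image component accepts such a file path as its value
    temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
    plt.savefig(temp_file.name, bbox_inches="tight")
    plt.close()
    return temp_file.name


if __name__ == "__main__":
    # hypothetical input: one second of a 440 Hz tone at 8 kHz standing in for a noisy recording
    sr = 8000
    t = np.linspace(0, 1.0, sr, endpoint=False)
    tone = 0.5 * np.sin(2 * np.pi * 440.0 * t).astype(np.float32)
    print(save_spectrogram_png(tone, sample_rate=sr, title="noisy"))

In the Space, when_click_denoise_button() returns two such paths, which feed the dn_noisy_mag_db and dn_denoise_mag_db gr.Image components under the mag_db tab.
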
toolbox/torchaudio/models/dtln/modeling_dtln.py
CHANGED
@@ -344,7 +344,9 @@ class DTLNPretrainedModel(DTLNModel):
 
 
 def main():
-    config = DTLNConfig()
+    config = DTLNConfig(fft_size=512,
+                        hop_size=128,
+                        )
     model = DTLNPretrainedModel(config)
     model.eval()
 
@@ -354,6 +356,8 @@ def main():
     denoise = model.forward(noisy)
     print(f"denoise.shape: {denoise.shape}")
     print(denoise[:, :, 300: 302])
+    print(denoise[:, :, 8000: 8002])
+    print(denoise[:, :, 15600: 15602])
     print(denoise[:, :, 15680: 15682])
     print(denoise[:, :, 15760: 15762])
     print(denoise[:, :, 15840: 15842])
@@ -362,6 +366,8 @@ def main():
     print(f"denoise.shape: {denoise.shape}")
     # denoise = denoise[:, :, (config.fft_size - config.hop_size):]
     print(denoise[:, :, 300: 302])
+    print(denoise[:, :, 8000: 8002])
+    print(denoise[:, :, 15600: 15602])
     print(denoise[:, :, 15680: 15682])
     print(denoise[:, :, 15760: 15762])
     print(denoise[:, :, 15840: 15842])
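
As a quick sanity check of the new 512-point FFT / 128-sample hop setting, a hedged sketch of the main() flow above. Only DTLNConfig, DTLNPretrainedModel and forward() come from this module; the import path assumes the repository root is on PYTHONPATH, and the random (1, 1, 16000) input and its (batch, channel, time) layout are assumptions for illustration.

import torch

# assumption: repository root is on PYTHONPATH
from toolbox.torchaudio.models.dtln.modeling_dtln import DTLNConfig, DTLNPretrainedModel

# 512-point FFT with a 128-sample hop, matching the updated main() above
config = DTLNConfig(fft_size=512,
                    hop_size=128,
                    )
model = DTLNPretrainedModel(config)
model.eval()

# hypothetical 2-second mono waveform at 8 kHz; the (batch, channel, time) shape is an assumption
noisy = torch.randn(1, 1, 16000, dtype=torch.float32)
with torch.no_grad():
    denoise = model.forward(noisy)
print(f"denoise.shape: {denoise.shape}")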