Spaces:
Running
Running
update
Browse files- main.py +125 -39
- requirements.txt +2 -0
- toolbox/audio_edit/reverb.py +84 -0
main.py
CHANGED
@@ -10,10 +10,13 @@ import argparse
|
|
10 |
import json
|
11 |
from pathlib import Path
|
12 |
import platform
|
|
|
13 |
from typing import Tuple, List
|
|
|
14 |
|
15 |
import gradio as gr
|
16 |
import numpy as np
|
|
|
17 |
|
18 |
from project_settings import project_path
|
19 |
from toolbox.audio_edit.info import get_audio_info, engine_to_function as info_engine_to_function
|
@@ -21,6 +24,7 @@ from toolbox.audio_edit.convert import audio_convert, engine_to_function as cvt_
|
|
21 |
from toolbox.audio_edit.speech_speed import change_speech_speed, engine_to_function as speed_engine_to_function
|
22 |
from toolbox.audio_edit.volume import change_volume, engine_to_function as volume_engine_to_function
|
23 |
from toolbox.audio_edit.augment import mix_speech_and_noise
|
|
|
24 |
|
25 |
|
26 |
def get_args():
|
@@ -34,7 +38,24 @@ def get_args():
|
|
34 |
return args
|
35 |
|
36 |
|
37 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
message = "success"
|
39 |
|
40 |
try:
|
@@ -47,12 +68,15 @@ def when_click_get_audio_info(filename: str, engine: str) -> str:
|
|
47 |
return result, message
|
48 |
|
49 |
|
50 |
-
def when_click_audio_convert(
|
51 |
to_sample_rate: int = 8000,
|
52 |
sample_width: int = 2,
|
53 |
channels: str = "0",
|
54 |
engine: str = "librosa",
|
55 |
) -> Tuple[str, str, str, str]:
|
|
|
|
|
|
|
56 |
message = "success"
|
57 |
|
58 |
try:
|
@@ -62,50 +86,62 @@ def when_click_audio_convert(filename: str,
|
|
62 |
channels=channels,
|
63 |
engine=engine,
|
64 |
)
|
65 |
-
origin_audio_info
|
66 |
-
|
|
|
|
|
|
|
67 |
except Exception as e:
|
68 |
output_file = None
|
69 |
origin_audio_info = None
|
70 |
output_audio_info = None
|
71 |
message = f"failed. error type: {type(e)}, error text: {str(e)}"
|
72 |
|
73 |
-
return
|
|
|
74 |
|
|
|
|
|
|
|
75 |
|
76 |
-
def when_click_change_speech_speed(filename: str, speed: float = 1.0, engine: str = "librosa"):
|
77 |
message = "success"
|
78 |
|
79 |
try:
|
80 |
output_file: str = change_speech_speed(filename, speed, engine)
|
81 |
-
origin_audio_info
|
82 |
-
|
|
|
|
|
|
|
83 |
except Exception as e:
|
84 |
output_file = None
|
85 |
origin_audio_info = None
|
86 |
output_audio_info = None
|
87 |
message = f"failed. error type: {type(e)}, error text: {str(e)}"
|
88 |
|
89 |
-
return
|
90 |
|
91 |
|
92 |
-
def when_click_change_volume(
|
93 |
radio: float = 1.0,
|
94 |
decibel: float = 0.0,
|
95 |
reference: str = None,
|
96 |
engine: str = "by_ffmpy_by_db",
|
97 |
):
|
|
|
|
|
|
|
98 |
message = "success"
|
99 |
try:
|
100 |
output_file: str = change_volume(filename, radio, decibel, reference, engine)
|
101 |
except Exception as e:
|
102 |
output_file = None
|
103 |
message = f"failed. error type: {type(e)}, error text: {str(e)}"
|
104 |
-
return
|
105 |
|
106 |
|
107 |
-
def when_click_pad_audio(
|
108 |
-
sample_rate, signal =
|
109 |
|
110 |
message = "success"
|
111 |
|
@@ -135,6 +171,28 @@ def when_click_pad_audio(audio, pad_seconds: int = 10, pad_mode: str = "zero"):
|
|
135 |
return (sample_rate, pad_signal), message
|
136 |
|
137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
def when_click_mix_speech_and_noise(speech_t, noise_t, snr_db: float):
|
139 |
sample_rate1, speech = speech_t
|
140 |
sample_rate2, noise = noise_t
|
@@ -184,7 +242,7 @@ change_volume_examples = [
|
|
184 |
],
|
185 |
[
|
186 |
(project_path / "data/examples/default/audio_0_3_clone_from_audio_0_2.wav").as_posix(),
|
187 |
-
|
188 |
None,
|
189 |
"by_ffmpy_by_radio"
|
190 |
],
|
@@ -211,6 +269,20 @@ pad_audio_examples = [
|
|
211 |
]
|
212 |
|
213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
mix_speech_and_noise_examples = [
|
215 |
[
|
216 |
(project_path / "data/examples/mix/speech/000f62f5-5b05-4494-a8db-0eaca3ebd871_th-TH_1678353399860.wav").as_posix(),
|
@@ -247,6 +319,7 @@ def main():
|
|
247 |
cvt_choices = list(cvt_engine_to_function.keys())
|
248 |
speed_choices = list(speed_engine_to_function.keys())
|
249 |
volume_choices = list(volume_engine_to_function.keys())
|
|
|
250 |
|
251 |
# ui
|
252 |
with gr.Blocks() as blocks:
|
@@ -254,7 +327,7 @@ def main():
|
|
254 |
with gr.TabItem("info"):
|
255 |
with gr.Row():
|
256 |
with gr.Column(variant="panel", scale=5):
|
257 |
-
info_audio = gr.
|
258 |
info_engine = gr.Dropdown(choices=info_choices, value=info_choices[0], label="engine")
|
259 |
info_button = gr.Button(variant="primary")
|
260 |
with gr.Column(variant="panel", scale=5):
|
@@ -278,7 +351,6 @@ def main():
|
|
278 |
with gr.TabItem("convert"):
|
279 |
with gr.Row():
|
280 |
with gr.Column(variant="panel", scale=5):
|
281 |
-
cvt_audio_file = gr.File(label="audio_file")
|
282 |
cvt_audio = gr.Audio(label="audio")
|
283 |
|
284 |
with gr.Row():
|
@@ -291,7 +363,6 @@ def main():
|
|
291 |
cvt_engine = gr.Dropdown(choices=cvt_choices, value=cvt_choices[0], label="engine")
|
292 |
cvt_button = gr.Button(variant="primary")
|
293 |
with gr.Column(variant="panel", scale=5):
|
294 |
-
cvt_output_audio_file = gr.File(label="output_audio_file")
|
295 |
cvt_output_audio = gr.Audio(label="output_audio")
|
296 |
cvt_origin_audio_info = gr.Text(label="origin_audio_info")
|
297 |
cvt_output_audio_info = gr.Text(label="output_audio_info")
|
@@ -299,13 +370,12 @@ def main():
|
|
299 |
gr.Examples(
|
300 |
examples=audio_convert_examples,
|
301 |
inputs=[
|
302 |
-
|
303 |
cvt_sample_rate, cvt_sample_width, cvt_channels,
|
304 |
cvt_engine,
|
305 |
],
|
306 |
outputs=[
|
307 |
-
|
308 |
-
cvt_output_audio_file, cvt_output_audio,
|
309 |
cvt_origin_audio_info, cvt_output_audio_info,
|
310 |
cvt_log
|
311 |
],
|
@@ -314,13 +384,12 @@ def main():
|
|
314 |
cvt_button.click(
|
315 |
when_click_audio_convert,
|
316 |
inputs=[
|
317 |
-
|
318 |
cvt_sample_rate, cvt_sample_width, cvt_channels,
|
319 |
cvt_engine,
|
320 |
],
|
321 |
outputs=[
|
322 |
-
|
323 |
-
cvt_output_audio_file, cvt_output_audio,
|
324 |
cvt_origin_audio_info, cvt_output_audio_info,
|
325 |
cvt_log
|
326 |
],
|
@@ -328,14 +397,12 @@ def main():
|
|
328 |
with gr.TabItem("speech_speed"):
|
329 |
with gr.Row():
|
330 |
with gr.Column(variant="panel", scale=5):
|
331 |
-
speech_speed_audio_file = gr.File(label="audio_file")
|
332 |
speech_speed_audio = gr.Audio(label="audio")
|
333 |
with gr.Row():
|
334 |
speech_speed_speed = gr.Slider(minimum=0.0, maximum=4.0, value=1.0, label="speed")
|
335 |
speech_speed_engine = gr.Dropdown(choices=speed_choices, value=speed_choices[0], label="engine")
|
336 |
speech_speed_button = gr.Button(variant="primary")
|
337 |
with gr.Column(variant="panel", scale=5):
|
338 |
-
speech_speed_output_audio_file = gr.File(label="output_audio_file")
|
339 |
speech_speed_output_audio = gr.Audio(label="output_audio")
|
340 |
speech_speed_origin_audio_info = gr.Text(label="origin_audio_info")
|
341 |
speech_speed_output_audio_info = gr.Text(label="output_audio_info")
|
@@ -345,10 +412,9 @@ def main():
|
|
345 |
[filename.as_posix(), 0.5]
|
346 |
for filename in examples_dir.glob("**/*.wav")
|
347 |
],
|
348 |
-
inputs=[
|
349 |
outputs=[
|
350 |
-
|
351 |
-
speech_speed_output_audio_file, speech_speed_output_audio,
|
352 |
speech_speed_origin_audio_info, speech_speed_output_audio_info,
|
353 |
speech_speed_log,
|
354 |
],
|
@@ -356,10 +422,9 @@ def main():
|
|
356 |
)
|
357 |
speech_speed_button.click(
|
358 |
when_click_change_speech_speed,
|
359 |
-
inputs=[
|
360 |
outputs=[
|
361 |
-
|
362 |
-
speech_speed_output_audio_file, speech_speed_output_audio,
|
363 |
speech_speed_origin_audio_info, speech_speed_output_audio_info,
|
364 |
speech_speed_log,
|
365 |
]
|
@@ -367,7 +432,6 @@ def main():
|
|
367 |
with gr.TabItem("volume"):
|
368 |
with gr.Row():
|
369 |
with gr.Column(variant="panel", scale=5):
|
370 |
-
volume_audio_file = gr.File(label="audio_file")
|
371 |
volume_speed_audio = gr.Audio(label="audio")
|
372 |
with gr.Row():
|
373 |
with gr.Column():
|
@@ -379,26 +443,23 @@ def main():
|
|
379 |
|
380 |
volume_button = gr.Button(variant="primary")
|
381 |
with gr.Column(variant="panel", scale=5):
|
382 |
-
volume_output_audio_file = gr.File(label="output_audio_file")
|
383 |
volume_output_audio = gr.Audio(label="output_audio")
|
384 |
volume_log = gr.Text(label="log")
|
385 |
|
386 |
gr.Examples(
|
387 |
examples=change_volume_examples,
|
388 |
-
inputs=[
|
389 |
outputs=[
|
390 |
-
|
391 |
-
volume_output_audio_file, volume_output_audio,
|
392 |
volume_log,
|
393 |
],
|
394 |
fn=when_click_change_volume,
|
395 |
)
|
396 |
volume_button.click(
|
397 |
when_click_change_volume,
|
398 |
-
inputs=[
|
399 |
outputs=[
|
400 |
-
|
401 |
-
volume_output_audio_file, volume_output_audio,
|
402 |
volume_log,
|
403 |
]
|
404 |
)
|
@@ -429,7 +490,32 @@ def main():
|
|
429 |
pad_output_audio, pad_log
|
430 |
],
|
431 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
432 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
433 |
with gr.TabItem("mix"):
|
434 |
with gr.Row():
|
435 |
with gr.Column(variant="panel", scale=5):
|
|
|
10 |
import json
|
11 |
from pathlib import Path
|
12 |
import platform
|
13 |
+
import tempfile
|
14 |
from typing import Tuple, List
|
15 |
+
import uuid
|
16 |
|
17 |
import gradio as gr
|
18 |
import numpy as np
|
19 |
+
from scipy.io import wavfile
|
20 |
|
21 |
from project_settings import project_path
|
22 |
from toolbox.audio_edit.info import get_audio_info, engine_to_function as info_engine_to_function
|
|
|
24 |
from toolbox.audio_edit.speech_speed import change_speech_speed, engine_to_function as speed_engine_to_function
|
25 |
from toolbox.audio_edit.volume import change_volume, engine_to_function as volume_engine_to_function
|
26 |
from toolbox.audio_edit.augment import mix_speech_and_noise
|
27 |
+
from toolbox.audio_edit.reverb import reverb, engine_to_function as reverb_engine_to_function
|
28 |
|
29 |
|
30 |
def get_args():
|
|
|
38 |
return args
|
39 |
|
40 |
|
41 |
+
def save_input_audio(sample_rate: int, signal: np.ndarray) -> str:
    """Persist a Gradio (sample_rate, signal) audio value to a temp wav file.

    The file is written under ``<system temp dir>/input_audio`` with a random
    uuid4 basename; the posix-style path of the new file is returned so the
    file-based toolbox functions can consume it.
    """
    out_dir = Path(tempfile.gettempdir()) / "input_audio"
    out_dir.mkdir(parents=True, exist_ok=True)

    out_path = (out_dir / f"{uuid.uuid4()}.wav").as_posix()
    wavfile.write(out_path, sample_rate, signal)
    return out_path
|
53 |
+
|
54 |
+
|
55 |
+
def when_click_get_audio_info(audio_t, engine: str):
|
56 |
+
sample_rate, signal = audio_t
|
57 |
+
filename = save_input_audio(sample_rate, signal)
|
58 |
+
|
59 |
message = "success"
|
60 |
|
61 |
try:
|
|
|
68 |
return result, message
|
69 |
|
70 |
|
71 |
+
def when_click_audio_convert(audio_t,
|
72 |
to_sample_rate: int = 8000,
|
73 |
sample_width: int = 2,
|
74 |
channels: str = "0",
|
75 |
engine: str = "librosa",
|
76 |
) -> Tuple[str, str, str, str]:
|
77 |
+
sample_rate, signal = audio_t
|
78 |
+
filename = save_input_audio(sample_rate, signal)
|
79 |
+
|
80 |
message = "success"
|
81 |
|
82 |
try:
|
|
|
86 |
channels=channels,
|
87 |
engine=engine,
|
88 |
)
|
89 |
+
origin_audio_info: dict = get_audio_info(filename, engine="wave")
|
90 |
+
origin_audio_info = json.dumps(origin_audio_info, ensure_ascii=False, indent=4)
|
91 |
+
output_audio_info: dict = get_audio_info(output_file, engine="wave")
|
92 |
+
output_audio_info = json.dumps(output_audio_info, ensure_ascii=False, indent=4)
|
93 |
+
|
94 |
except Exception as e:
|
95 |
output_file = None
|
96 |
origin_audio_info = None
|
97 |
output_audio_info = None
|
98 |
message = f"failed. error type: {type(e)}, error text: {str(e)}"
|
99 |
|
100 |
+
return output_file, origin_audio_info, output_audio_info, message
|
101 |
+
|
102 |
|
103 |
+
def when_click_change_speech_speed(audio_t, speed: float = 1.0, engine: str = "librosa"):
    """Gradio callback: save the uploaded audio, change its playback speed.

    Returns (output_file, origin_audio_info, output_audio_info, message);
    the three payload slots are None when the operation fails.
    """
    sample_rate, signal = audio_t
    filename = save_input_audio(sample_rate, signal)

    message = "success"
    try:
        output_file: str = change_speech_speed(filename, speed, engine)
        # Describe both files with the same backend so they are comparable.
        origin_audio_info = json.dumps(
            get_audio_info(filename, engine="pydub"),
            ensure_ascii=False, indent=4,
        )
        output_audio_info = json.dumps(
            get_audio_info(output_file, engine="pydub"),
            ensure_ascii=False, indent=4,
        )
    except Exception as e:
        output_file = origin_audio_info = output_audio_info = None
        message = f"failed. error type: {type(e)}, error text: {str(e)}"

    return output_file, origin_audio_info, output_audio_info, message
|
123 |
|
124 |
|
125 |
+
def when_click_change_volume(audio_t,
                             radio: float = 1.0,
                             decibel: float = 0.0,
                             reference: str = None,
                             engine: str = "by_ffmpy_by_db",
                             ):
    """Gradio callback: save the uploaded audio and change its volume.

    :param audio_t: value of a gr.Audio component — a (sample_rate,
        signal ndarray) tuple, not a filename.  (The previous
        ``audio_t: str`` annotation was wrong: the value is unpacked as a
        tuple on the first line; annotation removed, behavior unchanged.)
    :param radio: linear gain ratio (used by the *_by_radio engines).
    :param decibel: gain in dB (used by the *_by_db engines).
    :param reference: optional reference audio file path.
    :param engine: key selecting the volume backend.
    :return: (output_file, message); output_file is None on failure.
    """
    sample_rate, signal = audio_t
    filename = save_input_audio(sample_rate, signal)

    message = "success"
    try:
        output_file: str = change_volume(filename, radio, decibel, reference, engine)
    except Exception as e:
        output_file = None
        message = f"failed. error type: {type(e)}, error text: {str(e)}"
    return output_file, message
|
141 |
|
142 |
|
143 |
+
def when_click_pad_audio(audio_t, pad_seconds: int = 10, pad_mode: str = "zero"):
|
144 |
+
sample_rate, signal = audio_t
|
145 |
|
146 |
message = "success"
|
147 |
|
|
|
171 |
return (sample_rate, pad_signal), message
|
172 |
|
173 |
|
174 |
+
def when_click_reverb(audio_t, kwargs: str, engine: str):
    """Gradio callback: apply reverberation to the uploaded audio.

    :param audio_t: value of a gr.Audio component — a (sample_rate, signal)
        tuple; the signal is assumed to be int16 PCM (it is rescaled by
        1 << 15 below) — TODO confirm gr.Audio always yields int16 here.
    :param kwargs: JSON string of engine-specific reverb parameters.
    :param engine: key selecting the reverb backend.
    :return: ((sample_rate, int16 ndarray) or None, message).
    """
    sample_rate, signal = audio_t

    message = "success"
    try:
        # int16 PCM -> float32 in [-1, 1) as the reverb engines expect.
        signal = np.array(signal / (1 << 15), dtype=np.float32)
        kwargs = json.loads(kwargs)
        reverberated_audio = reverb(
            signal=signal,
            sample_rate=sample_rate,
            engine=engine,
            **kwargs,
        )
        # Clip before the int16 cast: reverb can push samples past full
        # scale, and a bare cast would wrap around instead of saturating.
        reverberated_audio = np.clip(
            reverberated_audio * (1 << 15), -(1 << 15), (1 << 15) - 1
        )
        reverberated_audio = np.array(reverberated_audio, dtype=np.int16)
    except Exception as e:
        reverberated_audio = None
        message = f"failed. error type: {type(e)}, error text: {str(e)}"

    # On failure return None rather than (sample_rate, None): gr.Audio
    # accepts None to clear, but not a tuple with a missing signal.
    output = None if reverberated_audio is None else (sample_rate, reverberated_audio)
    return output, message
|
194 |
+
|
195 |
+
|
196 |
def when_click_mix_speech_and_noise(speech_t, noise_t, snr_db: float):
|
197 |
sample_rate1, speech = speech_t
|
198 |
sample_rate2, noise = noise_t
|
|
|
242 |
],
|
243 |
[
|
244 |
(project_path / "data/examples/default/audio_0_3_clone_from_audio_0_2.wav").as_posix(),
|
245 |
+
0.3, 0.0,
|
246 |
None,
|
247 |
"by_ffmpy_by_radio"
|
248 |
],
|
|
|
269 |
]
|
270 |
|
271 |
|
272 |
+
reverb_examples = [
|
273 |
+
[
|
274 |
+
(project_path / "data/examples/default/audio_0_2.wav").as_posix(),
|
275 |
+
'{\n "room_size": 0.25,\n "damping": 0.5,\n "width": 1.0,\n "dry_level": 0.4,\n "wet_level": 0.6,\n "freeze_mode": false\n}',
|
276 |
+
"pedalboard",
|
277 |
+
],
|
278 |
+
[
|
279 |
+
(project_path / "data/examples/default/audio_0_2.wav").as_posix(),
|
280 |
+
'{\n "room_size": [4.0, 6.0],\n "source_position": [2.5, 4.5],\n "microphone_array": [\n [1.5, 1.5],\n [2.5, 1.5]\n ],\n "output_microphone_idx": 0\n}',
|
281 |
+
"pyroomacoustics",
|
282 |
+
]
|
283 |
+
]
|
284 |
+
|
285 |
+
|
286 |
mix_speech_and_noise_examples = [
|
287 |
[
|
288 |
(project_path / "data/examples/mix/speech/000f62f5-5b05-4494-a8db-0eaca3ebd871_th-TH_1678353399860.wav").as_posix(),
|
|
|
319 |
cvt_choices = list(cvt_engine_to_function.keys())
|
320 |
speed_choices = list(speed_engine_to_function.keys())
|
321 |
volume_choices = list(volume_engine_to_function.keys())
|
322 |
+
reverb_choices = list(reverb_engine_to_function.keys())
|
323 |
|
324 |
# ui
|
325 |
with gr.Blocks() as blocks:
|
|
|
327 |
with gr.TabItem("info"):
|
328 |
with gr.Row():
|
329 |
with gr.Column(variant="panel", scale=5):
|
330 |
+
info_audio = gr.Audio(label="audio")
|
331 |
info_engine = gr.Dropdown(choices=info_choices, value=info_choices[0], label="engine")
|
332 |
info_button = gr.Button(variant="primary")
|
333 |
with gr.Column(variant="panel", scale=5):
|
|
|
351 |
with gr.TabItem("convert"):
|
352 |
with gr.Row():
|
353 |
with gr.Column(variant="panel", scale=5):
|
|
|
354 |
cvt_audio = gr.Audio(label="audio")
|
355 |
|
356 |
with gr.Row():
|
|
|
363 |
cvt_engine = gr.Dropdown(choices=cvt_choices, value=cvt_choices[0], label="engine")
|
364 |
cvt_button = gr.Button(variant="primary")
|
365 |
with gr.Column(variant="panel", scale=5):
|
|
|
366 |
cvt_output_audio = gr.Audio(label="output_audio")
|
367 |
cvt_origin_audio_info = gr.Text(label="origin_audio_info")
|
368 |
cvt_output_audio_info = gr.Text(label="output_audio_info")
|
|
|
370 |
gr.Examples(
|
371 |
examples=audio_convert_examples,
|
372 |
inputs=[
|
373 |
+
cvt_audio,
|
374 |
cvt_sample_rate, cvt_sample_width, cvt_channels,
|
375 |
cvt_engine,
|
376 |
],
|
377 |
outputs=[
|
378 |
+
cvt_output_audio,
|
|
|
379 |
cvt_origin_audio_info, cvt_output_audio_info,
|
380 |
cvt_log
|
381 |
],
|
|
|
384 |
cvt_button.click(
|
385 |
when_click_audio_convert,
|
386 |
inputs=[
|
387 |
+
cvt_audio,
|
388 |
cvt_sample_rate, cvt_sample_width, cvt_channels,
|
389 |
cvt_engine,
|
390 |
],
|
391 |
outputs=[
|
392 |
+
cvt_output_audio,
|
|
|
393 |
cvt_origin_audio_info, cvt_output_audio_info,
|
394 |
cvt_log
|
395 |
],
|
|
|
397 |
with gr.TabItem("speech_speed"):
|
398 |
with gr.Row():
|
399 |
with gr.Column(variant="panel", scale=5):
|
|
|
400 |
speech_speed_audio = gr.Audio(label="audio")
|
401 |
with gr.Row():
|
402 |
speech_speed_speed = gr.Slider(minimum=0.0, maximum=4.0, value=1.0, label="speed")
|
403 |
speech_speed_engine = gr.Dropdown(choices=speed_choices, value=speed_choices[0], label="engine")
|
404 |
speech_speed_button = gr.Button(variant="primary")
|
405 |
with gr.Column(variant="panel", scale=5):
|
|
|
406 |
speech_speed_output_audio = gr.Audio(label="output_audio")
|
407 |
speech_speed_origin_audio_info = gr.Text(label="origin_audio_info")
|
408 |
speech_speed_output_audio_info = gr.Text(label="output_audio_info")
|
|
|
412 |
[filename.as_posix(), 0.5]
|
413 |
for filename in examples_dir.glob("**/*.wav")
|
414 |
],
|
415 |
+
inputs=[speech_speed_audio, speech_speed_speed, speech_speed_engine],
|
416 |
outputs=[
|
417 |
+
speech_speed_output_audio,
|
|
|
418 |
speech_speed_origin_audio_info, speech_speed_output_audio_info,
|
419 |
speech_speed_log,
|
420 |
],
|
|
|
422 |
)
|
423 |
speech_speed_button.click(
|
424 |
when_click_change_speech_speed,
|
425 |
+
inputs=[speech_speed_audio, speech_speed_speed, speech_speed_engine],
|
426 |
outputs=[
|
427 |
+
speech_speed_output_audio,
|
|
|
428 |
speech_speed_origin_audio_info, speech_speed_output_audio_info,
|
429 |
speech_speed_log,
|
430 |
]
|
|
|
432 |
with gr.TabItem("volume"):
|
433 |
with gr.Row():
|
434 |
with gr.Column(variant="panel", scale=5):
|
|
|
435 |
volume_speed_audio = gr.Audio(label="audio")
|
436 |
with gr.Row():
|
437 |
with gr.Column():
|
|
|
443 |
|
444 |
volume_button = gr.Button(variant="primary")
|
445 |
with gr.Column(variant="panel", scale=5):
|
|
|
446 |
volume_output_audio = gr.Audio(label="output_audio")
|
447 |
volume_log = gr.Text(label="log")
|
448 |
|
449 |
gr.Examples(
|
450 |
examples=change_volume_examples,
|
451 |
+
inputs=[volume_speed_audio, volume_radio, volume_decibel, volume_reference, volume_engine],
|
452 |
outputs=[
|
453 |
+
volume_output_audio,
|
|
|
454 |
volume_log,
|
455 |
],
|
456 |
fn=when_click_change_volume,
|
457 |
)
|
458 |
volume_button.click(
|
459 |
when_click_change_volume,
|
460 |
+
inputs=[volume_speed_audio, volume_radio, volume_decibel, volume_reference, volume_engine],
|
461 |
outputs=[
|
462 |
+
volume_output_audio,
|
|
|
463 |
volume_log,
|
464 |
]
|
465 |
)
|
|
|
490 |
pad_output_audio, pad_log
|
491 |
],
|
492 |
)
|
493 |
+
with gr.TabItem("reverb"):
|
494 |
+
with gr.Row():
|
495 |
+
with gr.Column(variant="panel", scale=5):
|
496 |
+
reverb_audio = gr.Audio(label="audio")
|
497 |
+
reverb_kwargs = gr.Textbox(lines=8, label="kwargs")
|
498 |
+
reverb_engine = gr.Dropdown(choices=reverb_choices, value=reverb_choices[0], label="engine")
|
499 |
+
reverb_button = gr.Button(variant="primary")
|
500 |
|
501 |
+
with gr.Column(variant="panel", scale=5):
|
502 |
+
reverb_output_audio = gr.Audio(label="output_audio")
|
503 |
+
reverb_log = gr.Text(label="log")
|
504 |
+
gr.Examples(
|
505 |
+
examples=reverb_examples,
|
506 |
+
inputs=[reverb_audio, reverb_kwargs, reverb_engine],
|
507 |
+
outputs=[
|
508 |
+
reverb_output_audio, reverb_log
|
509 |
+
],
|
510 |
+
fn=when_click_reverb,
|
511 |
+
)
|
512 |
+
reverb_button.click(
|
513 |
+
when_click_reverb,
|
514 |
+
inputs=[reverb_audio, reverb_kwargs, reverb_engine],
|
515 |
+
outputs=[
|
516 |
+
reverb_output_audio, reverb_log
|
517 |
+
],
|
518 |
+
)
|
519 |
with gr.TabItem("mix"):
|
520 |
with gr.Row():
|
521 |
with gr.Column(variant="panel", scale=5):
|
requirements.txt
CHANGED
@@ -5,3 +5,5 @@ scipy==1.14.1
|
|
5 |
audiotsm==0.1.2
|
6 |
audiostretchy==1.3.5
|
7 |
tinytag==2.0.0
|
|
|
|
|
|
5 |
audiotsm==0.1.2
|
6 |
audiostretchy==1.3.5
|
7 |
tinytag==2.0.0
|
8 |
+
pedalboard==0.9.16
|
9 |
+
pyroomacoustics==0.8.3
|
toolbox/audio_edit/reverb.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import json
|
4 |
+
from typing import List, Tuple
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
import pedalboard
|
8 |
+
import pyroomacoustics as pra
|
9 |
+
|
10 |
+
|
11 |
+
def reverb_by_pedalboard(signal: np.ndarray,
                         sample_rate: int,
                         room_size: float = 0.25,
                         damping: float = 0.5,
                         width: float = 1.0,
                         dry_level: float = 0.4,
                         wet_level: float = 0.6,
                         freeze_mode: bool = False
                         ):
    """Apply reverb using pedalboard's Reverb plugin.

    :param signal: audio samples; callers in this project pass float32 in
        (-1, 1) — TODO confirm for any other users.
    :param sample_rate: sample rate of ``signal`` in Hz.
    :param room_size: Reverb room size, 0.0 - 1.0.
    :param damping: high-frequency damping, 0.0 - 1.0.
    :param width: stereo width, 0.0 - 1.0.
    :param dry_level: level of the unprocessed signal in the mix.
    :param wet_level: level of the reverberated signal in the mix.
    :param freeze_mode: sustain the reverb tail indefinitely when True.
    :return: reverberated audio array from pedalboard.
    """
    board = pedalboard.Pedalboard([
        pedalboard.Reverb(
            room_size=room_size,
            damping=damping,
            width=width,
            dry_level=dry_level,
            wet_level=wet_level,
            freeze_mode=freeze_mode
        )
    ])

    # A Pedalboard is callable: calling it processes the buffer through
    # every plugin in order.  (Was the non-idiomatic ``board.__call__``.)
    reverberated_audio = board(signal, sample_rate)

    return reverberated_audio
|
35 |
+
|
36 |
+
|
37 |
+
def reverb_by_pyroomacoustics(signal: np.ndarray,
                              sample_rate: int,
                              room_size: Tuple[float, float] = (4.0, 6.0),
                              source_position: Tuple[float, float] = (2.5, 4.5),
                              microphone_array: List[Tuple[float, float]] = None,
                              output_microphone_idx: int = 0,
                              ):
    """Simulate room reverberation with pyroomacoustics.

    :param signal: float32 samples in (-1, 1).
    :param sample_rate: sample rate of ``signal`` in Hz.
    :param room_size: 2-D shoebox room dimensions in meters.
    :param source_position: (x, y) position of the sound source.
    :param microphone_array: list of (x, y) microphone positions; defaults
        to two microphones at [1.5, 1.5] and [2.5, 1.5].
    :param output_microphone_idx: which microphone's signal to return.
    :return: the reverberated signal received at the chosen microphone.
    """
    # Mutable default avoided via the None sentinel.
    if microphone_array is None:
        microphone_array = [[1.5, 1.5], [2.5, 1.5]]

    # Build the shoebox room (e.g. 4 x 6 meters) and place the source.
    room = pra.ShoeBox(room_size, fs=sample_rate)
    room.add_source(source_position, signal=signal)

    # Register the microphone array in the room.
    room.add_microphone_array(np.array(microphone_array))

    # Compute the room impulse response, then propagate the source.
    room.compute_rir()
    room.simulate()

    # mic_array.signals holds one row per microphone; return the chosen one.
    return room.mic_array.signals[output_microphone_idx]
|
66 |
+
|
67 |
+
|
68 |
+
# Dispatch table: engine name -> reverb implementation.
engine_to_function = {
    "pedalboard": reverb_by_pedalboard,
    "pyroomacoustics": reverb_by_pyroomacoustics,
}


def reverb(signal: np.ndarray, sample_rate: int, engine: str = "pedalboard", **kwargs):
    """Dispatch to the reverb implementation selected by ``engine``.

    Extra keyword arguments are forwarded unchanged to the implementation.
    Raises AssertionError for an unknown engine name.
    """
    try:
        function = engine_to_function[engine]
    except KeyError:
        raise AssertionError(f"invalid engine: {engine}") from None
    return function(signal=signal, sample_rate=sample_rate, **kwargs)
|
81 |
+
|
82 |
+
|
83 |
+
if __name__ == '__main__':
|
84 |
+
pass
|