HoneyTian committed on
Commit
922f0b3
·
1 Parent(s): a8c8d73
Files changed (3) hide show
  1. main.py +125 -39
  2. requirements.txt +2 -0
  3. toolbox/audio_edit/reverb.py +84 -0
main.py CHANGED
@@ -10,10 +10,13 @@ import argparse
10
  import json
11
  from pathlib import Path
12
  import platform
 
13
  from typing import Tuple, List
 
14
 
15
  import gradio as gr
16
  import numpy as np
 
17
 
18
  from project_settings import project_path
19
  from toolbox.audio_edit.info import get_audio_info, engine_to_function as info_engine_to_function
@@ -21,6 +24,7 @@ from toolbox.audio_edit.convert import audio_convert, engine_to_function as cvt_
21
  from toolbox.audio_edit.speech_speed import change_speech_speed, engine_to_function as speed_engine_to_function
22
  from toolbox.audio_edit.volume import change_volume, engine_to_function as volume_engine_to_function
23
  from toolbox.audio_edit.augment import mix_speech_and_noise
 
24
 
25
 
26
  def get_args():
@@ -34,7 +38,24 @@ def get_args():
34
  return args
35
 
36
 
37
- def when_click_get_audio_info(filename: str, engine: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  message = "success"
39
 
40
  try:
@@ -47,12 +68,15 @@ def when_click_get_audio_info(filename: str, engine: str) -> str:
47
  return result, message
48
 
49
 
50
- def when_click_audio_convert(filename: str,
51
  to_sample_rate: int = 8000,
52
  sample_width: int = 2,
53
  channels: str = "0",
54
  engine: str = "librosa",
55
  ) -> Tuple[str, str, str, str]:
 
 
 
56
  message = "success"
57
 
58
  try:
@@ -62,50 +86,62 @@ def when_click_audio_convert(filename: str,
62
  channels=channels,
63
  engine=engine,
64
  )
65
- origin_audio_info, _ = when_click_get_audio_info(filename, engine="wave")
66
- output_audio_info, _ = when_click_get_audio_info(output_file, engine="wave")
 
 
 
67
  except Exception as e:
68
  output_file = None
69
  origin_audio_info = None
70
  output_audio_info = None
71
  message = f"failed. error type: {type(e)}, error text: {str(e)}"
72
 
73
- return filename, output_file, output_file, origin_audio_info, output_audio_info, message
 
74
 
 
 
 
75
 
76
- def when_click_change_speech_speed(filename: str, speed: float = 1.0, engine: str = "librosa"):
77
  message = "success"
78
 
79
  try:
80
  output_file: str = change_speech_speed(filename, speed, engine)
81
- origin_audio_info, _ = when_click_get_audio_info(filename, engine="pydub")
82
- output_audio_info, _ = when_click_get_audio_info(output_file, engine="pydub")
 
 
 
83
  except Exception as e:
84
  output_file = None
85
  origin_audio_info = None
86
  output_audio_info = None
87
  message = f"failed. error type: {type(e)}, error text: {str(e)}"
88
 
89
- return filename, output_file, output_file, origin_audio_info, output_audio_info, message
90
 
91
 
92
- def when_click_change_volume(filename: str,
93
  radio: float = 1.0,
94
  decibel: float = 0.0,
95
  reference: str = None,
96
  engine: str = "by_ffmpy_by_db",
97
  ):
 
 
 
98
  message = "success"
99
  try:
100
  output_file: str = change_volume(filename, radio, decibel, reference, engine)
101
  except Exception as e:
102
  output_file = None
103
  message = f"failed. error type: {type(e)}, error text: {str(e)}"
104
- return filename, output_file, output_file, message
105
 
106
 
107
- def when_click_pad_audio(audio, pad_seconds: int = 10, pad_mode: str = "zero"):
108
- sample_rate, signal = audio
109
 
110
  message = "success"
111
 
@@ -135,6 +171,28 @@ def when_click_pad_audio(audio, pad_seconds: int = 10, pad_mode: str = "zero"):
135
  return (sample_rate, pad_signal), message
136
 
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  def when_click_mix_speech_and_noise(speech_t, noise_t, snr_db: float):
139
  sample_rate1, speech = speech_t
140
  sample_rate2, noise = noise_t
@@ -184,7 +242,7 @@ change_volume_examples = [
184
  ],
185
  [
186
  (project_path / "data/examples/default/audio_0_3_clone_from_audio_0_2.wav").as_posix(),
187
- -0.5, 0.0,
188
  None,
189
  "by_ffmpy_by_radio"
190
  ],
@@ -211,6 +269,20 @@ pad_audio_examples = [
211
  ]
212
 
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  mix_speech_and_noise_examples = [
215
  [
216
  (project_path / "data/examples/mix/speech/000f62f5-5b05-4494-a8db-0eaca3ebd871_th-TH_1678353399860.wav").as_posix(),
@@ -247,6 +319,7 @@ def main():
247
  cvt_choices = list(cvt_engine_to_function.keys())
248
  speed_choices = list(speed_engine_to_function.keys())
249
  volume_choices = list(volume_engine_to_function.keys())
 
250
 
251
  # ui
252
  with gr.Blocks() as blocks:
@@ -254,7 +327,7 @@ def main():
254
  with gr.TabItem("info"):
255
  with gr.Row():
256
  with gr.Column(variant="panel", scale=5):
257
- info_audio = gr.File(label="audio")
258
  info_engine = gr.Dropdown(choices=info_choices, value=info_choices[0], label="engine")
259
  info_button = gr.Button(variant="primary")
260
  with gr.Column(variant="panel", scale=5):
@@ -278,7 +351,6 @@ def main():
278
  with gr.TabItem("convert"):
279
  with gr.Row():
280
  with gr.Column(variant="panel", scale=5):
281
- cvt_audio_file = gr.File(label="audio_file")
282
  cvt_audio = gr.Audio(label="audio")
283
 
284
  with gr.Row():
@@ -291,7 +363,6 @@ def main():
291
  cvt_engine = gr.Dropdown(choices=cvt_choices, value=cvt_choices[0], label="engine")
292
  cvt_button = gr.Button(variant="primary")
293
  with gr.Column(variant="panel", scale=5):
294
- cvt_output_audio_file = gr.File(label="output_audio_file")
295
  cvt_output_audio = gr.Audio(label="output_audio")
296
  cvt_origin_audio_info = gr.Text(label="origin_audio_info")
297
  cvt_output_audio_info = gr.Text(label="output_audio_info")
@@ -299,13 +370,12 @@ def main():
299
  gr.Examples(
300
  examples=audio_convert_examples,
301
  inputs=[
302
- cvt_audio_file,
303
  cvt_sample_rate, cvt_sample_width, cvt_channels,
304
  cvt_engine,
305
  ],
306
  outputs=[
307
- cvt_audio,
308
- cvt_output_audio_file, cvt_output_audio,
309
  cvt_origin_audio_info, cvt_output_audio_info,
310
  cvt_log
311
  ],
@@ -314,13 +384,12 @@ def main():
314
  cvt_button.click(
315
  when_click_audio_convert,
316
  inputs=[
317
- cvt_audio_file,
318
  cvt_sample_rate, cvt_sample_width, cvt_channels,
319
  cvt_engine,
320
  ],
321
  outputs=[
322
- cvt_audio,
323
- cvt_output_audio_file, cvt_output_audio,
324
  cvt_origin_audio_info, cvt_output_audio_info,
325
  cvt_log
326
  ],
@@ -328,14 +397,12 @@ def main():
328
  with gr.TabItem("speech_speed"):
329
  with gr.Row():
330
  with gr.Column(variant="panel", scale=5):
331
- speech_speed_audio_file = gr.File(label="audio_file")
332
  speech_speed_audio = gr.Audio(label="audio")
333
  with gr.Row():
334
  speech_speed_speed = gr.Slider(minimum=0.0, maximum=4.0, value=1.0, label="speed")
335
  speech_speed_engine = gr.Dropdown(choices=speed_choices, value=speed_choices[0], label="engine")
336
  speech_speed_button = gr.Button(variant="primary")
337
  with gr.Column(variant="panel", scale=5):
338
- speech_speed_output_audio_file = gr.File(label="output_audio_file")
339
  speech_speed_output_audio = gr.Audio(label="output_audio")
340
  speech_speed_origin_audio_info = gr.Text(label="origin_audio_info")
341
  speech_speed_output_audio_info = gr.Text(label="output_audio_info")
@@ -345,10 +412,9 @@ def main():
345
  [filename.as_posix(), 0.5]
346
  for filename in examples_dir.glob("**/*.wav")
347
  ],
348
- inputs=[speech_speed_audio_file, speech_speed_speed, speech_speed_engine],
349
  outputs=[
350
- speech_speed_audio,
351
- speech_speed_output_audio_file, speech_speed_output_audio,
352
  speech_speed_origin_audio_info, speech_speed_output_audio_info,
353
  speech_speed_log,
354
  ],
@@ -356,10 +422,9 @@ def main():
356
  )
357
  speech_speed_button.click(
358
  when_click_change_speech_speed,
359
- inputs=[speech_speed_audio_file, speech_speed_speed, speech_speed_engine],
360
  outputs=[
361
- speech_speed_audio,
362
- speech_speed_output_audio_file, speech_speed_output_audio,
363
  speech_speed_origin_audio_info, speech_speed_output_audio_info,
364
  speech_speed_log,
365
  ]
@@ -367,7 +432,6 @@ def main():
367
  with gr.TabItem("volume"):
368
  with gr.Row():
369
  with gr.Column(variant="panel", scale=5):
370
- volume_audio_file = gr.File(label="audio_file")
371
  volume_speed_audio = gr.Audio(label="audio")
372
  with gr.Row():
373
  with gr.Column():
@@ -379,26 +443,23 @@ def main():
379
 
380
  volume_button = gr.Button(variant="primary")
381
  with gr.Column(variant="panel", scale=5):
382
- volume_output_audio_file = gr.File(label="output_audio_file")
383
  volume_output_audio = gr.Audio(label="output_audio")
384
  volume_log = gr.Text(label="log")
385
 
386
  gr.Examples(
387
  examples=change_volume_examples,
388
- inputs=[volume_audio_file, volume_radio, volume_decibel, volume_reference, volume_engine],
389
  outputs=[
390
- volume_speed_audio,
391
- volume_output_audio_file, volume_output_audio,
392
  volume_log,
393
  ],
394
  fn=when_click_change_volume,
395
  )
396
  volume_button.click(
397
  when_click_change_volume,
398
- inputs=[volume_audio_file, volume_radio, volume_decibel, volume_reference, volume_engine],
399
  outputs=[
400
- volume_speed_audio,
401
- volume_output_audio_file, volume_output_audio,
402
  volume_log,
403
  ]
404
  )
@@ -429,7 +490,32 @@ def main():
429
  pad_output_audio, pad_log
430
  ],
431
  )
 
 
 
 
 
 
 
432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
  with gr.TabItem("mix"):
434
  with gr.Row():
435
  with gr.Column(variant="panel", scale=5):
 
10
  import json
11
  from pathlib import Path
12
  import platform
13
+ import tempfile
14
  from typing import Tuple, List
15
+ import uuid
16
 
17
  import gradio as gr
18
  import numpy as np
19
+ from scipy.io import wavfile
20
 
21
  from project_settings import project_path
22
  from toolbox.audio_edit.info import get_audio_info, engine_to_function as info_engine_to_function
 
24
  from toolbox.audio_edit.speech_speed import change_speech_speed, engine_to_function as speed_engine_to_function
25
  from toolbox.audio_edit.volume import change_volume, engine_to_function as volume_engine_to_function
26
  from toolbox.audio_edit.augment import mix_speech_and_noise
27
+ from toolbox.audio_edit.reverb import reverb, engine_to_function as reverb_engine_to_function
28
 
29
 
30
  def get_args():
 
38
  return args
39
 
40
 
41
def save_input_audio(sample_rate: int, signal: np.ndarray) -> str:
    """Persist an in-memory waveform to a temporary .wav file and return its path.

    A fresh UUID-based file name is created under ``<tmp>/input_audio`` so
    concurrent callers never collide.

    :param sample_rate: sampling rate in Hz.
    :param signal: waveform samples to write (dtype is passed through to
        ``scipy.io.wavfile.write`` unchanged).
    :return: POSIX-style path of the written file.
    """
    out_dir = Path(tempfile.gettempdir()) / "input_audio"
    out_dir.mkdir(parents=True, exist_ok=True)

    wav_path = (out_dir / f"{uuid.uuid4()}.wav").as_posix()
    wavfile.write(wav_path, sample_rate, signal)
    return wav_path
53
+
54
+
55
+ def when_click_get_audio_info(audio_t, engine: str):
56
+ sample_rate, signal = audio_t
57
+ filename = save_input_audio(sample_rate, signal)
58
+
59
  message = "success"
60
 
61
  try:
 
68
  return result, message
69
 
70
 
71
+ def when_click_audio_convert(audio_t,
72
  to_sample_rate: int = 8000,
73
  sample_width: int = 2,
74
  channels: str = "0",
75
  engine: str = "librosa",
76
  ) -> Tuple[str, str, str, str]:
77
+ sample_rate, signal = audio_t
78
+ filename = save_input_audio(sample_rate, signal)
79
+
80
  message = "success"
81
 
82
  try:
 
86
  channels=channels,
87
  engine=engine,
88
  )
89
+ origin_audio_info: dict = get_audio_info(filename, engine="wave")
90
+ origin_audio_info = json.dumps(origin_audio_info, ensure_ascii=False, indent=4)
91
+ output_audio_info: dict = get_audio_info(output_file, engine="wave")
92
+ output_audio_info = json.dumps(output_audio_info, ensure_ascii=False, indent=4)
93
+
94
  except Exception as e:
95
  output_file = None
96
  origin_audio_info = None
97
  output_audio_info = None
98
  message = f"failed. error type: {type(e)}, error text: {str(e)}"
99
 
100
+ return output_file, origin_audio_info, output_audio_info, message
101
+
102
 
103
def when_click_change_speech_speed(audio_t, speed: float = 1.0, engine: str = "librosa"):
    """Gradio handler: time-stretch the uploaded audio.

    :param audio_t: gr.Audio value, a ``(sample_rate, signal)`` tuple.
    :param speed: playback-speed factor forwarded to the engine.
    :param engine: key into the speech-speed ``engine_to_function`` table.
    :return: ``(output_file, origin_audio_info, output_audio_info, message)``;
        the first three are None when processing fails.
    """
    sample_rate, signal = audio_t
    src_file = save_input_audio(sample_rate, signal)

    message = "success"
    try:
        output_file: str = change_speech_speed(src_file, speed, engine)

        # Summarize both files (pydub probe) as pretty-printed JSON text.
        src_info: dict = get_audio_info(src_file, engine="pydub")
        origin_audio_info = json.dumps(src_info, ensure_ascii=False, indent=4)
        dst_info: dict = get_audio_info(output_file, engine="pydub")
        output_audio_info = json.dumps(dst_info, ensure_ascii=False, indent=4)
    except Exception as e:
        output_file = None
        origin_audio_info = None
        output_audio_info = None
        message = f"failed. error type: {type(e)}, error text: {str(e)}"

    return output_file, origin_audio_info, output_audio_info, message
123
 
124
 
125
def when_click_change_volume(audio_t: Tuple[int, np.ndarray],
                             radio: float = 1.0,
                             decibel: float = 0.0,
                             reference: str = None,
                             engine: str = "by_ffmpy_by_db",
                             ):
    """Gradio handler: change the loudness of the uploaded audio.

    :param audio_t: gr.Audio value, a ``(sample_rate, signal)`` tuple.
        (Fixed: the previous ``str`` annotation was wrong — the value is
        unpacked as a tuple on the first line.)
    :param radio: linear gain factor (used by the *_by_radio engines;
        name kept for backward compatibility — presumably means "ratio").
    :param decibel: gain in dB (used by the *_by_db engines).
    :param reference: optional reference audio path — semantics depend on
        the engine; verify against ``change_volume``.
    :param engine: key into the volume ``engine_to_function`` table.
    :return: ``(output_file, message)``; ``output_file`` is None on failure.
    """
    sample_rate, signal = audio_t
    filename = save_input_audio(sample_rate, signal)

    message = "success"
    try:
        output_file: str = change_volume(filename, radio, decibel, reference, engine)
    except Exception as e:
        output_file = None
        message = f"failed. error type: {type(e)}, error text: {str(e)}"
    return output_file, message
141
 
142
 
143
+ def when_click_pad_audio(audio_t, pad_seconds: int = 10, pad_mode: str = "zero"):
144
+ sample_rate, signal = audio_t
145
 
146
  message = "success"
147
 
 
171
  return (sample_rate, pad_signal), message
172
 
173
 
174
def when_click_reverb(audio_t, kwargs: str, engine: str):
    """Gradio handler: apply reverberation to the uploaded audio.

    :param audio_t: gr.Audio value, a ``(sample_rate, signal)`` tuple.
    :param kwargs: JSON string of engine-specific parameters, forwarded
        verbatim to :func:`reverb`.
    :param engine: key into the reverb ``engine_to_function`` table.
    :return: ``((sample_rate, reverberated_audio), message)``; the signal
        is None when processing fails.
    """
    sample_rate, signal = audio_t

    message = "success"
    try:
        # assumes int16 PCM from gradio — scale to float32 in (-1, 1)
        float_signal = np.array(signal / (1 << 15), dtype=np.float32)
        params = json.loads(kwargs)
        wet_signal = reverb(
            signal=float_signal,
            sample_rate=sample_rate,
            engine=engine,
            **params,
        )
        # back to int16 PCM for the gr.Audio output component
        reverberated_audio = np.array(wet_signal * (1 << 15), dtype=np.int16)
    except Exception as e:
        reverberated_audio = None
        message = f"failed. error type: {type(e)}, error text: {str(e)}"

    return (sample_rate, reverberated_audio), message
194
+
195
+
196
  def when_click_mix_speech_and_noise(speech_t, noise_t, snr_db: float):
197
  sample_rate1, speech = speech_t
198
  sample_rate2, noise = noise_t
 
242
  ],
243
  [
244
  (project_path / "data/examples/default/audio_0_3_clone_from_audio_0_2.wav").as_posix(),
245
+ 0.3, 0.0,
246
  None,
247
  "by_ffmpy_by_radio"
248
  ],
 
269
  ]
270
 
271
 
272
+ reverb_examples = [
273
+ [
274
+ (project_path / "data/examples/default/audio_0_2.wav").as_posix(),
275
+ '{\n "room_size": 0.25,\n "damping": 0.5,\n "width": 1.0,\n "dry_level": 0.4,\n "wet_level": 0.6,\n "freeze_mode": false\n}',
276
+ "pedalboard",
277
+ ],
278
+ [
279
+ (project_path / "data/examples/default/audio_0_2.wav").as_posix(),
280
+ '{\n "room_size": [4.0, 6.0],\n "source_position": [2.5, 4.5],\n "microphone_array": [\n [1.5, 1.5],\n [2.5, 1.5]\n ],\n "output_microphone_idx": 0\n}',
281
+ "pyroomacoustics",
282
+ ]
283
+ ]
284
+
285
+
286
  mix_speech_and_noise_examples = [
287
  [
288
  (project_path / "data/examples/mix/speech/000f62f5-5b05-4494-a8db-0eaca3ebd871_th-TH_1678353399860.wav").as_posix(),
 
319
  cvt_choices = list(cvt_engine_to_function.keys())
320
  speed_choices = list(speed_engine_to_function.keys())
321
  volume_choices = list(volume_engine_to_function.keys())
322
+ reverb_choices = list(reverb_engine_to_function.keys())
323
 
324
  # ui
325
  with gr.Blocks() as blocks:
 
327
  with gr.TabItem("info"):
328
  with gr.Row():
329
  with gr.Column(variant="panel", scale=5):
330
+ info_audio = gr.Audio(label="audio")
331
  info_engine = gr.Dropdown(choices=info_choices, value=info_choices[0], label="engine")
332
  info_button = gr.Button(variant="primary")
333
  with gr.Column(variant="panel", scale=5):
 
351
  with gr.TabItem("convert"):
352
  with gr.Row():
353
  with gr.Column(variant="panel", scale=5):
 
354
  cvt_audio = gr.Audio(label="audio")
355
 
356
  with gr.Row():
 
363
  cvt_engine = gr.Dropdown(choices=cvt_choices, value=cvt_choices[0], label="engine")
364
  cvt_button = gr.Button(variant="primary")
365
  with gr.Column(variant="panel", scale=5):
 
366
  cvt_output_audio = gr.Audio(label="output_audio")
367
  cvt_origin_audio_info = gr.Text(label="origin_audio_info")
368
  cvt_output_audio_info = gr.Text(label="output_audio_info")
 
370
  gr.Examples(
371
  examples=audio_convert_examples,
372
  inputs=[
373
+ cvt_audio,
374
  cvt_sample_rate, cvt_sample_width, cvt_channels,
375
  cvt_engine,
376
  ],
377
  outputs=[
378
+ cvt_output_audio,
 
379
  cvt_origin_audio_info, cvt_output_audio_info,
380
  cvt_log
381
  ],
 
384
  cvt_button.click(
385
  when_click_audio_convert,
386
  inputs=[
387
+ cvt_audio,
388
  cvt_sample_rate, cvt_sample_width, cvt_channels,
389
  cvt_engine,
390
  ],
391
  outputs=[
392
+ cvt_output_audio,
 
393
  cvt_origin_audio_info, cvt_output_audio_info,
394
  cvt_log
395
  ],
 
397
  with gr.TabItem("speech_speed"):
398
  with gr.Row():
399
  with gr.Column(variant="panel", scale=5):
 
400
  speech_speed_audio = gr.Audio(label="audio")
401
  with gr.Row():
402
  speech_speed_speed = gr.Slider(minimum=0.0, maximum=4.0, value=1.0, label="speed")
403
  speech_speed_engine = gr.Dropdown(choices=speed_choices, value=speed_choices[0], label="engine")
404
  speech_speed_button = gr.Button(variant="primary")
405
  with gr.Column(variant="panel", scale=5):
 
406
  speech_speed_output_audio = gr.Audio(label="output_audio")
407
  speech_speed_origin_audio_info = gr.Text(label="origin_audio_info")
408
  speech_speed_output_audio_info = gr.Text(label="output_audio_info")
 
412
  [filename.as_posix(), 0.5]
413
  for filename in examples_dir.glob("**/*.wav")
414
  ],
415
+ inputs=[speech_speed_audio, speech_speed_speed, speech_speed_engine],
416
  outputs=[
417
+ speech_speed_output_audio,
 
418
  speech_speed_origin_audio_info, speech_speed_output_audio_info,
419
  speech_speed_log,
420
  ],
 
422
  )
423
  speech_speed_button.click(
424
  when_click_change_speech_speed,
425
+ inputs=[speech_speed_audio, speech_speed_speed, speech_speed_engine],
426
  outputs=[
427
+ speech_speed_output_audio,
 
428
  speech_speed_origin_audio_info, speech_speed_output_audio_info,
429
  speech_speed_log,
430
  ]
 
432
  with gr.TabItem("volume"):
433
  with gr.Row():
434
  with gr.Column(variant="panel", scale=5):
 
435
  volume_speed_audio = gr.Audio(label="audio")
436
  with gr.Row():
437
  with gr.Column():
 
443
 
444
  volume_button = gr.Button(variant="primary")
445
  with gr.Column(variant="panel", scale=5):
 
446
  volume_output_audio = gr.Audio(label="output_audio")
447
  volume_log = gr.Text(label="log")
448
 
449
  gr.Examples(
450
  examples=change_volume_examples,
451
+ inputs=[volume_speed_audio, volume_radio, volume_decibel, volume_reference, volume_engine],
452
  outputs=[
453
+ volume_output_audio,
 
454
  volume_log,
455
  ],
456
  fn=when_click_change_volume,
457
  )
458
  volume_button.click(
459
  when_click_change_volume,
460
+ inputs=[volume_speed_audio, volume_radio, volume_decibel, volume_reference, volume_engine],
461
  outputs=[
462
+ volume_output_audio,
 
463
  volume_log,
464
  ]
465
  )
 
490
  pad_output_audio, pad_log
491
  ],
492
  )
493
+ with gr.TabItem("reverb"):
494
+ with gr.Row():
495
+ with gr.Column(variant="panel", scale=5):
496
+ reverb_audio = gr.Audio(label="audio")
497
+ reverb_kwargs = gr.Textbox(lines=8, label="kwargs")
498
+ reverb_engine = gr.Dropdown(choices=reverb_choices, value=reverb_choices[0], label="engine")
499
+ reverb_button = gr.Button(variant="primary")
500
 
501
+ with gr.Column(variant="panel", scale=5):
502
+ reverb_output_audio = gr.Audio(label="output_audio")
503
+ reverb_log = gr.Text(label="log")
504
+ gr.Examples(
505
+ examples=reverb_examples,
506
+ inputs=[reverb_audio, reverb_kwargs, reverb_engine],
507
+ outputs=[
508
+ reverb_output_audio, reverb_log
509
+ ],
510
+ fn=when_click_reverb,
511
+ )
512
+ reverb_button.click(
513
+ when_click_reverb,
514
+ inputs=[reverb_audio, reverb_kwargs, reverb_engine],
515
+ outputs=[
516
+ reverb_output_audio, reverb_log
517
+ ],
518
+ )
519
  with gr.TabItem("mix"):
520
  with gr.Row():
521
  with gr.Column(variant="panel", scale=5):
requirements.txt CHANGED
@@ -5,3 +5,5 @@ scipy==1.14.1
5
  audiotsm==0.1.2
6
  audiostretchy==1.3.5
7
  tinytag==2.0.0
 
 
 
5
  audiotsm==0.1.2
6
  audiostretchy==1.3.5
7
  tinytag==2.0.0
8
+ pedalboard==0.9.16
9
+ pyroomacoustics==0.8.3
toolbox/audio_edit/reverb.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import json
4
+ from typing import List, Tuple
5
+
6
+ import numpy as np
7
+ import pedalboard
8
+ import pyroomacoustics as pra
9
+
10
+
11
def reverb_by_pedalboard(signal: np.ndarray,
                         sample_rate: int,
                         room_size: float = 0.25,
                         damping: float = 0.5,
                         width: float = 1.0,
                         dry_level: float = 0.4,
                         wet_level: float = 0.6,
                         freeze_mode: bool = False
                         ) -> np.ndarray:
    """Apply a Freeverb-style reverb using the pedalboard library.

    :param signal: waveform samples; assumes float32 in (-1, 1) — TODO
        confirm against callers.
    :param sample_rate: sampling rate in Hz.
    :param room_size: reverb room size in [0, 1].
    :param damping: high-frequency damping in [0, 1].
    :param width: stereo width in [0, 1].
    :param dry_level: level of the unprocessed signal in the mix.
    :param wet_level: level of the reverberated signal in the mix.
    :param freeze_mode: when True, the reverb tail sustains indefinitely.
    :return: the processed waveform.
    """
    board = pedalboard.Pedalboard([
        pedalboard.Reverb(
            room_size=room_size,
            damping=damping,
            width=width,
            dry_level=dry_level,
            wet_level=wet_level,
            freeze_mode=freeze_mode,
        )
    ])

    # Fixed: call the board directly instead of the unidiomatic
    # board.__call__(...) — Pedalboard objects are documented as callable.
    reverberated_audio = board(signal, sample_rate)
    return reverberated_audio
35
+
36
+
37
def reverb_by_pyroomacoustics(signal: np.ndarray,
                              sample_rate: int,
                              room_size: Tuple[float, float] = (4.0, 6.0),
                              source_position: Tuple[float, float] = (2.5, 4.5),
                              microphone_array: List[Tuple[float, float]] = None,
                              output_microphone_idx: int = 0,
                              ):
    """Simulate room reverberation with a pyroomacoustics ShoeBox model.

    :param signal: waveform samples, float32 in (-1, 1).
    :param sample_rate: sampling rate in Hz.
    :param room_size: (width, depth) of the simulated room, in meters.
    :param source_position: 2-D position of the sound source, in meters.
    :param microphone_array: list of 2-D microphone positions; defaults to
        two microphones.
    :param output_microphone_idx: index of the microphone whose captured
        signal is returned.
    :return: the reverberated signal received at the selected microphone.
    """
    if microphone_array is None:
        microphone_array = [[1.5, 1.5], [2.5, 1.5]]

    # Build the shoebox room and place the source inside it.
    room = pra.ShoeBox(room_size, fs=sample_rate)
    room.add_source(source_position, signal=signal)

    # NOTE(review): pra expects mic coordinates of shape (ndim, n_mics);
    # the 2x2 default is ambiguous — verify orientation against callers.
    room.add_microphone_array(np.array(microphone_array))

    # Render the room impulse responses, then propagate the source signal.
    room.compute_rir()
    room.simulate()

    # room.mic_array.signals holds one row per microphone; pick one channel.
    return room.mic_array.signals[output_microphone_idx]
66
+
67
+
68
# Registry mapping engine names to their reverb implementations.
engine_to_function = {
    "pedalboard": reverb_by_pedalboard,
    "pyroomacoustics": reverb_by_pyroomacoustics,
}


def reverb(signal: np.ndarray, sample_rate: int, engine: str = "pedalboard", **kwargs):
    """Dispatch to the reverb backend selected by ``engine``.

    Extra keyword arguments are forwarded to the backend unchanged.

    :raises AssertionError: when ``engine`` is not a registered backend
        (kept as AssertionError for backward compatibility with callers).
    """
    if engine not in engine_to_function:
        raise AssertionError(f"invalid engine: {engine}")
    return engine_to_function[engine](signal=signal, sample_rate=sample_rate, **kwargs)


if __name__ == '__main__':
    pass