Spaces:
Running
Running
add trim audio
Browse files- examples/batch_audio_fmt_convert.py +7 -3
- main.py +64 -4
- requirements.txt +1 -1
- toolbox/audio_edit/trim.py +152 -0
examples/batch_audio_fmt_convert.py
CHANGED
@@ -18,13 +18,13 @@ def get_args():
|
|
18 |
parser.add_argument(
|
19 |
"--audio_dir",
|
20 |
# default=(project_path / "data/yd").as_posix(),
|
21 |
-
default=r"
|
22 |
type=str,
|
23 |
)
|
24 |
parser.add_argument(
|
25 |
"--output_dir",
|
26 |
# default=(project_path / "data/temp_wav").as_posix(),
|
27 |
-
default=r"
|
28 |
type=str,
|
29 |
)
|
30 |
args = parser.parse_args()
|
@@ -44,10 +44,14 @@ def main():
|
|
44 |
basename = filename.stem
|
45 |
relative_dir = filename.parent.relative_to(audio_dir)
|
46 |
|
47 |
-
signal, sample_rate = librosa.load(filename, sr=8000)
|
|
|
48 |
# print(signal.shape)
|
49 |
# print(signal.dtype)
|
50 |
# exit(0)
|
|
|
|
|
|
|
51 |
signal *= max_wave_value
|
52 |
signal = np.array(signal, dtype=np.int16)
|
53 |
|
|
|
18 |
parser.add_argument(
|
19 |
"--audio_dir",
|
20 |
# default=(project_path / "data/yd").as_posix(),
|
21 |
+
default=r"C:\Users\tianx\Desktop\sample-audio",
|
22 |
type=str,
|
23 |
)
|
24 |
parser.add_argument(
|
25 |
"--output_dir",
|
26 |
# default=(project_path / "data/temp_wav").as_posix(),
|
27 |
+
default=r"C:\Users\tianx\Desktop\sample-audio2",
|
28 |
type=str,
|
29 |
)
|
30 |
args = parser.parse_args()
|
|
|
44 |
basename = filename.stem
|
45 |
relative_dir = filename.parent.relative_to(audio_dir)
|
46 |
|
47 |
+
signal, sample_rate = librosa.load(filename, mono=False, sr=8000)
|
48 |
+
|
49 |
# print(signal.shape)
|
50 |
# print(signal.dtype)
|
51 |
# exit(0)
|
52 |
+
if not signal.ndim == 2:
|
53 |
+
raise AssertionError
|
54 |
+
signal = signal[0]
|
55 |
signal *= max_wave_value
|
56 |
signal = np.array(signal, dtype=np.int16)
|
57 |
|
main.py
CHANGED
@@ -1,14 +1,16 @@
|
|
1 |
#!/usr/bin/python3
|
2 |
# -*- coding: utf-8 -*-
|
3 |
"""
|
4 |
-
docker build -t audio_edit:
|
|
|
|
|
5 |
|
6 |
docker run -itd \
|
7 |
--name audio_edit_7861 \
|
8 |
--restart=always \
|
9 |
--network host \
|
10 |
-e port=7861 \
|
11 |
-
audio_edit:
|
12 |
"""
|
13 |
import argparse
|
14 |
import json
|
@@ -30,6 +32,7 @@ from toolbox.audio_edit.volume import change_volume, engine_to_function as volum
|
|
30 |
from toolbox.audio_edit.augment import mix_speech_and_noise
|
31 |
from toolbox.audio_edit.reverb import reverb, engine_to_function as reverb_engine_to_function
|
32 |
from toolbox.os.command import Command
|
|
|
33 |
|
34 |
|
35 |
def get_args():
|
@@ -180,6 +183,27 @@ def when_click_pad_audio(audio_t, pad_seconds: int = 10, pad_mode: str = "zero")
|
|
180 |
return (sample_rate, pad_signal), message
|
181 |
|
182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
def when_click_reverb(audio_t, kwargs: str, engine: str):
|
184 |
sample_rate, signal = audio_t
|
185 |
|
@@ -278,6 +302,15 @@ pad_audio_examples = [
|
|
278 |
]
|
279 |
|
280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
reverb_examples = [
|
282 |
[
|
283 |
(project_path / "data/examples/default/audio_0_2.wav").as_posix(),
|
@@ -328,6 +361,7 @@ def main():
|
|
328 |
cvt_choices = list(cvt_engine_to_function.keys())
|
329 |
speed_choices = list(speed_engine_to_function.keys())
|
330 |
volume_choices = list(volume_engine_to_function.keys())
|
|
|
331 |
reverb_choices = list(reverb_engine_to_function.keys())
|
332 |
|
333 |
# ui
|
@@ -499,6 +533,32 @@ def main():
|
|
499 |
pad_output_audio, pad_log
|
500 |
],
|
501 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
502 |
with gr.TabItem("reverb"):
|
503 |
with gr.Row():
|
504 |
with gr.Column(variant="panel", scale=5):
|
@@ -568,8 +628,8 @@ def main():
|
|
568 |
# http://10.75.27.247:7861/
|
569 |
blocks.queue().launch(
|
570 |
share=False if platform.system() == "Windows" else False,
|
571 |
-
|
572 |
-
server_name="0.0.0.0",
|
573 |
server_port=environment.get("port", 7860, dtype=int),
|
574 |
)
|
575 |
return
|
|
|
1 |
#!/usr/bin/python3
|
2 |
# -*- coding: utf-8 -*-
|
3 |
"""
|
4 |
+
docker build -t audio_edit:v20250314_1357 .
|
5 |
+
|
6 |
+
docker stop audio_edit_7861 && docker rm audio_edit_7861
|
7 |
|
8 |
docker run -itd \
|
9 |
--name audio_edit_7861 \
|
10 |
--restart=always \
|
11 |
--network host \
|
12 |
-e port=7861 \
|
13 |
+
audio_edit:v20250314_1357
|
14 |
"""
|
15 |
import argparse
|
16 |
import json
|
|
|
32 |
from toolbox.audio_edit.augment import mix_speech_and_noise
|
33 |
from toolbox.audio_edit.reverb import reverb, engine_to_function as reverb_engine_to_function
|
34 |
from toolbox.os.command import Command
|
35 |
+
from toolbox.audio_edit.trim import audio_trim, engine_to_function as trim_engine_to_function
|
36 |
|
37 |
|
38 |
def get_args():
|
|
|
183 |
return (sample_rate, pad_signal), message
|
184 |
|
185 |
|
186 |
+
def when_click_trim_audio(audio_t, kwargs: str, engine: str):
    """
    Callback for the "trim" tab: trim the given audio with the selected engine.

    :param audio_t: a (sample_rate, signal) pair.
    :param kwargs: JSON object string; its items are forwarded to `audio_trim`
        as keyword arguments.
    :param engine: engine name forwarded to `audio_trim`.
    :return: (output_file, message). On failure output_file is None and
        message describes the exception.
    """
    rate, samples = audio_t
    input_file = save_input_audio(rate, samples)

    try:
        options = json.loads(kwargs)
        output_file, info = audio_trim(
            filename=input_file,
            engine=engine,
            **options,
        )
        info_text = json.dumps(info, ensure_ascii=False, indent=4)
    except Exception as error:
        # UI boundary: report the failure in the log box instead of raising.
        return None, f"failed. error type: {type(error)}, error text: {str(error)}"

    return output_file, f"success\n\n{info_text}"
|
205 |
+
|
206 |
+
|
207 |
def when_click_reverb(audio_t, kwargs: str, engine: str):
|
208 |
sample_rate, signal = audio_t
|
209 |
|
|
|
302 |
]
|
303 |
|
304 |
|
305 |
+
# Example rows for the trim demo; each row is [audio path, kwargs JSON, engine].
trim_examples = [
    [
        (
            project_path
            / "data/examples/mix/speech/000f62f5-5b05-4494-a8db-0eaca3ebd871_th-TH_1678353399860.wav"
        ).as_posix(),
        '{\n "silence_threshold": -40,\n "min_silence_len": 200,\n "min_kept_silence": 200,\n "mode": "trim"\n}',
        "pydub",
    ],
]
|
312 |
+
|
313 |
+
|
314 |
reverb_examples = [
|
315 |
[
|
316 |
(project_path / "data/examples/default/audio_0_2.wav").as_posix(),
|
|
|
361 |
cvt_choices = list(cvt_engine_to_function.keys())
|
362 |
speed_choices = list(speed_engine_to_function.keys())
|
363 |
volume_choices = list(volume_engine_to_function.keys())
|
364 |
+
trim_choices = list(trim_engine_to_function.keys())
|
365 |
reverb_choices = list(reverb_engine_to_function.keys())
|
366 |
|
367 |
# ui
|
|
|
533 |
pad_output_audio, pad_log
|
534 |
],
|
535 |
)
|
536 |
+
with gr.TabItem("trim"):
|
537 |
+
with gr.Row():
|
538 |
+
with gr.Column(variant="panel", scale=5):
|
539 |
+
trim_audio = gr.Audio(label="audio")
|
540 |
+
trim_kwargs = gr.Textbox(lines=8, label="kwargs")
|
541 |
+
trim_engine = gr.Dropdown(choices=trim_choices, value=trim_choices[0], label="engine")
|
542 |
+
trim_button = gr.Button(variant="primary")
|
543 |
+
|
544 |
+
with gr.Column(variant="panel", scale=5):
|
545 |
+
trim_output_audio = gr.Audio(label="output_audio")
|
546 |
+
trim_log = gr.Text(label="log")
|
547 |
+
gr.Examples(
|
548 |
+
examples=trim_examples,
|
549 |
+
inputs=[trim_audio, trim_kwargs, trim_engine],
|
550 |
+
outputs=[
|
551 |
+
trim_output_audio, trim_log
|
552 |
+
],
|
553 |
+
fn=when_click_trim_audio,
|
554 |
+
)
|
555 |
+
trim_button.click(
|
556 |
+
when_click_trim_audio,
|
557 |
+
inputs=[trim_audio, trim_kwargs, trim_engine],
|
558 |
+
outputs=[
|
559 |
+
trim_output_audio, trim_log
|
560 |
+
],
|
561 |
+
)
|
562 |
with gr.TabItem("reverb"):
|
563 |
with gr.Row():
|
564 |
with gr.Column(variant="panel", scale=5):
|
|
|
628 |
# http://10.75.27.247:7861/
|
629 |
blocks.queue().launch(
|
630 |
share=False if platform.system() == "Windows" else False,
|
631 |
+
server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",
|
632 |
+
# server_name="0.0.0.0",
|
633 |
server_port=environment.get("port", 7860, dtype=int),
|
634 |
)
|
635 |
return
|
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
gradio
|
2 |
librosa==0.10.2
|
3 |
soundfile==0.12.1
|
4 |
scipy==1.14.1
|
|
|
1 |
+
gradio
|
2 |
librosa==0.10.2
|
3 |
soundfile==0.12.1
|
4 |
scipy==1.14.1
|
toolbox/audio_edit/trim.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import argparse
|
4 |
+
from pathlib import Path
|
5 |
+
import tempfile
|
6 |
+
import uuid
|
7 |
+
|
8 |
+
import librosa
|
9 |
+
from pydub import AudioSegment
|
10 |
+
from pydub.silence import detect_silence
|
11 |
+
from scipy.io import wavfile
|
12 |
+
|
13 |
+
from project_settings import project_path
|
14 |
+
|
15 |
+
|
16 |
+
def get_args():
    """
    Parse the command-line arguments of this script.

    :return: the parsed namespace; `filename` defaults to a bundled example
        WAV file located under `project_path`.
    """
    default_filename = (
        project_path
        / "data/examples/mix/speech/000f62f5-5b05-4494-a8db-0eaca3ebd871_th-TH_1678353399860.wav"
    )

    parser = argparse.ArgumentParser()
    parser.add_argument("--filename", default=default_filename.as_posix(), type=str)
    return parser.parse_args()
|
25 |
+
|
26 |
+
|
27 |
+
def audio_trim_by_pydub(filename: str,
                        silence_threshold: int = -40,
                        min_silence_len: float = 1000,
                        min_kept_silence: float = 200,
                        mode: str = "trim"
                        ):
    """
    Trim leading and/or trailing silence from a WAV file using pydub.

    :param filename: path of the input file; it is opened as WAV.
    :param silence_threshold: forwarded to `detect_silence` as `silence_thresh`.
    :param min_silence_len: forwarded to `detect_silence` as `min_silence_len`.
    :param min_kept_silence: amount of the detected edge silence that is kept
        next to the retained audio (same unit as the detected positions).
    :param mode: "trim" trims both ends, "ltrim" only the beginning,
        "rtrim" only the end. Any other value behaves like "trim".
    :return: (output_file, ext) where output_file is the path of the written
        WAV file and ext holds the cut positions and the original length.
    """
    audio = AudioSegment.from_file(filename, format="wav")
    length = len(audio)

    silent_ranges = detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_threshold)

    begin, end = 0, length
    if silent_ranges:
        # Only silence touching the very start / very end is trimmed.
        first_start, first_stop = silent_ranges[0]
        if first_start == 0:
            begin = max(0, first_stop - min_kept_silence)

        last_start, last_stop = silent_ranges[-1]
        if last_stop == length:
            end = min(length, last_start + min_kept_silence)

    if mode == "rtrim":
        begin = 0
    elif mode == "ltrim":
        end = length

    # NOTE(review): when the whole clip is one silent range, begin can end up
    # greater than end and the slice below is empty - confirm this is intended.

    output_dir = Path(tempfile.gettempdir()) / "audio_edit/trim"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = (output_dir / f"{uuid.uuid4()}.wav").as_posix()

    # export() defaults to format="mp3"; request WAV explicitly so the file
    # content matches its ".wav" extension.
    audio[begin:end].export(output_file, format="wav")

    # Same keys whether or not any silence was detected.
    ext = {
        "begin (ms)": begin,
        "end (ms)": end,
        "origin_length (ms)": length,
    }
    return output_file, ext
|
78 |
+
|
79 |
+
|
80 |
+
def audio_trim_by_librosa(filename: str,
                          sample_rate: int = None,
                          top_db: float = 60,
                          frame_length: int = 2048,
                          hop_length: int = 512,
                          mode: str = "trim",
                          **kwargs
                          ):
    """
    Trim leading and/or trailing silence from an audio file using librosa.

    :param filename: path of the input audio file.
    :param sample_rate: forwarded to `librosa.load` as `sr`.
    :param top_db: forwarded to `librosa.effects.trim`.
    :param frame_length: forwarded to `librosa.effects.trim`.
    :param hop_length: forwarded to `librosa.effects.trim`.
    :param mode: "trim" trims both ends, "ltrim" only the beginning,
        "rtrim" only the end. Any other value behaves like "trim".
    :param kwargs: extra keyword arguments forwarded to `librosa.effects.trim`.
    :return: (output_file, ext) where output_file is the path of the written
        WAV file and ext holds the cut positions and the original length,
        all in samples and as plain ints.
    """
    signal, sample_rate = librosa.load(filename, sr=sample_rate, mono=False)
    # With mono=False the result is (samples,) or (channels, samples); the
    # time axis is always the last one, so len(signal) would be wrong for
    # multi-channel input.
    length = int(signal.shape[-1])

    _, index = librosa.effects.trim(
        signal,
        top_db=top_db,
        frame_length=frame_length,
        hop_length=hop_length,
        **kwargs
    )

    # Convert numpy integers to int so that ext can be serialized with json.
    begin = int(index[0])
    end = int(index[1])

    if mode == "rtrim":
        begin = 0
    elif mode == "ltrim":
        end = length

    output_dir = Path(tempfile.gettempdir()) / "audio_edit/trim"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = (output_dir / f"{uuid.uuid4()}.wav").as_posix()

    # Cut along the time (last) axis, not the channel axis.
    signal_trimmed = signal[..., begin:end]
    if signal_trimmed.ndim > 1:
        # wavfile.write expects (samples, channels).
        signal_trimmed = signal_trimmed.T

    wavfile.write(
        output_file,
        rate=sample_rate,
        data=signal_trimmed
    )

    ext = {
        "begin": begin,
        "end": end,
        "origin_length": length,
    }
    return output_file, ext
|
126 |
+
|
127 |
+
|
128 |
+
# Registry of the available trim engines, keyed by engine name.
engine_to_function = dict(
    pydub=audio_trim_by_pydub,
    librosa=audio_trim_by_librosa,
)
|
132 |
+
|
133 |
+
|
134 |
+
def audio_trim(filename: str, engine: str = "librosa", **kwargs):
    """
    Trim an audio file with the engine registered under `engine`.

    :param filename: path of the input audio file.
    :param engine: key into `engine_to_function`.
    :param kwargs: keyword arguments forwarded to the selected engine function.
    :return: whatever the selected engine function returns.
    :raises AssertionError: if `engine` is not a registered engine name.
    """
    trim_function = engine_to_function.get(engine)
    if trim_function is not None:
        return trim_function(filename, **kwargs)

    raise AssertionError(f"invalid engine: {engine}")
|
140 |
+
|
141 |
+
|
142 |
+
def main():
    """Trim the file given by --filename with the pydub engine and print the output path."""
    cli_args = get_args()

    # The "librosa" engine can be selected here instead of "pydub".
    trimmed_file, _ = audio_trim(cli_args.filename, engine="pydub")
    print(trimmed_file)
|
149 |
+
|
150 |
+
|
151 |
+
if __name__ == "__main__":
|
152 |
+
main()
|