Spaces:
Running
Running
update
Browse files
examples/jik_trim/step_1_download_audio.py
CHANGED
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
7 |
|
8 |
import requests
|
9 |
import pandas as pd
|
|
|
10 |
|
11 |
from project_settings import project_path
|
12 |
|
@@ -35,7 +36,7 @@ def main():
|
|
35 |
|
36 |
df = pd.read_excel(args.audio_file)
|
37 |
|
38 |
-
for i, row in df.iterrows():
|
39 |
name = row["name"]
|
40 |
scene_id = row["scene_id"]
|
41 |
audio_id = row["audio_id"]
|
@@ -47,6 +48,9 @@ def main():
|
|
47 |
filename = output_dir / path[1:]
|
48 |
filename.parent.mkdir(parents=True, exist_ok=True)
|
49 |
|
|
|
|
|
|
|
50 |
resp = requests.get(audio_url)
|
51 |
with open(filename.as_posix(), "wb") as f:
|
52 |
f.write(resp.content)
|
|
|
7 |
|
8 |
import requests
|
9 |
import pandas as pd
|
10 |
+
from tqdm import tqdm
|
11 |
|
12 |
from project_settings import project_path
|
13 |
|
|
|
36 |
|
37 |
df = pd.read_excel(args.audio_file)
|
38 |
|
39 |
+
for i, row in tqdm(df.iterrows(), total=len(df)):
|
40 |
name = row["name"]
|
41 |
scene_id = row["scene_id"]
|
42 |
audio_id = row["audio_id"]
|
|
|
48 |
filename = output_dir / path[1:]
|
49 |
filename.parent.mkdir(parents=True, exist_ok=True)
|
50 |
|
51 |
+
if filename.exists():
|
52 |
+
continue
|
53 |
+
|
54 |
resp = requests.get(audio_url)
|
55 |
with open(filename.as_posix(), "wb") as f:
|
56 |
f.write(resp.content)
|
examples/jik_trim/step_2_trim_audio.py
CHANGED
@@ -4,15 +4,11 @@ import argparse
|
|
4 |
import shutil
|
5 |
from urllib.parse import urlparse
|
6 |
import json
|
7 |
-
|
8 |
-
import librosa
|
9 |
-
import numpy as np
|
10 |
from pathlib import Path
|
11 |
|
12 |
from gradio_client import Client, handle_file
|
13 |
-
import requests
|
14 |
import pandas as pd
|
15 |
-
from
|
16 |
|
17 |
from project_settings import project_path
|
18 |
|
@@ -52,7 +48,7 @@ def main():
|
|
52 |
|
53 |
df = pd.read_excel(args.audio_file)
|
54 |
|
55 |
-
for i, row in df.iterrows():
|
56 |
name = row["name"]
|
57 |
scene_id = row["scene_id"]
|
58 |
audio_id = row["audio_id"]
|
@@ -65,8 +61,8 @@ def main():
|
|
65 |
|
66 |
kwargs = {
|
67 |
"silence_threshold": -40,
|
68 |
-
"min_silence_len":
|
69 |
-
"min_kept_silence":
|
70 |
"mode": "trim"
|
71 |
}
|
72 |
kwargs = json.dumps(kwargs, ensure_ascii=False, indent=4)
|
|
|
4 |
import shutil
|
5 |
from urllib.parse import urlparse
|
6 |
import json
|
|
|
|
|
|
|
7 |
from pathlib import Path
|
8 |
|
9 |
from gradio_client import Client, handle_file
|
|
|
10 |
import pandas as pd
|
11 |
+
from tqdm import tqdm
|
12 |
|
13 |
from project_settings import project_path
|
14 |
|
|
|
48 |
|
49 |
df = pd.read_excel(args.audio_file)
|
50 |
|
51 |
+
for i, row in tqdm(df.iterrows(), total=len(df)):
|
52 |
name = row["name"]
|
53 |
scene_id = row["scene_id"]
|
54 |
audio_id = row["audio_id"]
|
|
|
61 |
|
62 |
kwargs = {
|
63 |
"silence_threshold": -40,
|
64 |
+
"min_silence_len": 50,
|
65 |
+
"min_kept_silence": 0,
|
66 |
"mode": "trim"
|
67 |
}
|
68 |
kwargs = json.dumps(kwargs, ensure_ascii=False, indent=4)
|
examples/jik_trim/step_3_upload_to_obs.py
CHANGED
@@ -1,18 +1,13 @@
|
|
1 |
#!/usr/bin/python3
|
2 |
# -*- coding: utf-8 -*-
|
3 |
import argparse
|
4 |
-
import shutil
|
5 |
from urllib.parse import urlparse
|
6 |
import json
|
7 |
-
|
8 |
-
import librosa
|
9 |
-
import numpy as np
|
10 |
from pathlib import Path
|
11 |
|
12 |
from gradio_client import Client, handle_file
|
13 |
-
import requests
|
14 |
import pandas as pd
|
15 |
-
from
|
16 |
|
17 |
from project_settings import project_path, environment
|
18 |
|
@@ -67,7 +62,7 @@ def main():
|
|
67 |
|
68 |
df = pd.read_excel(args.audio_file)
|
69 |
|
70 |
-
for i, row in df.iterrows():
|
71 |
name = row["name"]
|
72 |
scene_id = row["scene_id"]
|
73 |
audio_id = row["audio_id"]
|
|
|
1 |
#!/usr/bin/python3
|
2 |
# -*- coding: utf-8 -*-
|
3 |
import argparse
|
|
|
4 |
from urllib.parse import urlparse
|
5 |
import json
|
|
|
|
|
|
|
6 |
from pathlib import Path
|
7 |
|
8 |
from gradio_client import Client, handle_file
|
|
|
9 |
import pandas as pd
|
10 |
+
from tqdm import tqdm
|
11 |
|
12 |
from project_settings import project_path, environment
|
13 |
|
|
|
62 |
|
63 |
df = pd.read_excel(args.audio_file)
|
64 |
|
65 |
+
for i, row in tqdm(df.iterrows(), total=len(df)):
|
66 |
name = row["name"]
|
67 |
scene_id = row["scene_id"]
|
68 |
audio_id = row["audio_id"]
|
main.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
#!/usr/bin/python3
|
2 |
# -*- coding: utf-8 -*-
|
3 |
"""
|
4 |
-
docker build -t audio_edit:
|
5 |
|
6 |
docker stop audio_edit_7861 && docker rm audio_edit_7861
|
7 |
|
@@ -10,7 +10,7 @@ docker run -itd \
|
|
10 |
--restart=always \
|
11 |
--network host \
|
12 |
-e port=7861 \
|
13 |
-
audio_edit:
|
14 |
"""
|
15 |
import argparse
|
16 |
import asyncio
|
@@ -23,6 +23,9 @@ from typing import Tuple, List
|
|
23 |
import uuid
|
24 |
|
25 |
import gradio as gr
|
|
|
|
|
|
|
26 |
import numpy as np
|
27 |
from scipy.io import wavfile
|
28 |
|
@@ -72,6 +75,46 @@ def save_input_audio(sample_rate: int, signal: np.ndarray) -> str:
|
|
72 |
return filename
|
73 |
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
def when_click_get_audio_info(audio_t, engine: str):
|
76 |
sample_rate, signal = audio_t
|
77 |
filename = save_input_audio(sample_rate, signal)
|
@@ -81,11 +124,18 @@ def when_click_get_audio_info(audio_t, engine: str):
|
|
81 |
try:
|
82 |
info: dict = get_audio_info(filename, engine)
|
83 |
result = json.dumps(info, ensure_ascii=False, indent=4)
|
|
|
|
|
|
|
|
|
|
|
84 |
except Exception as e:
|
85 |
result = None
|
86 |
message = f"failed. error type: {type(e)}, error text: {str(e)}"
|
|
|
|
|
87 |
|
88 |
-
return result, message
|
89 |
|
90 |
|
91 |
def when_click_audio_convert(audio_t,
|
@@ -485,6 +535,8 @@ def main():
|
|
485 |
with gr.Column(variant="panel", scale=5):
|
486 |
info_output = gr.Text(label="output")
|
487 |
info_log = gr.Text(label="log")
|
|
|
|
|
488 |
|
489 |
gr.Examples(
|
490 |
examples=[
|
@@ -492,13 +544,13 @@ def main():
|
|
492 |
for filename in examples_dir.glob("**/*.wav")
|
493 |
],
|
494 |
inputs=[info_audio, info_engine],
|
495 |
-
outputs=[info_output, info_log],
|
496 |
fn=when_click_get_audio_info,
|
497 |
)
|
498 |
info_button.click(
|
499 |
when_click_get_audio_info,
|
500 |
inputs=[info_audio, info_engine],
|
501 |
-
outputs=[info_output, info_log]
|
502 |
)
|
503 |
with gr.TabItem("convert"):
|
504 |
with gr.Row():
|
|
|
1 |
#!/usr/bin/python3
|
2 |
# -*- coding: utf-8 -*-
|
3 |
"""
|
4 |
+
docker build -t audio_edit:v20250521_0954 .
|
5 |
|
6 |
docker stop audio_edit_7861 && docker rm audio_edit_7861
|
7 |
|
|
|
10 |
--restart=always \
|
11 |
--network host \
|
12 |
-e port=7861 \
|
13 |
+
audio_edit:v20250521_0954
|
14 |
"""
|
15 |
import argparse
|
16 |
import asyncio
|
|
|
23 |
import uuid
|
24 |
|
25 |
import gradio as gr
|
26 |
+
import librosa
|
27 |
+
import librosa.display
|
28 |
+
import matplotlib.pyplot as plt
|
29 |
import numpy as np
|
30 |
from scipy.io import wavfile
|
31 |
|
|
|
75 |
return filename
|
76 |
|
77 |
|
78 |
+
def generate_spectrogram1(signal: np.ndarray, sample_rate: int = 8000, title: str = "Spectrogram"):
    """
    Render a dB-scaled STFT magnitude spectrogram of ``signal`` into a PNG file.

    :param signal: audio samples, passed unchanged to ``librosa.stft``.
    :param sample_rate: sampling rate forwarded to ``librosa.display.specshow`` as ``sr``.
    :param title: title drawn on the figure.
    :return: path of the written PNG. The file is created with ``delete=False``,
        so it is not removed automatically; cleanup is left to the caller.
    """
    mag = np.abs(librosa.stft(signal))
    # mag shape: [f, t]

    # A fixed reference (20) is used rather than the per-signal maximum (ref=np.max).
    mag_db = librosa.amplitude_to_db(mag, ref=20)

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(mag_db, sr=sample_rate)
        plt.title(title)

        # Only the path is needed: close the handle immediately so each call does
        # not leave an open file descriptor behind while matplotlib writes the file.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
            filename = temp_file.name
        plt.savefig(filename, bbox_inches="tight")
    finally:
        # Close this specific figure even when plotting or saving fails.
        plt.close(fig)
    return filename
|
93 |
+
|
94 |
+
|
95 |
+
def generate_spectrogram2(signal: np.ndarray, sample_rate: int = 8000, title: str = "Spectrogram"):
    """
    Render a log-magnitude STFT spectrogram of ``signal`` as an 8-bit gray map into a PNG file.

    The natural log of the STFT magnitude is mapped linearly from ``[xmin, xmax]``
    to ``[0, 255]``; values outside that window saturate at 0 or 255.

    :param signal: audio samples, passed unchanged to ``librosa.stft``.
    :param sample_rate: sampling rate forwarded to ``librosa.display.specshow`` as ``sr``.
    :param title: title drawn on the figure.
    :return: path of the written PNG. The file is created with ``delete=False``,
        so it is not removed automatically; cleanup is left to the caller.
    """
    mag = np.abs(librosa.stft(signal))
    # mag shape: [f, t] (no transpose is applied below, so this layout is kept)

    # Floor the magnitude at the smallest positive normal float so that exactly
    # silent bins do not produce log(0) = -inf (and a RuntimeWarning).
    floor = np.finfo(mag.dtype).tiny
    spectrum = np.log(np.maximum(mag, floor))

    xmax = 15
    xmin = -40
    gray = 255 * (spectrum - xmin) / (xmax - xmin)
    # Clip before the cast: without it, values outside [0, 255] wrap around in
    # uint8 and show up as spurious bright/dark pixels.
    gray = np.clip(gray, 0, 255).astype(np.uint8)

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(gray, sr=sample_rate)
        plt.title(title)

        # Only the path is needed: close the handle immediately so each call does
        # not leave an open file descriptor behind while matplotlib writes the file.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
            filename = temp_file.name
        plt.savefig(filename, bbox_inches="tight")
    finally:
        # Close this specific figure even when plotting or saving fails.
        plt.close(fig)
    return filename
|
116 |
+
|
117 |
+
|
118 |
def when_click_get_audio_info(audio_t, engine: str):
|
119 |
sample_rate, signal = audio_t
|
120 |
filename = save_input_audio(sample_rate, signal)
|
|
|
124 |
try:
|
125 |
info: dict = get_audio_info(filename, engine)
|
126 |
result = json.dumps(info, ensure_ascii=False, indent=4)
|
127 |
+
|
128 |
+
signal_ = np.array(signal / (1 << 15), dtype=np.float32)
|
129 |
+
spec_image1 = generate_spectrogram1(signal_, title="spec")
|
130 |
+
spec_image2 = generate_spectrogram2(signal_, title="spec")
|
131 |
+
|
132 |
except Exception as e:
|
133 |
result = None
|
134 |
message = f"failed. error type: {type(e)}, error text: {str(e)}"
|
135 |
+
spec_image1 = None
|
136 |
+
spec_image2 = None
|
137 |
|
138 |
+
return result, message, spec_image1, spec_image2
|
139 |
|
140 |
|
141 |
def when_click_audio_convert(audio_t,
|
|
|
535 |
with gr.Column(variant="panel", scale=5):
|
536 |
info_output = gr.Text(label="output")
|
537 |
info_log = gr.Text(label="log")
|
538 |
+
info_spec_image1 = gr.Image(label="spec_image1")
|
539 |
+
info_spec_image2 = gr.Image(label="spec_image2")
|
540 |
|
541 |
gr.Examples(
|
542 |
examples=[
|
|
|
544 |
for filename in examples_dir.glob("**/*.wav")
|
545 |
],
|
546 |
inputs=[info_audio, info_engine],
|
547 |
+
outputs=[info_output, info_log, info_spec_image1, info_spec_image2],
|
548 |
fn=when_click_get_audio_info,
|
549 |
)
|
550 |
info_button.click(
|
551 |
when_click_get_audio_info,
|
552 |
inputs=[info_audio, info_engine],
|
553 |
+
outputs=[info_output, info_log, info_spec_image1, info_spec_image2]
|
554 |
)
|
555 |
with gr.TabItem("convert"):
|
556 |
with gr.Row():
|
toolbox/audio_edit/info.py
CHANGED
@@ -117,5 +117,5 @@ def main():
|
|
117 |
return
|
118 |
|
119 |
|
120 |
-
if __name__ ==
|
121 |
main()
|
|
|
117 |
return
|
118 |
|
119 |
|
120 |
+
if __name__ == "__main__":
|
121 |
main()
|