HoneyTian committed on
Commit
7b82434
·
1 Parent(s): 8bea69a
examples/jik_trim/step_1_download_audio.py CHANGED
@@ -7,6 +7,7 @@ from pathlib import Path
7
 
8
  import requests
9
  import pandas as pd
 
10
 
11
  from project_settings import project_path
12
 
@@ -35,7 +36,7 @@ def main():
35
 
36
  df = pd.read_excel(args.audio_file)
37
 
38
- for i, row in df.iterrows():
39
  name = row["name"]
40
  scene_id = row["scene_id"]
41
  audio_id = row["audio_id"]
@@ -47,6 +48,9 @@ def main():
47
  filename = output_dir / path[1:]
48
  filename.parent.mkdir(parents=True, exist_ok=True)
49
 
 
 
 
50
  resp = requests.get(audio_url)
51
  with open(filename.as_posix(), "wb") as f:
52
  f.write(resp.content)
 
7
 
8
  import requests
9
  import pandas as pd
10
+ from tqdm import tqdm
11
 
12
  from project_settings import project_path
13
 
 
36
 
37
  df = pd.read_excel(args.audio_file)
38
 
39
+ for i, row in tqdm(df.iterrows(), total=len(df)):
40
  name = row["name"]
41
  scene_id = row["scene_id"]
42
  audio_id = row["audio_id"]
 
48
  filename = output_dir / path[1:]
49
  filename.parent.mkdir(parents=True, exist_ok=True)
50
 
51
+ if filename.exists():
52
+ continue
53
+
54
  resp = requests.get(audio_url)
55
  with open(filename.as_posix(), "wb") as f:
56
  f.write(resp.content)
examples/jik_trim/step_2_trim_audio.py CHANGED
@@ -4,15 +4,11 @@ import argparse
4
  import shutil
5
  from urllib.parse import urlparse
6
  import json
7
-
8
- import librosa
9
- import numpy as np
10
  from pathlib import Path
11
 
12
  from gradio_client import Client, handle_file
13
- import requests
14
  import pandas as pd
15
- from scipy.io import wavfile
16
 
17
  from project_settings import project_path
18
 
@@ -52,7 +48,7 @@ def main():
52
 
53
  df = pd.read_excel(args.audio_file)
54
 
55
- for i, row in df.iterrows():
56
  name = row["name"]
57
  scene_id = row["scene_id"]
58
  audio_id = row["audio_id"]
@@ -65,8 +61,8 @@ def main():
65
 
66
  kwargs = {
67
  "silence_threshold": -40,
68
- "min_silence_len": 200,
69
- "min_kept_silence": 200,
70
  "mode": "trim"
71
  }
72
  kwargs = json.dumps(kwargs, ensure_ascii=False, indent=4)
 
4
  import shutil
5
  from urllib.parse import urlparse
6
  import json
 
 
 
7
  from pathlib import Path
8
 
9
  from gradio_client import Client, handle_file
 
10
  import pandas as pd
11
+ from tqdm import tqdm
12
 
13
  from project_settings import project_path
14
 
 
48
 
49
  df = pd.read_excel(args.audio_file)
50
 
51
+ for i, row in tqdm(df.iterrows(), total=len(df)):
52
  name = row["name"]
53
  scene_id = row["scene_id"]
54
  audio_id = row["audio_id"]
 
61
 
62
  kwargs = {
63
  "silence_threshold": -40,
64
+ "min_silence_len": 50,
65
+ "min_kept_silence": 0,
66
  "mode": "trim"
67
  }
68
  kwargs = json.dumps(kwargs, ensure_ascii=False, indent=4)
examples/jik_trim/step_3_upload_to_obs.py CHANGED
@@ -1,18 +1,13 @@
1
  #!/usr/bin/python3
2
  # -*- coding: utf-8 -*-
3
  import argparse
4
- import shutil
5
  from urllib.parse import urlparse
6
  import json
7
-
8
- import librosa
9
- import numpy as np
10
  from pathlib import Path
11
 
12
  from gradio_client import Client, handle_file
13
- import requests
14
  import pandas as pd
15
- from scipy.io import wavfile
16
 
17
  from project_settings import project_path, environment
18
 
@@ -67,7 +62,7 @@ def main():
67
 
68
  df = pd.read_excel(args.audio_file)
69
 
70
- for i, row in df.iterrows():
71
  name = row["name"]
72
  scene_id = row["scene_id"]
73
  audio_id = row["audio_id"]
 
1
  #!/usr/bin/python3
2
  # -*- coding: utf-8 -*-
3
  import argparse
 
4
  from urllib.parse import urlparse
5
  import json
 
 
 
6
  from pathlib import Path
7
 
8
  from gradio_client import Client, handle_file
 
9
  import pandas as pd
10
+ from tqdm import tqdm
11
 
12
  from project_settings import project_path, environment
13
 
 
62
 
63
  df = pd.read_excel(args.audio_file)
64
 
65
+ for i, row in tqdm(df.iterrows(), total=len(df)):
66
  name = row["name"]
67
  scene_id = row["scene_id"]
68
  audio_id = row["audio_id"]
main.py CHANGED
@@ -1,7 +1,7 @@
1
  #!/usr/bin/python3
2
  # -*- coding: utf-8 -*-
3
  """
4
- docker build -t audio_edit:v20250314_1357 .
5
 
6
  docker stop audio_edit_7861 && docker rm audio_edit_7861
7
 
@@ -10,7 +10,7 @@ docker run -itd \
10
  --restart=always \
11
  --network host \
12
  -e port=7861 \
13
- audio_edit:v20250314_1357
14
  """
15
  import argparse
16
  import asyncio
@@ -23,6 +23,9 @@ from typing import Tuple, List
23
  import uuid
24
 
25
  import gradio as gr
 
 
 
26
  import numpy as np
27
  from scipy.io import wavfile
28
 
@@ -72,6 +75,46 @@ def save_input_audio(sample_rate: int, signal: np.ndarray) -> str:
72
  return filename
73
 
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def when_click_get_audio_info(audio_t, engine: str):
76
  sample_rate, signal = audio_t
77
  filename = save_input_audio(sample_rate, signal)
@@ -81,11 +124,18 @@ def when_click_get_audio_info(audio_t, engine: str):
81
  try:
82
  info: dict = get_audio_info(filename, engine)
83
  result = json.dumps(info, ensure_ascii=False, indent=4)
 
 
 
 
 
84
  except Exception as e:
85
  result = None
86
  message = f"failed. error type: {type(e)}, error text: {str(e)}"
 
 
87
 
88
- return result, message
89
 
90
 
91
  def when_click_audio_convert(audio_t,
@@ -485,6 +535,8 @@ def main():
485
  with gr.Column(variant="panel", scale=5):
486
  info_output = gr.Text(label="output")
487
  info_log = gr.Text(label="log")
 
 
488
 
489
  gr.Examples(
490
  examples=[
@@ -492,13 +544,13 @@ def main():
492
  for filename in examples_dir.glob("**/*.wav")
493
  ],
494
  inputs=[info_audio, info_engine],
495
- outputs=[info_output, info_log],
496
  fn=when_click_get_audio_info,
497
  )
498
  info_button.click(
499
  when_click_get_audio_info,
500
  inputs=[info_audio, info_engine],
501
- outputs=[info_output, info_log]
502
  )
503
  with gr.TabItem("convert"):
504
  with gr.Row():
 
1
  #!/usr/bin/python3
2
  # -*- coding: utf-8 -*-
3
  """
4
+ docker build -t audio_edit:v20250521_0954 .
5
 
6
  docker stop audio_edit_7861 && docker rm audio_edit_7861
7
 
 
10
  --restart=always \
11
  --network host \
12
  -e port=7861 \
13
+ audio_edit:v20250521_0954
14
  """
15
  import argparse
16
  import asyncio
 
23
  import uuid
24
 
25
  import gradio as gr
26
+ import librosa
27
+ import librosa.display
28
+ import matplotlib.pyplot as plt
29
  import numpy as np
30
  from scipy.io import wavfile
31
 
 
75
  return filename
76
 
77
 
78
+ def generate_spectrogram1(signal: np.ndarray, sample_rate: int = 8000, title: str = "Spectrogram"):
79
+ mag = np.abs(librosa.stft(signal))
80
+ # mag shape: [f, t]
81
+
82
+ # mag_db = librosa.amplitude_to_db(mag, ref=np.max)
83
+ mag_db = librosa.amplitude_to_db(mag, ref=20)
84
+
85
+ plt.figure(figsize=(10, 4))
86
+ librosa.display.specshow(mag_db, sr=sample_rate)
87
+ plt.title(title)
88
+
89
+ temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
90
+ plt.savefig(temp_file.name, bbox_inches="tight")
91
+ plt.close()
92
+ return temp_file.name
93
+
94
+
95
+ def generate_spectrogram2(signal: np.ndarray, sample_rate: int = 8000, title: str = "Spectrogram"):
96
+ mag = np.abs(librosa.stft(signal))
97
+ # mag shape: [f, t]
98
+ spectrum = mag
99
+ # spectrum shape: [t, f]
100
+
101
+ spectrum = np.log(spectrum)
102
+
103
+ xmax = 15
104
+ xmin = -40
105
+ gray = 255 * (spectrum - xmin) / (xmax - xmin)
106
+ gray = np.array(gray, dtype=np.uint8)
107
+
108
+ plt.figure(figsize=(10, 4))
109
+ librosa.display.specshow(gray, sr=sample_rate)
110
+ plt.title(title)
111
+
112
+ temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
113
+ plt.savefig(temp_file.name, bbox_inches="tight")
114
+ plt.close()
115
+ return temp_file.name
116
+
117
+
118
  def when_click_get_audio_info(audio_t, engine: str):
119
  sample_rate, signal = audio_t
120
  filename = save_input_audio(sample_rate, signal)
 
124
  try:
125
  info: dict = get_audio_info(filename, engine)
126
  result = json.dumps(info, ensure_ascii=False, indent=4)
127
+
128
+ signal_ = np.array(signal / (1 << 15), dtype=np.float32)
129
+ spec_image1 = generate_spectrogram1(signal_, title="spec")
130
+ spec_image2 = generate_spectrogram2(signal_, title="spec")
131
+
132
  except Exception as e:
133
  result = None
134
  message = f"failed. error type: {type(e)}, error text: {str(e)}"
135
+ spec_image1 = None
136
+ spec_image2 = None
137
 
138
+ return result, message, spec_image1, spec_image2
139
 
140
 
141
  def when_click_audio_convert(audio_t,
 
535
  with gr.Column(variant="panel", scale=5):
536
  info_output = gr.Text(label="output")
537
  info_log = gr.Text(label="log")
538
+ info_spec_image1 = gr.Image(label="spec_image1")
539
+ info_spec_image2 = gr.Image(label="spec_image2")
540
 
541
  gr.Examples(
542
  examples=[
 
544
  for filename in examples_dir.glob("**/*.wav")
545
  ],
546
  inputs=[info_audio, info_engine],
547
+ outputs=[info_output, info_log, info_spec_image1, info_spec_image2],
548
  fn=when_click_get_audio_info,
549
  )
550
  info_button.click(
551
  when_click_get_audio_info,
552
  inputs=[info_audio, info_engine],
553
+ outputs=[info_output, info_log, info_spec_image1, info_spec_image2]
554
  )
555
  with gr.TabItem("convert"):
556
  with gr.Row():
toolbox/audio_edit/info.py CHANGED
@@ -117,5 +117,5 @@ def main():
117
  return
118
 
119
 
120
- if __name__ == '__main__':
121
  main()
 
117
  return
118
 
119
 
120
+ if __name__ == "__main__":
121
  main()