Fix: Improve spectrogram rendering stability and accuracy
Browse files
Replaced manual time scaling with `librosa.time_to_frames` to prevent the visualization from freezing at the end of the video. Added a safety check for empty spectrograms to avoid crashes with very short audio clips.
app.py
CHANGED
|
@@ -505,8 +505,6 @@ def process_audio_to_video(*args, progress=gr.Progress(track_tqdm=True)):
|
|
| 505 |
|
| 506 |
text_clips.append(txt_clip)
|
| 507 |
|
| 508 |
-
|
| 509 |
-
|
| 510 |
N_FFT, HOP_LENGTH, N_BANDS = 2048, 512, 32
|
| 511 |
MIN_DB, MAX_DB = -80.0, 0.0
|
| 512 |
|
|
@@ -529,8 +527,22 @@ def process_audio_to_video(*args, progress=gr.Progress(track_tqdm=True)):
|
|
| 529 |
if not image_clips:
|
| 530 |
for i in range(1, 9):
|
| 531 |
y_pos = int(i * (HEIGHT / 9)); frame[y_pos-1:y_pos, :] = grid_rgb
|
| 532 |
-
|
| 533 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
bar_width = WIDTH / N_BANDS
|
| 535 |
for i in range(N_BANDS):
|
| 536 |
energy_db = S_mel_db[i, time_idx]
|
|
|
|
| 505 |
|
| 506 |
text_clips.append(txt_clip)
|
| 507 |
|
|
|
|
|
|
|
| 508 |
N_FFT, HOP_LENGTH, N_BANDS = 2048, 512, 32
|
| 509 |
MIN_DB, MAX_DB = -80.0, 0.0
|
| 510 |
|
|
|
|
| 527 |
if not image_clips:
|
| 528 |
for i in range(1, 9):
|
| 529 |
y_pos = int(i * (HEIGHT / 9)); frame[y_pos-1:y_pos, :] = grid_rgb
|
| 530 |
+
|
| 531 |
+
# 1. Safety Check: If the spectrogram has no time frames (e.g., from an extremely short audio file),
|
| 532 |
+
# return a blank frame immediately to prevent an IndexError.
|
| 533 |
+
if S_mel_db.shape[1] == 0:
|
| 534 |
+
return frame
|
| 535 |
+
|
| 536 |
+
# 2. Use librosa.time_to_frames to accurately convert the video time `t`
|
| 537 |
+
# into a spectrogram frame index. This is far more reliable than manual scaling
|
| 538 |
+
# and solves the problem of missing content on the rightmost side of the video.
|
| 539 |
+
time_idx = librosa.time_to_frames(t, sr=current_sr, hop_length=HOP_LENGTH)
|
| 540 |
+
|
| 541 |
+
# 3. Boundary Protection: Although time_to_frames is accurate, this extra `min`
|
| 542 |
+
# call acts as a safeguard to ensure the index never exceeds the array's
|
| 543 |
+
# maximum valid index, preventing any edge-case errors.
|
| 544 |
+
time_idx = min(time_idx, S_mel_db.shape[1] - 1)
|
| 545 |
+
|
| 546 |
bar_width = WIDTH / N_BANDS
|
| 547 |
for i in range(N_BANDS):
|
| 548 |
energy_db = S_mel_db[i, time_idx]
|