root
committed on
Commit
·
50549e3
1
Parent(s):
70a625a
ss
Browse files
app.py
CHANGED
@@ -2539,7 +2539,9 @@ def format_beat_timeline(audio_file, lyrics=None):
|
|
2539 |
|
2540 |
# Format the timeline
|
2541 |
timeline = "=== BEAT & SYLLABLE TIMELINE ===\n\n"
|
2542 |
-
|
|
|
|
|
2543 |
timeline += f"Time Signature: {beats_info['time_signature']}/4\n"
|
2544 |
timeline += f"Total Beats: {beats_info['beat_count']}\n\n"
|
2545 |
|
@@ -2549,6 +2551,10 @@ def format_beat_timeline(audio_file, lyrics=None):
|
|
2549 |
|
2550 |
# Add beat-by-beat information
|
2551 |
for i, (time, strength) in enumerate(zip(beats_info['beat_times'], beats_info['beat_strengths'])):
|
|
|
|
|
|
|
|
|
2552 |
# Determine beat type based on strength
|
2553 |
if strength >= 0.8:
|
2554 |
beat_type = "STRONG"
|
@@ -2580,7 +2586,9 @@ def format_beat_timeline(audio_file, lyrics=None):
|
|
2580 |
|
2581 |
# Calculate total duration and create time markers
|
2582 |
if beats_info['beat_times']:
|
2583 |
-
|
|
|
|
|
2584 |
else:
|
2585 |
total_duration = 30 # Default duration if no beats found
|
2586 |
|
@@ -2606,13 +2614,19 @@ def format_beat_timeline(audio_file, lyrics=None):
|
|
2606 |
if i >= len(beats_info['beat_strengths']):
|
2607 |
break
|
2608 |
|
|
|
|
|
|
|
2609 |
# Determine position in the timeline
|
2610 |
-
pos = int(
|
2611 |
if pos >= len(beat_line):
|
2612 |
continue
|
2613 |
|
2614 |
# Determine beat type based on strength and position
|
2615 |
strength = beats_info['beat_strengths'][i]
|
|
|
|
|
|
|
2616 |
if i % beats_info['time_signature'] == 0:
|
2617 |
beat_line[pos] = "S" # Strong beat at start of measure
|
2618 |
elif strength >= 0.8:
|
@@ -2639,7 +2653,9 @@ def format_beat_timeline(audio_file, lyrics=None):
|
|
2639 |
measure_starts = []
|
2640 |
for i, time in enumerate(beats_info['beat_times']):
|
2641 |
if i % beats_info['time_signature'] == 0: # Start of measure
|
2642 |
-
|
|
|
|
|
2643 |
|
2644 |
# Format measure information
|
2645 |
if measure_starts:
|
@@ -2653,7 +2669,9 @@ def format_beat_timeline(audio_file, lyrics=None):
|
|
2653 |
if i < len(measure_starts) - 1:
|
2654 |
end_time = measure_starts[i+1][1]
|
2655 |
elif beats_info['beat_times']:
|
2656 |
-
|
|
|
|
|
2657 |
else:
|
2658 |
end_time = start_time + 2.0 # Default 2 seconds if no next measure
|
2659 |
|
@@ -2678,17 +2696,25 @@ def format_beat_timeline(audio_file, lyrics=None):
|
|
2678 |
end_beat = phrase[-1]
|
2679 |
if start_beat >= len(beats_info['beat_times']) or end_beat >= len(beats_info['beat_times']):
|
2680 |
continue
|
2681 |
-
|
|
|
2682 |
phrase_start = beats_info['beat_times'][start_beat]
|
|
|
|
|
2683 |
phrase_end = beats_info['beat_times'][end_beat]
|
|
|
2684 |
|
2685 |
timeline += f"Phrase {i+1}: Beats {start_beat+1}-{end_beat+1} ({phrase_start:.2f}s - {phrase_end:.2f}s)\n"
|
2686 |
|
2687 |
# Create syllable template for this phrase
|
2688 |
phrase_beats = {
|
2689 |
-
"beat_times": [beats_info['beat_times'][j]
|
2690 |
-
|
2691 |
-
|
|
|
|
|
|
|
|
|
2692 |
"time_signature": beats_info['time_signature'],
|
2693 |
"phrases": [list(range(len(phrase)))]
|
2694 |
}
|
@@ -2715,6 +2741,7 @@ def format_beat_timeline(audio_file, lyrics=None):
|
|
2715 |
for j in phrase:
|
2716 |
if j < len(beats_info['beat_times']):
|
2717 |
beat_time = beats_info['beat_times'][j]
|
|
|
2718 |
beat_pos = int(beat_time * 2)
|
2719 |
|
2720 |
if beat_pos < len(phrase_visualization) and beat_pos != start_pos and beat_pos != end_pos:
|
@@ -2766,8 +2793,13 @@ def format_beat_timeline(audio_file, lyrics=None):
|
|
2766 |
if phrase and phrase[0] < len(beats_info['beat_times']) and phrase[-1] < len(beats_info['beat_times']):
|
2767 |
start_beat = phrase[0]
|
2768 |
end_beat = phrase[-1]
|
|
|
2769 |
start_time = beats_info['beat_times'][start_beat]
|
|
|
|
|
2770 |
end_time = beats_info['beat_times'][end_beat]
|
|
|
|
|
2771 |
timeline += f" Timing: {start_time:.2f}s - {end_time:.2f}s\n"
|
2772 |
|
2773 |
# Create a visualization of syllable alignment
|
@@ -2781,6 +2813,7 @@ def format_beat_timeline(audio_file, lyrics=None):
|
|
2781 |
for j in phrase:
|
2782 |
if j < len(beats_info['beat_times']):
|
2783 |
beat_time = beats_info['beat_times'][j]
|
|
|
2784 |
relative_pos = int((beat_time - start_time) / phrase_duration * syllable_count)
|
2785 |
|
2786 |
while len(syllable_viz) <= relative_pos:
|
|
|
2539 |
|
2540 |
# Format the timeline
|
2541 |
timeline = "=== BEAT & SYLLABLE TIMELINE ===\n\n"
|
2542 |
+
# Convert tempo to float before formatting if it's a numpy array
|
2543 |
+
tempo = float(beats_info['tempo']) if isinstance(beats_info['tempo'], np.ndarray) else beats_info['tempo']
|
2544 |
+
timeline += f"Tempo: {tempo:.1f} BPM\n"
|
2545 |
timeline += f"Time Signature: {beats_info['time_signature']}/4\n"
|
2546 |
timeline += f"Total Beats: {beats_info['beat_count']}\n\n"
|
2547 |
|
|
|
2551 |
|
2552 |
# Add beat-by-beat information
|
2553 |
for i, (time, strength) in enumerate(zip(beats_info['beat_times'], beats_info['beat_strengths'])):
|
2554 |
+
# Convert numpy values to Python float if needed
|
2555 |
+
time = float(time) if isinstance(time, np.ndarray) else time
|
2556 |
+
strength = float(strength) if isinstance(strength, np.ndarray) else strength
|
2557 |
+
|
2558 |
# Determine beat type based on strength
|
2559 |
if strength >= 0.8:
|
2560 |
beat_type = "STRONG"
|
|
|
2586 |
|
2587 |
# Calculate total duration and create time markers
|
2588 |
if beats_info['beat_times']:
|
2589 |
+
# Convert to float if it's a numpy array
|
2590 |
+
max_beat_time = float(max(beats_info['beat_times'])) if isinstance(max(beats_info['beat_times']), np.ndarray) else max(beats_info['beat_times'])
|
2591 |
+
total_duration = max_beat_time + 2 # Add 2 seconds of padding
|
2592 |
else:
|
2593 |
total_duration = 30 # Default duration if no beats found
|
2594 |
|
|
|
2614 |
if i >= len(beats_info['beat_strengths']):
|
2615 |
break
|
2616 |
|
2617 |
+
# Convert to float if it's a numpy array
|
2618 |
+
time_val = float(time) if isinstance(time, np.ndarray) else time
|
2619 |
+
|
2620 |
# Determine position in the timeline
|
2621 |
+
pos = int(time_val * 2) # Convert to position in the beat_line
|
2622 |
if pos >= len(beat_line):
|
2623 |
continue
|
2624 |
|
2625 |
# Determine beat type based on strength and position
|
2626 |
strength = beats_info['beat_strengths'][i]
|
2627 |
+
# Convert to float if it's a numpy array
|
2628 |
+
strength = float(strength) if isinstance(strength, np.ndarray) else strength
|
2629 |
+
|
2630 |
if i % beats_info['time_signature'] == 0:
|
2631 |
beat_line[pos] = "S" # Strong beat at start of measure
|
2632 |
elif strength >= 0.8:
|
|
|
2653 |
measure_starts = []
|
2654 |
for i, time in enumerate(beats_info['beat_times']):
|
2655 |
if i % beats_info['time_signature'] == 0: # Start of measure
|
2656 |
+
# Convert to float if it's a numpy array
|
2657 |
+
time_val = float(time) if isinstance(time, np.ndarray) else time
|
2658 |
+
measure_starts.append((i // beats_info['time_signature'] + 1, time_val))
|
2659 |
|
2660 |
# Format measure information
|
2661 |
if measure_starts:
|
|
|
2669 |
if i < len(measure_starts) - 1:
|
2670 |
end_time = measure_starts[i+1][1]
|
2671 |
elif beats_info['beat_times']:
|
2672 |
+
# Get the last beat time and convert to float if needed
|
2673 |
+
last_beat = beats_info['beat_times'][-1]
|
2674 |
+
end_time = float(last_beat) if isinstance(last_beat, np.ndarray) else last_beat
|
2675 |
else:
|
2676 |
end_time = start_time + 2.0 # Default 2 seconds if no next measure
|
2677 |
|
|
|
2696 |
end_beat = phrase[-1]
|
2697 |
if start_beat >= len(beats_info['beat_times']) or end_beat >= len(beats_info['beat_times']):
|
2698 |
continue
|
2699 |
+
|
2700 |
+
# Convert to float if needed
|
2701 |
phrase_start = beats_info['beat_times'][start_beat]
|
2702 |
+
phrase_start = float(phrase_start) if isinstance(phrase_start, np.ndarray) else phrase_start
|
2703 |
+
|
2704 |
phrase_end = beats_info['beat_times'][end_beat]
|
2705 |
+
phrase_end = float(phrase_end) if isinstance(phrase_end, np.ndarray) else phrase_end
|
2706 |
|
2707 |
timeline += f"Phrase {i+1}: Beats {start_beat+1}-{end_beat+1} ({phrase_start:.2f}s - {phrase_end:.2f}s)\n"
|
2708 |
|
2709 |
# Create syllable template for this phrase
|
2710 |
phrase_beats = {
|
2711 |
+
"beat_times": [float(beats_info['beat_times'][j]) if isinstance(beats_info['beat_times'][j], np.ndarray)
|
2712 |
+
else beats_info['beat_times'][j]
|
2713 |
+
for j in phrase if j < len(beats_info['beat_times'])],
|
2714 |
+
"beat_strengths": [float(beats_info['beat_strengths'][j]) if isinstance(beats_info['beat_strengths'][j], np.ndarray)
|
2715 |
+
else beats_info['beat_strengths'][j]
|
2716 |
+
for j in phrase if j < len(beats_info['beat_strengths'])],
|
2717 |
+
"tempo": float(beats_info['tempo']) if isinstance(beats_info['tempo'], np.ndarray) else beats_info['tempo'],
|
2718 |
"time_signature": beats_info['time_signature'],
|
2719 |
"phrases": [list(range(len(phrase)))]
|
2720 |
}
|
|
|
2741 |
for j in phrase:
|
2742 |
if j < len(beats_info['beat_times']):
|
2743 |
beat_time = beats_info['beat_times'][j]
|
2744 |
+
beat_time = float(beat_time) if isinstance(beat_time, np.ndarray) else beat_time
|
2745 |
beat_pos = int(beat_time * 2)
|
2746 |
|
2747 |
if beat_pos < len(phrase_visualization) and beat_pos != start_pos and beat_pos != end_pos:
|
|
|
2793 |
if phrase and phrase[0] < len(beats_info['beat_times']) and phrase[-1] < len(beats_info['beat_times']):
|
2794 |
start_beat = phrase[0]
|
2795 |
end_beat = phrase[-1]
|
2796 |
+
|
2797 |
start_time = beats_info['beat_times'][start_beat]
|
2798 |
+
start_time = float(start_time) if isinstance(start_time, np.ndarray) else start_time
|
2799 |
+
|
2800 |
end_time = beats_info['beat_times'][end_beat]
|
2801 |
+
end_time = float(end_time) if isinstance(end_time, np.ndarray) else end_time
|
2802 |
+
|
2803 |
timeline += f" Timing: {start_time:.2f}s - {end_time:.2f}s\n"
|
2804 |
|
2805 |
# Create a visualization of syllable alignment
|
|
|
2813 |
for j in phrase:
|
2814 |
if j < len(beats_info['beat_times']):
|
2815 |
beat_time = beats_info['beat_times'][j]
|
2816 |
+
beat_time = float(beat_time) if isinstance(beat_time, np.ndarray) else beat_time
|
2817 |
relative_pos = int((beat_time - start_time) / phrase_duration * syllable_count)
|
2818 |
|
2819 |
while len(syllable_viz) <= relative_pos:
|