Spaces:

jacob-c
/

syllables_matching_experiment

Paused

App Files Files Community

root committed on May 10

Commit

50549e3

1 Parent(s): 70a625a

ss

Browse files

Files changed (1) hide show

app.py +42 -9

app.py CHANGED Viewed

@@ -2539,7 +2539,9 @@ def format_beat_timeline(audio_file, lyrics=None):
         # Format the timeline
         timeline = "=== BEAT & SYLLABLE TIMELINE ===\n\n"
-        timeline += f"Tempo: {beats_info['tempo']:.1f} BPM\n"
         timeline += f"Time Signature: {beats_info['time_signature']}/4\n"
         timeline += f"Total Beats: {beats_info['beat_count']}\n\n"
@@ -2549,6 +2551,10 @@ def format_beat_timeline(audio_file, lyrics=None):
         # Add beat-by-beat information
         for i, (time, strength) in enumerate(zip(beats_info['beat_times'], beats_info['beat_strengths'])):
             # Determine beat type based on strength
             if strength >= 0.8:
                 beat_type = "STRONG"
@@ -2580,7 +2586,9 @@ def format_beat_timeline(audio_file, lyrics=None):
         # Calculate total duration and create time markers
         if beats_info['beat_times']:
-            total_duration = max(beats_info['beat_times']) + 2  # Add 2 seconds of padding
         else:
             total_duration = 30  # Default duration if no beats found
@@ -2606,13 +2614,19 @@ def format_beat_timeline(audio_file, lyrics=None):
             if i >= len(beats_info['beat_strengths']):
                 break
             # Determine position in the timeline
-            pos = int(time * 2)  # Convert to position in the beat_line
             if pos >= len(beat_line):
                 continue
             # Determine beat type based on strength and position
             strength = beats_info['beat_strengths'][i]
             if i % beats_info['time_signature'] == 0:
                 beat_line[pos] = "S"  # Strong beat at start of measure
             elif strength >= 0.8:
@@ -2639,7 +2653,9 @@ def format_beat_timeline(audio_file, lyrics=None):
         measure_starts = []
         for i, time in enumerate(beats_info['beat_times']):
             if i % beats_info['time_signature'] == 0:  # Start of measure
-                measure_starts.append((i // beats_info['time_signature'] + 1, time))
         # Format measure information
         if measure_starts:
@@ -2653,7 +2669,9 @@ def format_beat_timeline(audio_file, lyrics=None):
                 if i < len(measure_starts) - 1:
                     end_time = measure_starts[i+1][1]
                 elif beats_info['beat_times']:
-                    end_time = beats_info['beat_times'][-1]
                 else:
                     end_time = start_time + 2.0  # Default 2 seconds if no next measure
@@ -2678,17 +2696,25 @@ def format_beat_timeline(audio_file, lyrics=None):
                     end_beat = phrase[-1]
                     if start_beat >= len(beats_info['beat_times']) or end_beat >= len(beats_info['beat_times']):
                         continue
                     phrase_start = beats_info['beat_times'][start_beat]
                     phrase_end = beats_info['beat_times'][end_beat]
                     timeline += f"Phrase {i+1}: Beats {start_beat+1}-{end_beat+1} ({phrase_start:.2f}s - {phrase_end:.2f}s)\n"
                     # Create syllable template for this phrase
                     phrase_beats = {
-                        "beat_times": [beats_info['beat_times'][j] for j in phrase if j < len(beats_info['beat_times'])],
-                        "beat_strengths": [beats_info['beat_strengths'][j] for j in phrase if j < len(beats_info['beat_strengths'])],
-                        "tempo": beats_info['tempo'],
                         "time_signature": beats_info['time_signature'],
                         "phrases": [list(range(len(phrase)))]
                     }
@@ -2715,6 +2741,7 @@ def format_beat_timeline(audio_file, lyrics=None):
                         for j in phrase:
                             if j < len(beats_info['beat_times']):
                                 beat_time = beats_info['beat_times'][j]
                                 beat_pos = int(beat_time * 2)
                                 if beat_pos < len(phrase_visualization) and beat_pos != start_pos and beat_pos != end_pos:
@@ -2766,8 +2793,13 @@ def format_beat_timeline(audio_file, lyrics=None):
                     if phrase and phrase[0] < len(beats_info['beat_times']) and phrase[-1] < len(beats_info['beat_times']):
                         start_beat = phrase[0]
                         end_beat = phrase[-1]
                         start_time = beats_info['beat_times'][start_beat]
                         end_time = beats_info['beat_times'][end_beat]
                         timeline += f"  Timing: {start_time:.2f}s - {end_time:.2f}s\n"
                         # Create a visualization of syllable alignment
@@ -2781,6 +2813,7 @@ def format_beat_timeline(audio_file, lyrics=None):
                         for j in phrase:
                             if j < len(beats_info['beat_times']):
                                 beat_time = beats_info['beat_times'][j]
                                 relative_pos = int((beat_time - start_time) / phrase_duration * syllable_count)
                                 while len(syllable_viz) <= relative_pos:

         # Format the timeline
         timeline = "=== BEAT & SYLLABLE TIMELINE ===\n\n"
+        # Convert tempo to float before formatting if it's a numpy array
+        tempo = float(beats_info['tempo']) if isinstance(beats_info['tempo'], np.ndarray) else beats_info['tempo']
+        timeline += f"Tempo: {tempo:.1f} BPM\n"
         timeline += f"Time Signature: {beats_info['time_signature']}/4\n"
         timeline += f"Total Beats: {beats_info['beat_count']}\n\n"
         # Add beat-by-beat information
         for i, (time, strength) in enumerate(zip(beats_info['beat_times'], beats_info['beat_strengths'])):
+            # Convert numpy values to Python float if needed
+            time = float(time) if isinstance(time, np.ndarray) else time
+            strength = float(strength) if isinstance(strength, np.ndarray) else strength
             # Determine beat type based on strength
             if strength >= 0.8:
                 beat_type = "STRONG"
         # Calculate total duration and create time markers
         if beats_info['beat_times']:
+            # Convert to float if it's a numpy array
+            max_beat_time = float(max(beats_info['beat_times'])) if isinstance(max(beats_info['beat_times']), np.ndarray) else max(beats_info['beat_times'])
+            total_duration = max_beat_time + 2  # Add 2 seconds of padding
         else:
             total_duration = 30  # Default duration if no beats found
             if i >= len(beats_info['beat_strengths']):
                 break
+            # Convert to float if it's a numpy array
+            time_val = float(time) if isinstance(time, np.ndarray) else time
             # Determine position in the timeline
+            pos = int(time_val * 2)  # Convert to position in the beat_line
             if pos >= len(beat_line):
                 continue
             # Determine beat type based on strength and position
             strength = beats_info['beat_strengths'][i]
+            # Convert to float if it's a numpy array
+            strength = float(strength) if isinstance(strength, np.ndarray) else strength
             if i % beats_info['time_signature'] == 0:
                 beat_line[pos] = "S"  # Strong beat at start of measure
             elif strength >= 0.8:
         measure_starts = []
         for i, time in enumerate(beats_info['beat_times']):
             if i % beats_info['time_signature'] == 0:  # Start of measure
+                # Convert to float if it's a numpy array
+                time_val = float(time) if isinstance(time, np.ndarray) else time
+                measure_starts.append((i // beats_info['time_signature'] + 1, time_val))
         # Format measure information
         if measure_starts:
                 if i < len(measure_starts) - 1:
                     end_time = measure_starts[i+1][1]
                 elif beats_info['beat_times']:
+                    # Get the last beat time and convert to float if needed
+                    last_beat = beats_info['beat_times'][-1]
+                    end_time = float(last_beat) if isinstance(last_beat, np.ndarray) else last_beat
                 else:
                     end_time = start_time + 2.0  # Default 2 seconds if no next measure
                     end_beat = phrase[-1]
                     if start_beat >= len(beats_info['beat_times']) or end_beat >= len(beats_info['beat_times']):
                         continue
+                    # Convert to float if needed
                     phrase_start = beats_info['beat_times'][start_beat]
+                    phrase_start = float(phrase_start) if isinstance(phrase_start, np.ndarray) else phrase_start
                     phrase_end = beats_info['beat_times'][end_beat]
+                    phrase_end = float(phrase_end) if isinstance(phrase_end, np.ndarray) else phrase_end
                     timeline += f"Phrase {i+1}: Beats {start_beat+1}-{end_beat+1} ({phrase_start:.2f}s - {phrase_end:.2f}s)\n"
                     # Create syllable template for this phrase
                     phrase_beats = {
+                        "beat_times": [float(beats_info['beat_times'][j]) if isinstance(beats_info['beat_times'][j], np.ndarray)
+                                      else beats_info['beat_times'][j]
+                                      for j in phrase if j < len(beats_info['beat_times'])],
+                        "beat_strengths": [float(beats_info['beat_strengths'][j]) if isinstance(beats_info['beat_strengths'][j], np.ndarray)
+                                          else beats_info['beat_strengths'][j]
+                                          for j in phrase if j < len(beats_info['beat_strengths'])],
+                        "tempo": float(beats_info['tempo']) if isinstance(beats_info['tempo'], np.ndarray) else beats_info['tempo'],
                         "time_signature": beats_info['time_signature'],
                         "phrases": [list(range(len(phrase)))]
                     }
                         for j in phrase:
                             if j < len(beats_info['beat_times']):
                                 beat_time = beats_info['beat_times'][j]
+                                beat_time = float(beat_time) if isinstance(beat_time, np.ndarray) else beat_time
                                 beat_pos = int(beat_time * 2)
                                 if beat_pos < len(phrase_visualization) and beat_pos != start_pos and beat_pos != end_pos:
                     if phrase and phrase[0] < len(beats_info['beat_times']) and phrase[-1] < len(beats_info['beat_times']):
                         start_beat = phrase[0]
                         end_beat = phrase[-1]
                         start_time = beats_info['beat_times'][start_beat]
+                        start_time = float(start_time) if isinstance(start_time, np.ndarray) else start_time
                         end_time = beats_info['beat_times'][end_beat]
+                        end_time = float(end_time) if isinstance(end_time, np.ndarray) else end_time
                         timeline += f"  Timing: {start_time:.2f}s - {end_time:.2f}s\n"
                         # Create a visualization of syllable alignment
                         for j in phrase:
                             if j < len(beats_info['beat_times']):
                                 beat_time = beats_info['beat_times'][j]
+                                beat_time = float(beat_time) if isinstance(beat_time, np.ndarray) else beat_time
                                 relative_pos = int((beat_time - start_time) / phrase_duration * syllable_count)
                                 while len(syllable_viz) <= relative_pos: