root committed on
Commit
3feaaf4
·
1 Parent(s): 173048d
Files changed (1) hide show
  1. app.py +494 -7
app.py CHANGED
@@ -6,6 +6,7 @@ import numpy as np
6
  import re
7
  import pronouncing # Add this to requirements.txt for syllable counting
8
  import functools # Add this for lru_cache functionality
 
9
  from transformers import (
10
  AutoModelForAudioClassification,
11
  AutoFeatureExtractor,
@@ -2298,6 +2299,109 @@ Improved lyrics with fixed rhythm:
2298
 
2299
  return lyrics
2300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2301
  def process_audio(audio_file):
2302
  """Main function to process audio file, classify genre, and generate lyrics with enhanced rhythm analysis."""
2303
  if audio_file is None:
@@ -2524,7 +2628,17 @@ def process_audio(audio_file):
2524
  except Exception as e:
2525
  error_msg = f"Error processing audio: {str(e)}"
2526
  print(error_msg)
2527
- return error_msg, None, []
 
 
 
 
 
 
 
 
 
 
2528
 
2529
  # Create enhanced Gradio interface with tabs for better organization
2530
  with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
@@ -2569,6 +2683,290 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
2569
  with gr.TabItem("Rhythm Analysis"):
2570
  rhythm_analysis_output = gr.Textbox(label="Syllable-Beat Alignment Analysis", lines=16)
2571
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2572
  with gr.TabItem("Syllable Analysis"):
2573
  syllable_analysis_output = gr.Textbox(label="Detailed Syllable Analysis", lines=16)
2574
  prompt_template_output = gr.Textbox(label="Prompt Template", lines=16)
@@ -2576,7 +2974,12 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
2576
  # Processing function with better handling of results
2577
  def display_results(audio_file):
2578
  if audio_file is None:
2579
- return "Please upload an audio file.", "No emotion analysis available.", "No audio classification available.", "No lyrics generated.", "No rhythm analysis available.", "No syllable analysis available.", "No prompt template available."
 
 
 
 
 
2580
 
2581
  try:
2582
  # Process audio and get results
@@ -2584,9 +2987,19 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
2584
 
2585
  # Check if we got an error message instead of results
2586
  if isinstance(results, str) and "Error" in results:
2587
- return results, "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", "No syllable analysis available", "No prompt template available"
 
 
 
 
 
2588
  elif isinstance(results, tuple) and isinstance(results[0], str) and "Error" in results[0]:
2589
- return results[0], "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", "No syllable analysis available", "No prompt template available"
 
 
 
 
 
2590
 
2591
  # For backwards compatibility, handle both dictionary and tuple returns
2592
  if isinstance(results, dict):
@@ -2673,19 +3086,87 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
2673
  else:
2674
  ast_text = "No valid audio classification results available."
2675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2676
  # Return all results including new fields
2677
- return genre_results, emotion_text, ast_text, clean_lyrics, rhythm_analysis, syllable_analysis, prompt_template
2678
 
2679
  except Exception as e:
2680
  error_msg = f"Error: {str(e)}"
2681
  print(error_msg)
2682
- return error_msg, "Error in emotion analysis", "Error in audio classification", "No lyrics generated", "No rhythm analysis available", "No syllable analysis available", "No prompt template available"
 
 
 
 
 
 
 
 
 
 
2683
 
2684
  # Connect the button to the display function with updated outputs
2685
  submit_btn.click(
2686
  fn=display_results,
2687
  inputs=[audio_input],
2688
- outputs=[genre_output, emotion_output, ast_output, lyrics_output, rhythm_analysis_output, syllable_analysis_output, prompt_template_output]
2689
  )
2690
 
2691
  # Enhanced explanation of how the system works
@@ -2722,6 +3203,12 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
2722
 
2723
  8. **Refinement**: If significant rhythm mismatches are detected, the system can automatically refine the lyrics for better alignment.
2724
 
 
 
 
 
 
 
2725
  This multi-step process creates lyrics that feel naturally connected to the music, as if they were written specifically for it.
2726
  """)
2727
 
 
6
  import re
7
  import pronouncing # Add this to requirements.txt for syllable counting
8
  import functools # Add this for lru_cache functionality
9
+ import json # Add this for JSON serialization
10
  from transformers import (
11
  AutoModelForAudioClassification,
12
  AutoFeatureExtractor,
 
2299
 
2300
  return lyrics
2301
 
2302
def prepare_beat_synced_lyrics(audio_data, lyrics, beats_info):
    """
    Prepare data for the beat-synced lyrics viewer.

    Parameters:
        audio_data: Dictionary of audio features; expects a "duration" key (seconds)
        lyrics: String containing generated lyrics (may include section markers)
        beats_info: Dictionary of beat analysis data ("beat_times", "beat_strengths", "tempo")

    Returns:
        Dictionary with "duration", "tempo", "beat_times", "beat_strengths" and
        "lyrics_data" (per-line word timings) for the JavaScript viewer.
    """
    # Extract necessary data for visualization
    beat_times = beats_info.get("beat_times", [])
    beat_strengths = beats_info.get("beat_strengths", [1.0] * len(beat_times))
    tempo = beats_info.get("tempo", 120)
    # Robust duration lookup: fall back to the last beat time if the key is absent
    duration = audio_data.get("duration", beat_times[-1] if beat_times else 0)

    # Clean lyrics - remove annotations and section markers
    clean_lyrics = lyrics
    if isinstance(lyrics, str):
        # Cut trailing annotation sections BEFORE stripping "[...]" markers.
        # "_" is a word character, so re.sub(r'\[\w+\]') would delete the
        # "[RHYTHM_ANALYSIS_SECTION]" marker itself and the section body
        # would leak into the displayed lyrics.
        if "[Note:" in clean_lyrics:
            clean_lyrics = clean_lyrics.split("[Note:")[0].strip()
        if "[RHYTHM_ANALYSIS_SECTION]" in clean_lyrics:
            clean_lyrics = clean_lyrics.split("[RHYTHM_ANALYSIS_SECTION]")[0].strip()
        # Remove "[Verse]", "[Chorus]", etc.
        clean_lyrics = re.sub(r'\[\w+\]', '', clean_lyrics)

    # Split into non-empty lines
    lines = [line.strip() for line in clean_lyrics.split('\n') if line.strip()]

    lyrics_data = []

    # Simple approach: distribute lines evenly across available beats
    if lines and beat_times:
        beats_per_line = max(1, len(beat_times) // len(lines))

        for i, line in enumerate(lines):
            # Determine beat range for this line (clamped to valid indices)
            start_beat_idx = min(i * beats_per_line, len(beat_times) - 1)
            end_beat_idx = min(start_beat_idx + beats_per_line, len(beat_times) - 1)

            line_start_time = beat_times[start_beat_idx]
            # When the line has no beat span left (tail lines past the final
            # beat), extend it to the end of the track so word spacing is
            # non-zero. (The old `end_beat_idx < len(beat_times)` guard was
            # dead code: end_beat_idx is always in range after min().)
            if end_beat_idx > start_beat_idx:
                line_end_time = beat_times[end_beat_idx]
            else:
                line_end_time = duration

            # Split line into word-like tokens, dropping pure whitespace
            words = re.findall(r'\b\w+\b|-|\s+|[^\w\s]', line)
            filtered_words = [w for w in words if w.strip()]
            if not filtered_words:
                continue

            # Beat times available for this line; ensure at least two anchors
            line_beat_times = beat_times[start_beat_idx:end_beat_idx + 1]
            if len(line_beat_times) < 2:
                line_beat_times = [line_start_time, line_end_time]

            word_data = []
            if len(filtered_words) <= len(line_beat_times):
                # One beat per word (extra beats are ignored)
                for j, word in enumerate(filtered_words):
                    beat_idx = min(j, len(line_beat_times) - 1)
                    word_data.append({
                        "text": word,
                        "time": line_beat_times[beat_idx],
                        # Simple heuristic: line-initial or capitalized words land on strong beats
                        "is_strong": j == 0 or word[0].isupper(),
                    })
            else:
                # More words than beats: spread words evenly over the line span
                span = max(line_end_time - line_start_time, 0.0)
                word_duration = span / len(filtered_words)
                for j, word in enumerate(filtered_words):
                    word_data.append({
                        "text": word,
                        "time": line_start_time + j * word_duration,
                        "is_strong": j == 0 or word[0].isupper(),
                    })

            lyrics_data.append({
                "line": line,
                "start_time": line_start_time,
                "end_time": line_end_time,
                "words": word_data,
            })

    # Visualization payload consumed by the beat-sync JavaScript viewer
    return {
        "duration": duration,
        "tempo": tempo,
        "beat_times": beat_times,
        "beat_strengths": beat_strengths,
        "lyrics_data": lyrics_data,
    }
2404
+
2405
  def process_audio(audio_file):
2406
  """Main function to process audio file, classify genre, and generate lyrics with enhanced rhythm analysis."""
2407
  if audio_file is None:
 
2628
  except Exception as e:
2629
  error_msg = f"Error processing audio: {str(e)}"
2630
  print(error_msg)
2631
+
2632
+ # Use a raw string literal to avoid f-string backslash issues
2633
+ error_html = """<script>
2634
+ document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
2635
+ document.getElementById('loading-message').style.display = 'block';
2636
+ document.getElementById('loading-message').innerText = 'Error processing audio';
2637
+ document.getElementById('beat-sync-timeline').style.display = 'none';
2638
+ document.getElementById('lyrics-display').style.display = 'none';
2639
+ </script>"""
2640
+
2641
+ return error_msg, "Error in emotion analysis", "Error in audio classification", "No lyrics generated", "No rhythm analysis available", audio_file, error_html, "No syllable analysis available", "No prompt template available"
2642
 
2643
  # Create enhanced Gradio interface with tabs for better organization
2644
  with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
 
2683
  with gr.TabItem("Rhythm Analysis"):
2684
  rhythm_analysis_output = gr.Textbox(label="Syllable-Beat Alignment Analysis", lines=16)
2685
 
2686
+ with gr.TabItem("Beat-Synced Lyrics"):
2687
+ # Create a container for the beat-synced lyrics viewer
2688
+ synced_audio_output = gr.Audio(label="Playback with Synced Lyrics", type="filepath")
2689
+
2690
+ # Create a custom JavaScript component for the beat-synced lyrics viewer
2691
+ lyrics_viewer_html = gr.HTML(
2692
+ """
2693
+ <div id="beat-sync-container" style="width:100%; height:400px; position:relative;">
2694
+ <div id="loading-message">Please analyze audio to view beat-synced lyrics</div>
2695
+ <div id="beat-sync-timeline" style="display:none; width:100%; height:80px; background:#f0f0f0; position:relative; overflow:hidden; margin-bottom:10px;">
2696
+ <div id="playhead" style="position:absolute; width:2px; height:100%; background:red; top:0; left:0; z-index:10;"></div>
2697
+ <div id="beat-markers" style="position:absolute; width:100%; height:100%; top:0; left:0;"></div>
2698
+ </div>
2699
+ <div id="lyrics-display" style="display:none; width:100%; height:300px; overflow-y:auto; font-size:16px; line-height:1.6;"></div>
2700
+ </div>
2701
+
2702
+ <script>
2703
+ let beatSyncData = null;
2704
+ let isPlaying = false;
2705
+ let audioElement = null;
2706
+ let playheadInterval = null;
2707
+ let lastHighlightedWord = -1;
2708
+
2709
+ function initBeatSyncViewer(data) {
2710
+ beatSyncData = data;
2711
+ const container = document.getElementById('beat-sync-container');
2712
+ const timeline = document.getElementById('beat-sync-timeline');
2713
+ const beatMarkers = document.getElementById('beat-markers');
2714
+ const lyricsDisplay = document.getElementById('lyrics-display');
2715
+ const loadingMessage = document.getElementById('loading-message');
2716
+
2717
+ // Clear previous content
2718
+ beatMarkers.innerHTML = '';
2719
+ lyricsDisplay.innerHTML = '';
2720
+
2721
+ // Show the viewer elements, hide loading message
2722
+ loadingMessage.style.display = 'none';
2723
+ timeline.style.display = 'block';
2724
+ lyricsDisplay.style.display = 'block';
2725
+
2726
+ // Create beat markers
2727
+ const duration = data.duration;
2728
+ const beatTimes = data.beat_times;
2729
+ const beatStrengths = data.beat_strengths;
2730
+
2731
+ if (beatTimes && beatTimes.length > 0) {
2732
+ for (let i = 0; i < beatTimes.length; i++) {
2733
+ const beatTime = beatTimes[i];
2734
+ const beatStrength = beatStrengths && beatStrengths[i] ? beatStrengths[i] : 1.0;
2735
+ const position = (beatTime / duration) * 100;
2736
+
2737
+ // Create marker with height based on beat strength
2738
+ const marker = document.createElement('div');
2739
+ const height = 30 + (beatStrength * 50); // Scale between 30-80px
2740
+
2741
+ marker.className = 'beat-marker';
2742
+ marker.style.position = 'absolute';
2743
+ marker.style.left = `${position}%`;
2744
+ marker.style.top = `${(80 - height) / 2}px`;
2745
+ marker.style.width = '2px';
2746
+ marker.style.height = `${height}px`;
2747
+ marker.style.background = beatStrength > 0.7 ? '#2d7dd2' : '#97c6e3';
2748
+ marker.setAttribute('data-time', beatTime);
2749
+
2750
+ beatMarkers.appendChild(marker);
2751
+ }
2752
+ }
2753
+
2754
+ // Create lyrics display
2755
+ if (data.lyrics_data && data.lyrics_data.length > 0) {
2756
+ for (let i = 0; i < data.lyrics_data.length; i++) {
2757
+ const line = data.lyrics_data[i];
2758
+ const lineElement = document.createElement('div');
2759
+ lineElement.className = 'lyric-line';
2760
+ lineElement.style.marginBottom = '15px';
2761
+
2762
+ // Create word elements for the line
2763
+ line.words.forEach((word, j) => {
2764
+ const wordSpan = document.createElement('span');
2765
+ wordSpan.innerText = word.text + ' ';
2766
+ wordSpan.className = 'lyric-word';
2767
+ wordSpan.style.display = 'inline-block';
2768
+ wordSpan.style.transition = 'color 0.1s, transform 0.1s';
2769
+ wordSpan.setAttribute('data-time', word.time);
2770
+ wordSpan.setAttribute('data-word-index', j);
2771
+ wordSpan.setAttribute('data-line-index', i);
2772
+
2773
+ if (word.is_strong) {
2774
+ wordSpan.style.fontWeight = 'bold';
2775
+ }
2776
+
2777
+ lineElement.appendChild(wordSpan);
2778
+ });
2779
+
2780
+ lyricsDisplay.appendChild(lineElement);
2781
+ }
2782
+ } else {
2783
+ lyricsDisplay.innerHTML = '<p>No lyrics data available or could not align lyrics with beats.</p>';
2784
+ }
2785
+
2786
+ // Add timeline click/drag handler for scrubbing
2787
+ timeline.addEventListener('click', function(e) {
2788
+ if (!audioElement) return;
2789
+
2790
+ const rect = timeline.getBoundingClientRect();
2791
+ const clickPosition = (e.clientX - rect.left) / rect.width;
2792
+ const newTime = clickPosition * duration;
2793
+
2794
+ // Set audio to new position
2795
+ audioElement.currentTime = newTime;
2796
+
2797
+ // Update playhead and lyrics
2798
+ updatePlayhead(newTime);
2799
+ highlightLyricsAtTime(newTime);
2800
+ });
2801
+ }
2802
+
2803
+ function connectAudio(audioSelector) {
2804
+ // Find the audio element from Gradio's component
2805
+ const audioContainer = document.querySelector(audioSelector);
2806
+ if (!audioContainer) return;
2807
+
2808
+ audioElement = audioContainer.querySelector('audio');
2809
+ if (!audioElement) return;
2810
+
2811
+ // Add event listeners to the audio element
2812
+ audioElement.addEventListener('play', startPlayheadMovement);
2813
+ audioElement.addEventListener('pause', stopPlayheadMovement);
2814
+ audioElement.addEventListener('ended', stopPlayheadMovement);
2815
+ audioElement.addEventListener('seeked', function() {
2816
+ updatePlayhead(audioElement.currentTime);
2817
+ highlightLyricsAtTime(audioElement.currentTime);
2818
+ });
2819
+ }
2820
+
2821
+ function startPlayheadMovement() {
2822
+ isPlaying = true;
2823
+ if (playheadInterval) clearInterval(playheadInterval);
2824
+
2825
+ playheadInterval = setInterval(() => {
2826
+ if (!audioElement || !isPlaying) return;
2827
+ updatePlayhead(audioElement.currentTime);
2828
+ highlightLyricsAtTime(audioElement.currentTime);
2829
+ }, 50); // Update every 50ms
2830
+ }
2831
+
2832
+ function stopPlayheadMovement() {
2833
+ isPlaying = false;
2834
+ if (playheadInterval) {
2835
+ clearInterval(playheadInterval);
2836
+ playheadInterval = null;
2837
+ }
2838
+ }
2839
+
2840
+ function updatePlayhead(currentTime) {
2841
+ if (!beatSyncData) return;
2842
+
2843
+ const playhead = document.getElementById('playhead');
2844
+ const position = (currentTime / beatSyncData.duration) * 100;
2845
+ playhead.style.left = `${position}%`;
2846
+ }
2847
+
2848
+ function highlightLyricsAtTime(currentTime) {
2849
+ if (!beatSyncData || !beatSyncData.lyrics_data) return;
2850
+
2851
+ // Reset all word styling
2852
+ const words = document.querySelectorAll('.lyric-word');
2853
+ words.forEach(word => {
2854
+ word.style.color = 'black';
2855
+ word.style.transform = 'scale(1)';
2856
+ });
2857
+
2858
+ // Find the current word to highlight
2859
+ let currentWordElement = null;
2860
+ let bestTimeDiff = Infinity;
2861
+
2862
+ words.forEach(word => {
2863
+ const wordTime = parseFloat(word.getAttribute('data-time'));
2864
+
2865
+ // Highlight words that have already been passed or are coming up soon
2866
+ if (wordTime <= currentTime + 0.2) {
2867
+ const timeDiff = Math.abs(wordTime - currentTime);
2868
+
2869
+ // Mark past words as "read"
2870
+ if (wordTime < currentTime - 0.5) {
2871
+ word.style.color = '#666666';
2872
+ }
2873
+
2874
+ // Find the closest word to current time
2875
+ if (timeDiff < bestTimeDiff) {
2876
+ bestTimeDiff = timeDiff;
2877
+ currentWordElement = word;
2878
+ }
2879
+ }
2880
+ });
2881
+
2882
+ // Highlight current word
2883
+ if (currentWordElement) {
2884
+ currentWordElement.style.color = '#e63946';
2885
+ currentWordElement.style.transform = 'scale(1.1)';
2886
+
2887
+ // Scroll to keep the current line visible
2888
+ const lineIndex = parseInt(currentWordElement.getAttribute('data-line-index'));
2889
+ const lineElement = document.querySelectorAll('.lyric-line')[lineIndex];
2890
+
2891
+ if (lineElement) {
2892
+ const lyricsDisplay = document.getElementById('lyrics-display');
2893
+ const displayRect = lyricsDisplay.getBoundingClientRect();
2894
+ const lineRect = lineElement.getBoundingClientRect();
2895
+
2896
+ // Check if the line is outside the visible area
2897
+ if (lineRect.top < displayRect.top || lineRect.bottom > displayRect.bottom) {
2898
+ lineElement.scrollIntoView({ behavior: 'smooth', block: 'center' });
2899
+ }
2900
+ }
2901
+ }
2902
+ }
2903
+
2904
+ // Wait for Gradio to fully load the components
2905
+ function waitForGradio() {
2906
+ // Connect to the audio element when available
2907
+ setTimeout(() => {
2908
+ connectAudio('#component-17'); // Replace with the actual selector
2909
+
2910
+ // Check for data updates from Gradio
2911
+ const observer = new MutationObserver((mutations) => {
2912
+ for (const mutation of mutations) {
2913
+ if (mutation.type === 'attributes' &&
2914
+ mutation.target.id === 'beat-sync-container' &&
2915
+ mutation.target.hasAttribute('data-sync-info')) {
2916
+
2917
+ const dataStr = mutation.target.getAttribute('data-sync-info');
2918
+ try {
2919
+ const data = JSON.parse(dataStr);
2920
+ initBeatSyncViewer(data);
2921
+ } catch (e) {
2922
+ console.error('Error parsing beat sync data:', e);
2923
+ }
2924
+ }
2925
+ }
2926
+ });
2927
+
2928
+ observer.observe(document.getElementById('beat-sync-container'), {
2929
+ attributes: true,
2930
+ attributeFilter: ['data-sync-info']
2931
+ });
2932
+
2933
+ // Try to find all audio elements and add a more robust connection method
2934
+ function tryConnectAudio() {
2935
+ const audioElements = document.querySelectorAll('audio');
2936
+ for (const audio of audioElements) {
2937
+ if (audio.parentElement.closest('#component-17') ||
2938
+ audio.parentElement.closest('.beat-synced-lyrics-tab')) {
2939
+ audioElement = audio;
2940
+ audioElement.addEventListener('play', startPlayheadMovement);
2941
+ audioElement.addEventListener('pause', stopPlayheadMovement);
2942
+ audioElement.addEventListener('ended', stopPlayheadMovement);
2943
+ audioElement.addEventListener('seeked', function() {
2944
+ updatePlayhead(audioElement.currentTime);
2945
+ highlightLyricsAtTime(audioElement.currentTime);
2946
+ });
2947
+ return true;
2948
+ }
2949
+ }
2950
+ return false;
2951
+ }
2952
+
2953
+ // Keep trying until we find the audio element
2954
+ if (!tryConnectAudio()) {
2955
+ setTimeout(tryConnectAudio, 1000); // Retry after 1 second
2956
+ }
2957
+ }, 2000);
2958
+ }
2959
+
2960
+ // Initialize when DOM is ready
2961
+ if (document.readyState === 'loading') {
2962
+ document.addEventListener('DOMContentLoaded', waitForGradio);
2963
+ } else {
2964
+ waitForGradio();
2965
+ }
2966
+ </script>
2967
+ """
2968
+ )
2969
+
2970
  with gr.TabItem("Syllable Analysis"):
2971
  syllable_analysis_output = gr.Textbox(label="Detailed Syllable Analysis", lines=16)
2972
  prompt_template_output = gr.Textbox(label="Prompt Template", lines=16)
 
2974
  # Processing function with better handling of results
2975
  def display_results(audio_file):
2976
  if audio_file is None:
2977
+ return "Please upload an audio file.", "No emotion analysis available.", "No audio classification available.", "No lyrics generated.", "No rhythm analysis available.", audio_file, """<script>
2978
+ document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
2979
+ document.getElementById('loading-message').style.display = 'block';
2980
+ document.getElementById('beat-sync-timeline').style.display = 'none';
2981
+ document.getElementById('lyrics-display').style.display = 'none';
2982
+ </script>""", "No syllable analysis available.", "No prompt template available."
2983
 
2984
  try:
2985
  # Process audio and get results
 
2987
 
2988
  # Check if we got an error message instead of results
2989
  if isinstance(results, str) and "Error" in results:
2990
+ return results, "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", audio_file, """<script>
2991
+ document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
2992
+ document.getElementById('loading-message').style.display = 'block';
2993
+ document.getElementById('beat-sync-timeline').style.display = 'none';
2994
+ document.getElementById('lyrics-display').style.display = 'none';
2995
+ </script>""", "No syllable analysis available", "No prompt template available"
2996
  elif isinstance(results, tuple) and isinstance(results[0], str) and "Error" in results[0]:
2997
+ return results[0], "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", audio_file, """<script>
2998
+ document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
2999
+ document.getElementById('loading-message').style.display = 'block';
3000
+ document.getElementById('beat-sync-timeline').style.display = 'none';
3001
+ document.getElementById('lyrics-display').style.display = 'none';
3002
+ </script>""", "No syllable analysis available", "No prompt template available"
3003
 
3004
  # For backwards compatibility, handle both dictionary and tuple returns
3005
  if isinstance(results, dict):
 
3086
  else:
3087
  ast_text = "No valid audio classification results available."
3088
 
3089
+ # Prepare beat-synced lyrics visualization data
3090
+ try:
3091
+ audio_data = extract_audio_features(audio_file)
3092
+
3093
+ # Get beat information
3094
+ y, sr = load_audio(audio_file, SAMPLE_RATE)
3095
+ beats_info = detect_beats(y, sr)
3096
+
3097
+ # Prepare data for beat-synced lyrics
3098
+ visualization_data = prepare_beat_synced_lyrics(audio_data, clean_lyrics, beats_info)
3099
+
3100
+ # Convert to JSON for JavaScript
3101
+ visualization_json = json.dumps(visualization_data)
3102
+
3103
+ # Create HTML with the data injected - avoid using f-string for the entire HTML
3104
+ # Handle string escaping separately to avoid f-string backslash issues
3105
+ escaped_json = visualization_json.replace("'", "\\'")
3106
+
3107
+ # Create HTML in parts to avoid f-string backslash issues
3108
+ html_start = """<div id="beat-sync-container" data-sync-info='"""
3109
+ html_middle = """' style="width:100%; height:400px; position:relative;">
3110
+ <div id="loading-message">Loading beat-synced lyrics viewer...</div>
3111
+ <div id="beat-sync-timeline" style="display:none; width:100%; height:80px; background:#f0f0f0; position:relative; overflow:hidden; margin-bottom:10px;">
3112
+ <div id="playhead" style="position:absolute; width:2px; height:100%; background:red; top:0; left:0; z-index:10;"></div>
3113
+ <div id="beat-markers" style="position:absolute; width:100%; height:100%; top:0; left:0;"></div>
3114
+ </div>
3115
+ <div id="lyrics-display" style="display:none; width:100%; height:300px; overflow-y:auto; font-size:16px; line-height:1.6;"></div>
3116
+ </div>
3117
+ <script>
3118
+ // Signal to the viewer that new data is available
3119
+ const container = document.getElementById('beat-sync-container');
3120
+ if (container) {
3121
+ // This will trigger the mutation observer
3122
+ container.setAttribute('data-sync-info', '"""
3123
+ html_end = """');
3124
+ }
3125
+ </script>"""
3126
+
3127
+ # Combine parts without using f-strings in the parts that don't need variables
3128
+ beat_sync_html = html_start + visualization_json + html_middle + escaped_json + html_end
3129
+ except Exception as e:
3130
+ print(f"Error creating beat-synced lyrics: {str(e)}")
3131
+ # Handle string escaping separately to avoid f-string backslash issues
3132
+ escaped_error = str(e).replace("'", "\\'")
3133
+
3134
+ # Use regular strings instead of f-strings to avoid backslash issues
3135
+ html_start = """<script>
3136
+ document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
3137
+ document.getElementById('loading-message').style.display = 'block';
3138
+ document.getElementById('loading-message').innerText = 'Error creating beat-synced lyrics: """
3139
+ html_end = """';
3140
+ document.getElementById('beat-sync-timeline').style.display = 'none';
3141
+ document.getElementById('lyrics-display').style.display = 'none';
3142
+ </script>"""
3143
+
3144
+ # Combine parts without using f-strings
3145
+ beat_sync_html = html_start + escaped_error + html_end
3146
+
3147
  # Return all results including new fields
3148
+ return genre_results, emotion_text, ast_text, clean_lyrics, rhythm_analysis, audio_file, beat_sync_html, syllable_analysis, prompt_template
3149
 
3150
  except Exception as e:
3151
  error_msg = f"Error: {str(e)}"
3152
  print(error_msg)
3153
+
3154
+ # Use a raw string literal to avoid f-string backslash issues
3155
+ error_html = """<script>
3156
+ document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
3157
+ document.getElementById('loading-message').style.display = 'block';
3158
+ document.getElementById('loading-message').innerText = 'Error processing audio';
3159
+ document.getElementById('beat-sync-timeline').style.display = 'none';
3160
+ document.getElementById('lyrics-display').style.display = 'none';
3161
+ </script>"""
3162
+
3163
+ return error_msg, "Error in emotion analysis", "Error in audio classification", "No lyrics generated", "No rhythm analysis available", audio_file, error_html, "No syllable analysis available", "No prompt template available"
3164
 
3165
  # Connect the button to the display function with updated outputs
3166
  submit_btn.click(
3167
  fn=display_results,
3168
  inputs=[audio_input],
3169
+ outputs=[genre_output, emotion_output, ast_output, lyrics_output, rhythm_analysis_output, synced_audio_output, lyrics_viewer_html, syllable_analysis_output, prompt_template_output]
3170
  )
3171
 
3172
  # Enhanced explanation of how the system works
 
3203
 
3204
  8. **Refinement**: If significant rhythm mismatches are detected, the system can automatically refine the lyrics for better alignment.
3205
 
3206
+ 9. **Beat-Synced Visualization**: The beat-synced lyrics viewer shows you exactly how the lyrics align with the music:
3207
+ - Beat markers show the song's rhythmic structure
3208
+ - Words are highlighted in sync with the music
3209
+ - Strong beats and stressed syllables are emphasized
3210
+ - You can scrub through the song to see how lyrics and music match at any point
3211
+
3212
  This multi-step process creates lyrics that feel naturally connected to the music, as if they were written specifically for it.
3213
  """)
3214