diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -4,8 +4,8 @@ import gradio as gr import torch import numpy as np import re -import pronouncing # Add this to requirements.txt for syllable counting -import functools # Add this for lru_cache functionality +import pronouncing +import functools from transformers import ( AutoModelForAudioClassification, AutoFeatureExtractor, @@ -22,8 +22,12 @@ from utils import ( format_genre_results, ensure_cuda_availability ) -from emotionanalysis import MusicAnalyzer +from emotionanalysis import MusicAnalyzer import librosa +from beat_analysis import BeatAnalyzer # Import the BeatAnalyzer class + +# Initialize beat analyzer +beat_analyzer = BeatAnalyzer() # Login to Hugging Face Hub if token is provided if "HF_TOKEN" in os.environ: @@ -38,3971 +42,945 @@ SAMPLE_RATE = 22050 # Standard sample rate for audio processing # Check CUDA availability (for informational purposes) CUDA_AVAILABLE = ensure_cuda_availability() -# Create music detection pipeline -print(f"Loading music detection model: {MUSIC_DETECTION_MODEL}") +# Load models at initialization time +print("Loading genre classification model...") try: - music_detector = pipeline( - "audio-classification", - model=MUSIC_DETECTION_MODEL, - device=0 if CUDA_AVAILABLE else -1 + genre_feature_extractor = AutoFeatureExtractor.from_pretrained(GENRE_MODEL_NAME) + genre_model = AutoModelForAudioClassification.from_pretrained( + GENRE_MODEL_NAME, + device_map="auto" if CUDA_AVAILABLE else None ) - print("Successfully loaded music detection pipeline") + # Create a convenience wrapper function with the same interface as before + def get_genre_model(): + return genre_model, genre_feature_extractor except Exception as e: - print(f"Error creating music detection pipeline: {str(e)}") - # Fallback to manual loading - try: - music_processor = AutoFeatureExtractor.from_pretrained(MUSIC_DETECTION_MODEL) - music_model = AutoModelForAudioClassification.from_pretrained(MUSIC_DETECTION_MODEL) - print("Successfully loaded music detection model and feature extractor") - except Exception as e2: - print(f"Error loading music detection model components: {str(e2)}") - raise RuntimeError(f"Could not load music detection model: {str(e2)}") + print(f"Error loading genre model: {str(e)}") + genre_model = None + genre_feature_extractor = None -# Create genre classification pipeline -print(f"Loading audio classification model: {GENRE_MODEL_NAME}") +# Load LLM and tokenizer at initialization time +print("Loading Qwen LLM model with 4-bit quantization...") try: - genre_classifier = pipeline( - "audio-classification", - model=GENRE_MODEL_NAME, - device=0 if CUDA_AVAILABLE else -1 + # Configure 4-bit quantization for better performance + quantization_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True + ) + + llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME) + llm_model = AutoModelForCausalLM.from_pretrained( + LLM_MODEL_NAME, + quantization_config=quantization_config, + device_map="auto", + trust_remote_code=True, + torch_dtype=torch.float16, + use_cache=True ) - print("Successfully loaded audio classification pipeline") except Exception as e: - print(f"Error creating pipeline: {str(e)}") - # Fallback to manual loading - try: - genre_processor = AutoFeatureExtractor.from_pretrained(GENRE_MODEL_NAME) - genre_model = AutoModelForAudioClassification.from_pretrained(GENRE_MODEL_NAME) - print("Successfully loaded audio 
classification model and feature extractor") - except Exception as e2: - print(f"Error loading model components: {str(e2)}") - raise RuntimeError(f"Could not load genre classification model: {str(e2)}") - -# Load LLM with appropriate quantization for T4 GPU -bnb_config = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_quant_type="nf4", - bnb_4bit_compute_dtype=torch.float16, -) - -llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME) -llm_model = AutoModelForCausalLM.from_pretrained( - LLM_MODEL_NAME, - device_map="auto", - quantization_config=bnb_config, - torch_dtype=torch.float16, -) - -# Create LLM pipeline -llm_pipeline = pipeline( - "text-generation", - model=llm_model, - tokenizer=llm_tokenizer, - max_new_tokens=512, -) + print(f"Error loading LLM model: {str(e)}") + llm_tokenizer = None + llm_model = None -# Initialize music emotion analyzer +# Create music analyzer instance music_analyzer = MusicAnalyzer() -# New global function moved outside of verify_flexible_syllable_counts -@functools.lru_cache(maxsize=512) -def cached_phones_for_word(word): - """Get word pronunciations with caching for better performance.""" - return pronouncing.phones_for_word(word) - -@functools.lru_cache(maxsize=512) -def count_syllables_for_word(word): - """Count syllables in a single word with caching for performance.""" - # Try using pronouncing library first - pronunciations = cached_phones_for_word(word.lower()) - if pronunciations: - return pronouncing.syllable_count(pronunciations[0]) - - # Fallback method for words not in the pronouncing dictionary - vowels = "aeiouy" - word = word.lower() - count = 0 - prev_is_vowel = False - - for char in word: - is_vowel = char in vowels - if is_vowel and not prev_is_vowel: - count += 1 - prev_is_vowel = is_vowel - - # Handle special cases - if word.endswith('e') and not word.endswith('le'): - count -= 1 - if word.endswith('le') and len(word) > 2 and word[-3] not in vowels: - count += 1 - if count == 0: - count = 1 - - return count - -@functools.lru_cache(maxsize=512) -def get_word_stress(word): - """Get the stress pattern for a word with improved fallback handling.""" - pronunciations = cached_phones_for_word(word.lower()) - if pronunciations: - return pronouncing.stresses(pronunciations[0]) - - # Enhanced fallback for words not in the dictionary - syllables = count_syllables_for_word(word) - - # Common English stress patterns by word length - if syllables == 1: - return "1" # Single syllable words are stressed - elif syllables == 2: - # Most 2-syllable nouns and adjectives stress first syllable - # Common endings that indicate second-syllable stress - second_syllable_stress = ["ing", "er", "or", "ize", "ise", "ate", "ect", "end", "ure"] - if any(word.endswith(ending) for ending in second_syllable_stress): - return "01" - else: - return "10" # Default for 2-syllable words - elif syllables == 3: - # Common endings for specific stress patterns in 3-syllable words - if any(word.endswith(ending) for ending in ["ity", "ety", "ify", "ogy", "graphy"]): - return "100" # First syllable stress - elif any(word.endswith(ending) for ending in ["ation", "ious", "itis"]): - return "010" # Middle syllable stress - else: - return "100" # Default for 3-syllable words - else: - # For longer words, use common English patterns - return "1" + "0" * (syllables - 1) - -# New function: Count syllables in text -def count_syllables(text): - """Count syllables in a given text using the pronouncing library.""" - words = re.findall(r'\b[a-zA-Z]+\b', text.lower()) - 
syllable_count = 0 - - for word in words: - syllable_count += count_syllables_for_word(word) +# Process uploaded audio file +def process_audio(audio_file): + if audio_file is None: + return "No audio file provided", None, None, None, None, None, None, None - return syllable_count - -def extract_audio_features(audio_file): - """Extract audio features from an audio file.""" try: - # Load the audio file using utility function - y, sr = load_audio(audio_file, SAMPLE_RATE) + # Load and analyze audio + y, sr = load_audio(audio_file, sr=SAMPLE_RATE) - if y is None or sr is None: - raise ValueError("Failed to load audio data") - - # Get audio duration in seconds + # Basic audio information duration = extract_audio_duration(y, sr) - # Extract MFCCs for genre classification (may not be needed with the pipeline) - mfccs_mean = extract_mfcc_features(y, sr, n_mfcc=20) + # Detect time signature using the advanced method in BeatAnalyzer + time_sig_result = beat_analyzer.detect_time_signature(audio_file) + time_signature = time_sig_result["time_signature"] - return { - "features": mfccs_mean, - "duration": duration, - "waveform": y, - "sample_rate": sr, - "path": audio_file # Keep path for the pipeline - } - except Exception as e: - print(f"Error extracting audio features: {str(e)}") - raise ValueError(f"Failed to extract audio features: {str(e)}") - -def classify_genre(audio_data): - """Classify the genre of the audio using the loaded model.""" - try: - # First attempt: Try using the pipeline if available - if 'genre_classifier' in globals(): - results = genre_classifier(audio_data["path"]) - # Transform pipeline results to our expected format - top_genres = [(result["label"], result["score"]) for result in results[:3]] - return top_genres + # Analyze music with MusicAnalyzer for emotion and theme analysis + music_analysis = music_analyzer.analyze_music(audio_file) - # Second attempt: Use manually loaded model components - elif 'genre_processor' in globals() and 'genre_model' in globals(): - # Process audio input with feature extractor - inputs = genre_processor( - audio_data["waveform"], - sampling_rate=audio_data["sample_rate"], + # Override MusicAnalyzer's time signature with the one detected by BeatAnalyzer + music_analysis["rhythm_analysis"]["estimated_time_signature"] = time_signature + + # Extract key information + tempo = music_analysis["rhythm_analysis"]["tempo"] + emotion = music_analysis["emotion_analysis"]["primary_emotion"] + theme = music_analysis["theme_analysis"]["primary_theme"] + + # Use genre classification directly instead of pipeline + if genre_model is not None and genre_feature_extractor is not None: + # Resample audio to 16000 Hz for the genre model + y_16k = librosa.resample(y, orig_sr=sr, target_sr=16000) + + # Extract features + inputs = genre_feature_extractor( + y_16k, + sampling_rate=16000, return_tensors="pt" - ) + ).to(genre_model.device) + # Classify genre with torch.no_grad(): outputs = genre_model(**inputs) - predictions = outputs.logits.softmax(dim=-1) - - # Get the top 3 genres - values, indices = torch.topk(predictions, 3) - - # Map indices to genre labels - genre_labels = genre_model.config.id2label - - top_genres = [] - for i, (value, index) in enumerate(zip(values[0], indices[0])): - genre = genre_labels[index.item()] - confidence = value.item() - top_genres.append((genre, confidence)) - - return top_genres + logits = outputs.logits + probs = torch.nn.functional.softmax(logits, dim=-1) + + # Get top genres + values, indices = torch.topk(probs[0], k=5) + top_genres 
= [(genre_model.config.id2label[idx.item()], val.item()) for val, idx in zip(values, indices)] + else: + # Fallback if model loading failed + top_genres = [("Unknown", 1.0)] + + # Format genre results for display + genre_results_text = format_genre_results(top_genres) + primary_genre = top_genres[0][0] + # Override time signature for pop and disco genres to always be 4/4 + if any(genre.lower() in primary_genre.lower() for genre in ['pop', 'disco']): + music_analysis["rhythm_analysis"]["estimated_time_signature"] = "4/4" + time_signature = "4/4" else: - raise ValueError("No genre classification model available") - - except Exception as e: - print(f"Error in genre classification: {str(e)}") - # Fallback: return a default genre if everything fails - return [("rock", 1.0)] + # Ensure time signature is one of the supported ones (4/4, 3/4, 6/8) + if time_signature not in ["4/4", "3/4", "6/8"]: + time_signature = "4/4" # Default to 4/4 if unsupported + music_analysis["rhythm_analysis"]["estimated_time_signature"] = time_signature + + # Analyze beat patterns and create lyrics template using the time signature + beat_analysis = beat_analyzer.analyze_beat_pattern(audio_file, time_signature=time_signature, auto_detect=False) + lyric_templates = beat_analyzer.create_lyric_template(beat_analysis) + + # Store these in the music_analysis dict for use in lyrics generation + music_analysis["beat_analysis"] = beat_analysis + music_analysis["lyric_templates"] = lyric_templates + + # Prepare analysis summary + analysis_summary = f""" +### Music Analysis Results + +**Duration:** {duration:.2f} seconds +**Tempo:** {tempo:.1f} BPM +**Time Signature:** {time_signature} (Confidence: {time_sig_result["confidence"]:.1%}) +**Key:** {music_analysis["tonal_analysis"]["key"]} {music_analysis["tonal_analysis"]["mode"]} +**Primary Emotion:** {emotion} +**Primary Theme:** {theme} +**Top Genre:** {primary_genre} + +{genre_results_text} +""" -def detect_music(audio_data): - """Detect if the audio is music using the MIT AST model.""" - try: - # First attempt: Try using the pipeline if available - if 'music_detector' in globals(): - results = music_detector(audio_data["path"]) - # Look for music-related classes in the results - music_confidence = 0.0 - for result in results: - label = result["label"].lower() - if any(music_term in label for music_term in ["music", "song", "singing", "instrument"]): - music_confidence = max(music_confidence, result["score"]) - return music_confidence >= 0.2, results + # Add beat analysis summary + if lyric_templates: + analysis_summary += f""" +### Beat Analysis + +**Total Phrases:** {len(lyric_templates)} +**Average Beats Per Phrase:** {np.mean([t['num_beats'] for t in lyric_templates]):.1f} +**Beat Pattern Examples:** +- Phrase 1: {lyric_templates[0]['stress_pattern'] if lyric_templates else 'N/A'} +- Phrase 2: {lyric_templates[1]['stress_pattern'] if len(lyric_templates) > 1 else 'N/A'} +""" - # Second attempt: Use manually loaded model components - elif 'music_processor' in globals() and 'music_model' in globals(): - # Process audio input with feature extractor - inputs = music_processor( - audio_data["waveform"], - sampling_rate=audio_data["sample_rate"], - return_tensors="pt" - ) - - with torch.no_grad(): - outputs = music_model(**inputs) - predictions = outputs.logits.softmax(dim=-1) - - # Get the top predictions - values, indices = torch.topk(predictions, 5) - - # Map indices to labels - labels = music_model.config.id2label - - # Check for music-related classes - music_confidence = 0.0 
- results = [] - - for i, (value, index) in enumerate(zip(values[0], indices[0])): - label = labels[index.item()].lower() - score = value.item() - results.append({"label": label, "score": score}) - - if any(music_term in label for music_term in ["music", "song", "singing", "instrument"]): - music_confidence = max(music_confidence, score) - - return music_confidence >= 0.2, results - + # Check if genre is supported for lyrics generation + # Use the supported_genres list from BeatAnalyzer + genre_supported = any(genre.lower() in primary_genre.lower() for genre in beat_analyzer.supported_genres) + + # Generate lyrics only for supported genres + if genre_supported: + lyrics = generate_lyrics(music_analysis, primary_genre, duration) + beat_match_analysis = analyze_lyrics_rhythm_match(lyrics, lyric_templates, primary_genre) else: - raise ValueError("No music detection model available") - + supported_genres_str = ", ".join([genre.capitalize() for genre in beat_analyzer.supported_genres]) + lyrics = f"Lyrics generation is only supported for the following genres: {supported_genres_str}.\n\nDetected genre '{primary_genre}' doesn't have strong syllable-to-beat patterns required for our lyric generation algorithm." + beat_match_analysis = "Lyrics generation not available for this genre." + + return analysis_summary, lyrics, tempo, time_signature, emotion, theme, primary_genre, beat_match_analysis + except Exception as e: - print(f"Error in music detection: {str(e)}") - return False, [] + error_msg = f"Error processing audio: {str(e)}" + print(error_msg) + return error_msg, None, None, None, None, None, None, None -def detect_beats(y, sr): - """Enhanced beat detection with adaptive threshold analysis, improved time signature detection and scientific confidence metrics.""" - # STEP 1: Improved pre-processing with robustness for quiet sections - # Apply a small floor to avoid division-by-zero issues - y = np.clip(y, 1e-10, None) # Prevent extreme quiet sections from causing NaN - - # Separate harmonic and percussive components - y_harmonic, y_percussive = librosa.effects.hpss(y) - - # Generate multiple onset envelopes with smoothing for stability - onset_env_full = librosa.onset.onset_strength(y=y, sr=sr) - onset_env_perc = librosa.onset.onset_strength(y=y_percussive, sr=sr) - - # Apply small smoothing to handle quiet sections - onset_env_full = np.maximum(onset_env_full, 1e-6) # Minimum threshold to avoid NaN - onset_env_perc = np.maximum(onset_env_perc, 1e-6) - - # Create weighted combination - combined_onset = onset_env_full * 0.3 + onset_env_perc * 0.7 - - # STEP 2: Multi-strategy tempo and beat detection with confidence tracking - tempo_candidates = [] - beat_candidates = [] - consistency_metrics = [] - - # Strategy 1: Standard detection - tempo1, beats1 = librosa.beat.beat_track( - onset_envelope=combined_onset, - sr=sr, - tightness=100 # More sensitive tracking - ) - tempo_candidates.append(tempo1) - beat_candidates.append(beats1) - - # Calculate autocorrelation-based confidence for this tempo - ac = librosa.autocorrelate(combined_onset) - estimated_period = int(sr * 60.0 / (tempo1 * librosa.get_duration(y=y, sr=sr) / len(combined_onset))) - if estimated_period < len(ac) and estimated_period > 0: - # Measure peak height relative to surroundings - local_ac = ac[max(0, estimated_period-5):min(len(ac), estimated_period+6)] - if np.max(local_ac) > 0: - tempo1_confidence = ac[estimated_period] / np.max(local_ac) - else: - tempo1_confidence = 0.5 - else: - tempo1_confidence = 0.5 - 
consistency_metrics.append(tempo1_confidence) - - # Strategy 2: Try with different tempo range for complex signatures - tempo2, beats2 = librosa.beat.beat_track( - onset_envelope=combined_onset, - sr=sr, - tightness=100, - start_bpm=60 # Lower starting BPM helps find different time signatures - ) - tempo_candidates.append(tempo2) - beat_candidates.append(beats2) - - # Calculate confidence for the second tempo estimate - estimated_period2 = int(sr * 60.0 / (tempo2 * librosa.get_duration(y=y, sr=sr) / len(combined_onset))) - if estimated_period2 < len(ac) and estimated_period2 > 0: - local_ac2 = ac[max(0, estimated_period2-5):min(len(ac), estimated_period2+6)] - if np.max(local_ac2) > 0: - tempo2_confidence = ac[estimated_period2] / np.max(local_ac2) - else: - tempo2_confidence = 0.5 - else: - tempo2_confidence = 0.5 - consistency_metrics.append(tempo2_confidence) - - # Strategy 3: Use dynamic programming for beat tracking +def generate_lyrics(music_analysis, genre, duration): try: - tempo3, beats3 = librosa.beat.beat_track( - onset_envelope=combined_onset, - sr=sr, - tightness=300, # Higher tightness for more structured detection - trim=False - ) - tempo_candidates.append(tempo3) - beat_candidates.append(beats3) + # Extract meaningful information for context + tempo = music_analysis["rhythm_analysis"]["tempo"] + key = music_analysis["tonal_analysis"]["key"] + mode = music_analysis["tonal_analysis"]["mode"] + emotion = music_analysis["emotion_analysis"]["primary_emotion"] + theme = music_analysis["theme_analysis"]["primary_theme"] + + # Get beat analysis and templates + lyric_templates = music_analysis.get("lyric_templates", []) - # Calculate DP-based confidence - if len(beats3) > 1: - beat_times3 = librosa.frames_to_time(beats3, sr=sr) - intervals3 = np.diff(beat_times3) - tempo3_consistency = 1.0 / (1.0 + np.std(intervals3)/np.mean(intervals3)) if np.mean(intervals3) > 0 else 0.5 + # Define num_phrases here to ensure it's available in all code paths + num_phrases = len(lyric_templates) if lyric_templates else 4 + + # Verify LLM is loaded + if llm_model is None or llm_tokenizer is None: + return "Error: LLM model not properly loaded" + + # If no templates, fall back to original method + if not lyric_templates: + # Simplified prompt + prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM. The emotion is {emotion} and theme is {theme}. + +ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT. 
+""" else: - tempo3_consistency = 0.5 - consistency_metrics.append(tempo3_consistency) - except Exception: - # Skip if this approach fails - pass - - # Select the best strategy based on improved consistency measurement - beat_consistency = [] - for i, beats in enumerate(beat_candidates): - if len(beats) <= 1: - beat_consistency.append(0) - continue + # Calculate the typical syllable range for this genre + if num_phrases > 0: + # Get max syllables per line from templates + max_syllables = max([t.get('max_expected', 7) for t in lyric_templates]) if lyric_templates[0].get('max_expected') else 7 + min_syllables = min([t.get('min_expected', 2) for t in lyric_templates]) if lyric_templates[0].get('min_expected') else 2 + avg_syllables = (min_syllables + max_syllables) // 2 + else: + min_syllables = 2 + max_syllables = 7 + avg_syllables = 4 + + # Create random examples based on the song's theme and emotion + # to avoid the LLM copying our examples directly + example_themes = [ + {"emotion": "love", "fragments": ["I see your face", "across the room", "my heart beats fast", "can't look away"]}, + {"emotion": "sadness", "fragments": ["tears fall like rain", "on empty streets", "memories fade", "into the dark"]}, + {"emotion": "nostalgia", "fragments": ["old photographs", "dusty and worn", "remind me of when", "we were young"]}, + {"emotion": "hope", "fragments": ["dawn breaks through clouds", "new day begins", "darkness recedes", "light fills my soul"]}, + {"emotion": "longing", "fragments": ["miles apart now", "under same stars", "thinking of you", "across the distance"]} + ] + + # Select a theme that doesn't match the song's emotion to avoid copying + selected_themes = [t for t in example_themes if t["emotion"].lower() != emotion.lower()] + if not selected_themes: + selected_themes = example_themes + + import random + example_theme = random.choice(selected_themes) + example_fragments = example_theme["fragments"] + random.shuffle(example_fragments) # Randomize order + + # Create example 1 - grammatical connection with conjunction + ex1_line1 = example_fragments[0] if len(example_fragments) > 0 else "The morning sun" + ex1_line2 = example_fragments[1] if len(example_fragments) > 1 else "breaks through clouds" + ex1_line3 = example_fragments[2] if len(example_fragments) > 2 else "as birds begin" + ex1_line4 = example_fragments[3] if len(example_fragments) > 3 else "their dawn chorus" + + # Create example 2 - prepositional connection + ex2_fragments = [ + "She walks alone", + "through crowded streets", + "with memories", + "of better days" + ] + random.shuffle(ex2_fragments) - times = librosa.frames_to_time(beats, sr=sr) - intervals = np.diff(times) + # Create a more direct prompt with examples and specific syllable count guidance + prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM. + +PRIMARY THEME: {theme} +EMOTION: {emotion} + +I need EXACTLY {num_phrases} lines of lyrics with these STRICT requirements: + +CRITICAL INSTRUCTIONS: +1. EXTREMELY SHORT LINES: Each line MUST be between {min_syllables}-{max_syllables} syllables MAXIMUM +2. ENFORCE BREVITY: NO exceptions to the syllable limit - not a single line should exceed {max_syllables} syllables +3. FRAGMENT STYLE: Use sentence fragments and short phrases instead of complete sentences +4. CONNECTED THOUGHTS: Use prepositions and conjunctions at the start of lines to connect ideas +5. SIMPLE WORDS: Choose one or two-syllable words whenever possible +6. 
CONCRETE IMAGERY: Use specific, tangible details rather than abstract concepts
+7. NO CLICHÉS: Avoid common phrases like "time slips away" or "memories fade"
+8. ONE THOUGHT PER LINE: Express just one simple idea in each line
+
+FORMAT:
+- Write exactly {num_phrases} short text lines
+- No annotations, explanations, or line numbers
+- Do not count syllables in the output
+
+IMPORTANT: If you can't express an idea in {max_syllables} or fewer syllables, break it across two lines or choose a simpler way to express it.
+
+===== EXAMPLES OF CORRECT LENGTH =====
+
+Example 1 (short fragments connected by flow):
+Cold tea cup (3 syllables)
+on windowsill (4 syllables)
+cat watches rain (4 syllables)
+through foggy glass (4 syllables)
+
+Example 2 (prepositional connections):
+Keys dropped here (3 syllables)
+by the front door (4 syllables)
+where shoes pile up (4 syllables)
+since you moved in (4 syllables)
+
+DO NOT copy my examples. Create ENTIRELY NEW lyrics about {theme} with {emotion} feeling.
+
+REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most important rule!
+"""
+
+        # Generate lyrics using the LLM model
+        messages = [
+            {"role": "user", "content": prompt}
+        ]
+
+        # Apply chat template
+        text = llm_tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+
+        # Tokenize and move to model device
+        model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
+
+        # Generate with optimized parameters
+        generated_ids = llm_model.generate(
+            **model_inputs,
+            max_new_tokens=1024,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.2,
+            pad_token_id=llm_tokenizer.eos_token_id
+        )
+
+        # Decode the output
+        output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+        lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
+
+        # ULTRA AGGRESSIVE CLEANING - COMPLETELY REVISED
+        # ------------------------------------------------
+
+        # 1. First, look for any standard dividers that might separate thinking from lyrics
+        divider_patterns = [
+            r'Here are the lyrics:',
+            r'Here is my song:',
+            r'The lyrics:',
+            r'My lyrics:',
+            r'Song lyrics:',
+            r'\*\*\*+',
+            r'===+',
+            r'---+',
+            r'```',
+            r'Lyrics:'
+        ]
+
+        for pattern in divider_patterns:
+            matches = re.finditer(pattern, lyrics, re.IGNORECASE)
+            for match in matches:
+                # Keep only content after the divider
+                lyrics = lyrics[match.end():].strip()
+
+        # 2. Remove thinking tags completely before splitting into lines
+        lyrics = re.sub(r'<think>.*?</think>', '', lyrics, flags=re.DOTALL)
+        lyrics = re.sub(r'\[thinking\].*?\[/thinking\]', '', lyrics, flags=re.DOTALL)
+        lyrics = re.sub(r'<think>', '', lyrics, flags=re.DOTALL)
+        lyrics = re.sub(r'</think>', '', lyrics, flags=re.DOTALL)
+        lyrics = re.sub(r'\[thinking\]', '', lyrics, flags=re.DOTALL)
+        lyrics = re.sub(r'\[/thinking\]', '', lyrics, flags=re.DOTALL)
+
+        # 3. Split text into lines for aggressive line-by-line filtering
+        lines = lyrics.strip().split('\n')
+        clean_lines = []
+
+        # 4. 
Define comprehensive patterns for non-lyrical content + non_lyric_patterns = [ + # Meta-commentary + r'^(note|thinking|thoughts|let me|i will|i am going|i would|i can|i need to|i have to|i should|let\'s|here|now)', + r'^(first|second|third|next|finally|importantly|remember|so|ok|okay|as requested|as asked|considering)', + # Explanations + r'syllable[s]?|phrase|rhythm|beats?|tempo|bpm|instruction|follow|alignment|match|corresponding', + r'verses?|chorus|bridge|section|stanza|part|template|format|pattern|example', + r'requirements?|guidelines?|song structure|stressed|unstressed', + # Technical language + r'generated|output|result|provide|create|write|draft|version', + # Annotations and numbering + r'^line \d+|^\d+[\.\):]|^\[\w+\]|^[\*\-\+] ', + # Questions or analytical statements + r'\?$|analysis|evaluate|review|check|ensure', + # Instruction-like statements + r'make sure|please note|important|notice|pay attention' + ] - # Comprehensive consistency metrics with better statistical justification - if np.mean(intervals) > 0: - # Combine coefficient of variation with autocorrelation confidence - cv = np.std(intervals)/np.mean(intervals) # Lower is better + # 5. Identify which lines are likely actual lyrics vs non-lyrics + for line in lines: + line = line.strip() - # Add adjustments for beat count reasonability - duration = librosa.get_duration(y=y, sr=sr) - expected_beats = duration * tempo_candidates[i] / 60 - beats_ratio = min(len(beats) / expected_beats, expected_beats / len(beats)) if expected_beats > 0 else 0.5 + # Skip empty lines or lines with just spaces/tabs + if not line or line.isspace(): + continue - # Combine metrics with scientific weighting - consistency = (0.7 * (1.0 / (1.0 + cv))) + (0.3 * consistency_metrics[i]) + (0.2 * beats_ratio) - beat_consistency.append(consistency) - else: - beat_consistency.append(0) - - # Select best model with scientific confidence calculation - if beat_consistency: - best_idx = np.argmax(beat_consistency) - best_confidence = beat_consistency[best_idx] * 100 # Convert to percentage - else: - best_idx = 0 - best_confidence = 50.0 # Default 50% confidence if no good metrics - - tempo = tempo_candidates[best_idx] - beat_frames = beat_candidates[best_idx] - - # Calculate beat entropy - scientific measure of beat pattern predictability - beat_entropy = 0.0 - if len(beat_frames) > 2: - times = librosa.frames_to_time(beat_frames, sr=sr) - intervals = np.diff(times) - - # Quantize intervals to detect patterns - if len(intervals) > 0 and np.std(intervals) > 0: - quantized = np.round(intervals / np.min(intervals)) - # Count frequencies of each interval type - unique, counts = np.unique(quantized, return_counts=True) - probs = counts / np.sum(counts) - # Calculate Shannon entropy - beat_entropy = -np.sum(probs * np.log2(probs)) - - # STEP 3: Improved beat strength extraction - beat_times = librosa.frames_to_time(beat_frames, sr=sr) - - # Vectorized extraction of beat strengths with improved error handling - beat_strengths = [] - if len(beat_frames) > 0: - # Filter out beat frames that exceed the onset envelope length - valid_frames = [frame for frame in beat_frames if frame < len(combined_onset)] - if valid_frames: - # Vectorized extraction with normalization for consistency - raw_strengths = combined_onset[valid_frames] + # Skip lines that match any non-lyric pattern + should_skip = False + for pattern in non_lyric_patterns: + if re.search(pattern, line.lower()): + should_skip = True + break - # Normalize strengths to [0,1] for scientific consistency - if 
np.max(raw_strengths) > 0:
-                normalized_strengths = raw_strengths / np.max(raw_strengths)
-            else:
-                normalized_strengths = np.ones_like(raw_strengths)
+            if should_skip:
+                continue
-            beat_strengths = normalized_strengths.tolist()
+            # Skip section headers
+            if (line.startswith('[') and ']' in line) or (line.startswith('(') and ')' in line and len(line) < 20):
+                continue
-            # Handle remaining beats with interpolation instead of constant values
-            if len(beat_times) > len(beat_strengths):
-                missing_count = len(beat_times) - len(beat_strengths)
-                # Use linear interpolation for more scientific approach
-                if beat_strengths:
-                    last_strength = beat_strengths[-1]
-                    decay_factor = 0.9  # Gradual decay for trailing beats
-                    beat_strengths.extend([last_strength * (decay_factor ** (i+1))
-                                           for i in range(missing_count)])
-                else:
-                    beat_strengths = [1.0] * len(beat_times)
-        else:
-            beat_strengths = [1.0] * len(beat_times)
-    else:
-        beat_strengths = [1.0] * len(beat_times)
-
-    # STEP 4: Calculate intervals between beats
-    intervals = np.diff(beat_times).tolist() if len(beat_times) > 1 else []
-
-    # STEP 5: Improved time signature detection with scientific confidence
-    # Start with default assumption
-    time_signature = 4
-    time_sig_confidence = 70.0  # Default confidence
-
-    if len(beat_strengths) > 8:
-        # Use autocorrelation to find periodicity in beat strengths
-        if len(beat_strengths) > 4:
-            # Normalize beat strengths for better pattern detection
-            norm_strengths = np.array(beat_strengths)
-            if np.max(norm_strengths) > 0:
-                norm_strengths = norm_strengths / np.max(norm_strengths)
+            # Skip lines that look like annotations (not prose-like)
+            if ':' in line and not any(word in line.lower() for word in ['like', 'when', 'where', 'how', 'why', 'what']):
+                if len(line.split(':')[0]) < 15:  # Short prefixes followed by colon are likely annotations
+                    continue
+
+            # Skip very short lines that aren't likely to be lyrics (unless it's just a few words which could be valid)
+            if len(line) < 3:
+                continue
-
-            # Compute autocorrelation to find periodic patterns (N)
-            ac = librosa.autocorrelate(norm_strengths, max_size=len(norm_strengths)//2)
+            # Skip lines that are numbered or bulleted
+            if re.match(r'^\d+\.|\(#\d+\)|\d+\)', line):
+                continue
+
+            # Skip markdown-style emphasis or headers
+            if re.match(r'^#{1,6} |^\*\*|^__', line):
+                continue
-
-            # Find peaks in autocorrelation (indicates periodicity)
-            if len(ac) > 3:  # Need enough data for peak picking
-                # Find peaks after lag 0
-                peaks = librosa.util.peak_pick(ac[1:], pre_max=1, post_max=1, pre_avg=1, post_avg=1, delta=0.1, wait=1)
-                peaks = peaks + 1  # Adjust for the removed lag 0
+            # Skip lines with think tags
+            if '<think>' in line.lower() or '</think>' in line.lower() or '[thinking]' in line.lower() or '[/thinking]' in line.lower():
+                continue
-
-                if len(peaks) > 0:
-                    # Get the first significant peak position (cycle length N)
-                    peak_idx = peaks[0]
-                    N = peak_idx
+            # Add this line as it passed all filters
+            clean_lines.append(line)
+
+        # 6. Additional block-level filters for common patterns
+        # Check beginning of lyrics for common prefixes
+        if clean_lines and any(clean_lines[0].lower().startswith(prefix) for prefix in
+                               ['here are', 'these are', 'below are', 'following are']):
+            clean_lines = clean_lines[1:]  # Skip the first line
+
+        # 7. 
Process blocks of lines to detect explanation blocks
+        if len(clean_lines) > 3:
+            # Check for explanation blocks at the beginning
+            first_three = ' '.join(clean_lines[:3]).lower()
+            if any(term in first_three for term in ['i will', 'i have created', 'i\'ll provide', 'i\'ll write']):
+                # This looks like an explanation, skip the first few lines
+                start_idx = 0
+                for i, line in enumerate(clean_lines):
+                    if i >= 3 and not any(term in line.lower() for term in ['i will', 'created', 'write', 'provide']):
+                        start_idx = i
+                        break
+                clean_lines = clean_lines[start_idx:]
+
+            # Check for explanation blocks at the end
+            last_three = ' '.join(clean_lines[-3:]).lower()
+            if any(term in last_three for term in ['hope this', 'these lyrics', 'as you can see', 'this song', 'i have']):
+                # This looks like an explanation at the end, truncate
+                end_idx = len(clean_lines)
+                for i in range(len(clean_lines) - 1, max(0, len(clean_lines) - 4), -1):
+                    if i < len(clean_lines) and not any(term in clean_lines[i].lower() for term in
+                                                        ['hope', 'these lyrics', 'as you can see', 'this song']):
+                        end_idx = i + 1
+                        break
+                clean_lines = clean_lines[:end_idx]
+
+        # 8. Cleanup - Remove remaining annotations or thinking
+        for i in range(len(clean_lines)):
+            # Remove trailing thoughts/annotations
+            clean_lines[i] = re.sub(r'\s+//.*$', '', clean_lines[i])
+            clean_lines[i] = re.sub(r'\s+\(.*?\)$', '', clean_lines[i])
+
+            # Remove thinking tags completely
+            clean_lines[i] = re.sub(r'<think>.*?</think>', '', clean_lines[i], flags=re.DOTALL)
+            clean_lines[i] = re.sub(r'\[thinking\].*?\[/thinking\]', '', clean_lines[i], flags=re.DOTALL)
+            clean_lines[i] = re.sub(r'<think>', '', clean_lines[i])
+            clean_lines[i] = re.sub(r'</think>', '', clean_lines[i])
+            clean_lines[i] = re.sub(r'\[thinking\]', '', clean_lines[i])
+            clean_lines[i] = re.sub(r'\[/thinking\]', '', clean_lines[i])
+
+            # Remove syllable count annotations
+            clean_lines[i] = re.sub(r'\s*\(\d+\s*syllables?\)', '', clean_lines[i])
+
+        # 9. Filter out any remaining empty lines after tag removal
+        clean_lines = [line for line in clean_lines if line.strip() and not line.isspace()]
+
+        # 10. NEW: Apply strict syllable enforcement - split or truncate lines that are too long
+        # This is a critical step to ensure no line exceeds our max syllable count
+        if lyric_templates:
+            max_allowed_syllables = min(7, max([t.get('max_expected', 6) for t in lyric_templates]))
+        else:
+            max_allowed_syllables = 6
+
+        clean_lines = enforce_syllable_limits(clean_lines, max_allowed_syllables)
+
+        # 11. NEW: Check for template copying or clichéd phrases
+        cliched_patterns = [
+            r'moonlight (shimmers?|falls?|dances?)',
+            r'shadows? (dance|play|fall|stretch)',
+            r'time slips? away',
+            r'whispers? (fade|in the)',
+            r'silence speaks',
+            r'stars? shine',
+            r'hearts? beat',
+            r'footsteps (fade|echo)',
+            r'gentle wind',
+            r'(old|empty) (roads?|chair)',
+            r'night (holds?|falls?)',
+            r'memories fade',
+            r'dreams (linger|drift)'
+        ]
+
+        cliche_count = 0
+        for line in clean_lines:
+            for pattern in cliched_patterns:
+                if re.search(pattern, line.lower()):
+                    cliche_count += 1
+                    break
+
+        # Calculate percentage of clichéd lines
+        if clean_lines:
+            cliche_percentage = (cliche_count / len(clean_lines)) * 100
+        else:
+            cliche_percentage = 0
+
+        # 12. 
If we have lyric templates, ensure we have the correct number of lines + if lyric_templates: + num_required = len(lyric_templates) + + # If we have too many lines, keep just the best ones + if len(clean_lines) > num_required: + # Keep the first num_required lines + clean_lines = clean_lines[:num_required] + + # If we don't have enough lines, generate placeholders that fit the syllable count + while len(clean_lines) < num_required: + i = len(clean_lines) + if i < len(lyric_templates): + template = lyric_templates[i] + target_syllables = min(max_allowed_syllables - 1, (template.get('min_expected', 2) + template.get('max_expected', 6)) // 2) + + # Generate more creative, contextual placeholders with specificity + # Avoid clichés like "moonlight shimmers" or "time slips away" + specific_placeholders = { + # 2-3 syllables - specific, concrete phrases + 2: [ + "Phone rings twice", + "Dogs bark loud", + "Keys dropped here", + "Train rolls by", + "Birds take flight" + ], + # 3-4 syllables - specific contexts + 3: [ + "Coffee gets cold", + "Fan blades spin", + "Pages turn slow", + "Neighbors talk", + "Radio hums soft" + ], + # 4-5 syllables - specific details + 4: [ + "Fingers tap table", + "Taxi waits in rain", + "Laptop screen blinks", + "Ring left on sink", + "Church bells ring loud" + ], + # 5-6 syllables - context rich + 5: [ + "Letters with no stamps", + "Watch shows wrong time", + "Jeans with torn knees", + "Dog barks next door", + "Smoke alarm beeps" + ] + } - # Calculate confidence based on peak prominence - if peak_idx < len(ac): - peak_height = ac[peak_idx] - local_prominence = peak_height / np.mean(ac[max(0, peak_idx-2):min(len(ac), peak_idx+3)]) - time_sig_confidence = min(95, 60 + 35 * local_prominence) # Scale between 60-95% + # Make theme and emotion specific placeholders to add to the list + theme_specific = [] + if theme.lower() in ["love", "relationship", "romance"]: + theme_specific = ["Lipstick on glass", "Text left on read", "Scent on your coat"] + elif theme.lower() in ["loss", "grief", "sadness"]: + theme_specific = ["Chair sits empty", "Photos face down", "Clothes in closet"] + elif theme.lower() in ["hope", "inspiration", "triumph"]: + theme_specific = ["Seeds start to grow", "Finish line waits", "New day breaks through"] - # Map common cycle lengths to time signatures with improved musical theory - if N == 2: - time_signature = 2 # Clear binary meter (2/4, 2/2, etc.) - time_sig_confidence += 5 # Boost for simple meter - elif N == 3: - time_signature = 3 # Clear triple meter (3/4, 3/8, etc.) 
- time_sig_confidence += 5 # Boost for simple meter - elif 4 <= N <= 5: - time_signature = N # Direct mapping for common cases (4/4 or 5/4) - elif N == 6: - # Could be 6/8 (compound duple) or 3/4 with subdivisions - # Further analyze to distinguish - group_3_count = 0 - for i in range(0, len(beat_strengths) - 6, 3): - if i + 2 < len(beat_strengths): - if beat_strengths[i] > beat_strengths[i+1] and beat_strengths[i] > beat_strengths[i+2]: - group_3_count += 1 + # Get the closest matching syllable group + closest_group = min(specific_placeholders.keys(), key=lambda k: abs(k - target_syllables)) + + # Create pool of available placeholders from both specific and theme specific options + all_placeholders = specific_placeholders[closest_group] + theme_specific + + # Choose a placeholder that hasn't been used yet + available_placeholders = [p for p in all_placeholders if p not in clean_lines] + + if available_placeholders: + # Use modulo for more variation + idx = (i * 17 + len(clean_lines) * 13) % len(available_placeholders) + placeholder = available_placeholders[idx] + else: + # If we've used all placeholders, create something random and specific + subjects = ["Car", "Dog", "Kid", "Clock", "Phone", "Tree", "Book", "Door", "Light"] + verbs = ["waits", "moves", "stops", "falls", "breaks", "turns", "sleeps"] - group_2_count = 0 - for i in range(0, len(beat_strengths) - 4, 2): - if i + 1 < len(beat_strengths): - if beat_strengths[i] > beat_strengths[i+1]: - group_2_count += 1 - - # Determine if it's grouped in 2s or 3s - time_signature = 3 if group_3_count > group_2_count else 6 - elif N == 8: - time_signature = 4 # 4/4 with embellishments - elif N == 5 or N == 7: - time_signature = N # Odd time signatures like 5/4 or 7/8 - - # STEP 6: Enhanced phrase detection with adaptive thresholds and scientific justification - phrases = [] - current_phrase = [] - - if len(beat_times) > 0: - # Calculate adaptive thresholds using percentiles instead of fixed ratios - if len(beat_strengths) > 4: - # Define thresholds based on distribution rather than fixed values - strong_threshold = np.percentile(beat_strengths, 75) # Top 25% are "strong" beats - # For gaps, calculate significant deviation using z-scores if we have intervals - if intervals: - mean_interval = np.mean(intervals) - std_interval = np.std(intervals) - # A significant gap is > 1.5 standard deviations above mean (95th percentile) - significant_gap = mean_interval + (1.5 * std_interval) if std_interval > 0 else mean_interval * 1.3 - else: - significant_gap = 0 - else: - # Fallback for limited data - strong_threshold = np.max(beat_strengths) * 0.8 if beat_strengths else 1.0 - significant_gap = 0 - - # Identify phrase boundaries with improved musical heuristics - for i in range(len(beat_times)): - current_phrase.append(i) - - # Check for phrase boundary conditions - if i < len(beat_times) - 1: - # Strong beat coming up (using adaptive threshold) - is_stronger_next = False - if i < len(beat_strengths) - 1: - is_stronger_next = beat_strengths[i+1] > strong_threshold and beat_strengths[i+1] > beat_strengths[i] * 1.1 - - # Significant gap (using adaptive threshold) - is_longer_gap = False - if i < len(beat_times) - 1 and intervals and i < len(intervals): - is_longer_gap = intervals[i] > significant_gap - - # Measure boundary based on time signature - is_measure_boundary = (i + 1) % time_signature == 0 and i > 0 - - # Check for significant dip in onset strength (phrase boundary often has reduced energy) - is_energy_dip = False - if i < len(beat_strengths) - 
1: - onset_ratio = beat_strengths[i+1] / max(beat_strengths[i], 0.001) - is_energy_dip = onset_ratio < 0.6 - - # Combined decision for phrase boundary with scientific weighting - phrase_boundary_score = ( - (1.5 if is_stronger_next else 0) + - (2.0 if is_longer_gap else 0) + - (1.0 if is_measure_boundary else 0) + - (0.5 if is_energy_dip else 0) - ) - - if (phrase_boundary_score >= 1.5 and len(current_phrase) >= 2) or \ - (is_measure_boundary and len(current_phrase) >= time_signature): - phrases.append(current_phrase) - current_phrase = [] - - # Add the last phrase if not empty - if current_phrase and len(current_phrase) >= 2: - phrases.append(current_phrase) - - # Ensure we have at least one phrase - if not phrases and len(beat_times) >= 2: - # Default to grouping by measures based on detected time signature - for i in range(0, len(beat_times), time_signature): - end = min(i + time_signature, len(beat_times)) - if end - i >= 2: # Ensure at least 2 beats per phrase - phrases.append(list(range(i, end))) - - # Calculate beat periodicity (average time between beats) - beat_periodicity = np.mean(intervals) if intervals else (60 / tempo) - - # Return enhanced results with scientific confidence metrics - return { - "tempo": tempo, - "tempo_confidence": best_confidence, # New scientific confidence metric - "time_signature": time_signature, - "time_sig_confidence": time_sig_confidence, # New scientific confidence metric - "beat_frames": beat_frames, - "beat_times": beat_times, - "beat_count": len(beat_times), - "beat_strengths": beat_strengths, - "intervals": intervals, - "phrases": phrases, - "beat_periodicity": beat_periodicity, - "beat_entropy": beat_entropy # New scientific measure of rhythm complexity - } - -def detect_beats_and_subbeats(y, sr, subdivision=4): - """ - Detect main beats and interpolate subbeats between consecutive beats. - - Parameters: - y: Audio time series - sr: Sample rate - subdivision: Number of subdivisions between beats (default: 4 for quarter beats) + # Ensure randomness with seed that changes with each call + import random + random.seed(len(clean_lines) * 27 + i * 31) + + subj = random.choice(subjects) + verb = random.choice(verbs) + + placeholder = f"{subj} {verb}" + else: + placeholder = "Page turns slow" + + clean_lines.append(placeholder) - Returns: - Dictionary containing beat times, subbeat times, and tempo information - """ - # Detect main beats using librosa - try: - tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr) - beat_times = librosa.frames_to_time(beat_frames, sr=sr) + # Assemble final lyrics + final_lyrics = '\n'.join(clean_lines) - # Convert numpy values to native Python types - if isinstance(tempo, np.ndarray) or isinstance(tempo, np.number): - tempo = float(tempo) + # Add a warning if we detected too many clichés + if cliche_percentage >= 40: + final_lyrics = f"""WARNING: These lyrics contain several overused phrases and clichés. +Try regenerating for more original content. + +{final_lyrics}""" - # Convert beat_times to a list of floats - if isinstance(beat_times, np.ndarray): - beat_times = [float(t) for t in beat_times] + # 13. Final sanity check - if we have nothing or garbage, return an error + if not final_lyrics or len(final_lyrics) < 10: + return "The model generated only thinking content but no actual lyrics. Please try again." 
+ + return final_lyrics + except Exception as e: - print(f"Error in beat detection: {e}") - # Default fallbacks - tempo = 120.0 - beat_times = [] + error_msg = f"Error generating lyrics: {str(e)}" + print(error_msg) + return error_msg + +def analyze_lyrics_rhythm_match(lyrics, lyric_templates, genre="pop"): + """Analyze how well the generated lyrics match the beat patterns and syllable requirements""" + if not lyric_templates or not lyrics: + return "No beat templates or lyrics available for analysis." - # Create subbeats by interpolating between main beats - subbeat_times = [] + # Split lyrics into lines + lines = lyrics.strip().split('\n') + lines = [line for line in lines if line.strip()] # Remove empty lines + + # Prepare analysis result + result = "### Beat & Syllable Match Analysis\n\n" + result += "| Line | Syllables | Target Range | Match | Stress Pattern |\n" + result += "| ---- | --------- | ------------ | ----- | -------------- |\n" + + # Maximum number of lines to analyze (either all lines or all templates) + line_count = min(len(lines), len(lyric_templates)) + + # Track overall match statistics + total_matches = 0 + total_range_matches = 0 + total_stress_matches = 0 + total_stress_percentage = 0 + total_ideal_matches = 0 + + for i in range(line_count): + line = lines[i] + template = lyric_templates[i] + + # Check match between line and template with genre awareness + check_result = beat_analyzer.check_syllable_stress_match(line, template, genre) + + # Get match symbols + if check_result["close_to_ideal"]: + syllable_match = "✓" # Ideal or very close + elif check_result["within_range"]: + syllable_match = "✓*" # Within range but not ideal + else: + syllable_match = "✗" # Outside range + + stress_match = "✓" if check_result["stress_matches"] else f"{int(check_result['stress_match_percentage']*100)}%" + + # Update stats + if check_result["close_to_ideal"]: + total_matches += 1 + total_ideal_matches += 1 + elif check_result["within_range"]: + total_range_matches += 1 + + if check_result["stress_matches"]: + total_stress_matches += 1 + total_stress_percentage += check_result["stress_match_percentage"] + + # Create visual representation of the stress pattern + stress_visual = "" + for char in template['stress_pattern']: + if char == "S": + stress_visual += "X" # Strong + elif char == "M": + stress_visual += "x" # Medium + else: + stress_visual += "." 
# Weak + + # Add line to results table + result += f"| {i+1} | {check_result['syllable_count']} | {check_result['min_expected']}-{check_result['max_expected']} | {syllable_match} | {stress_visual} |\n" + + # Add summary statistics + if line_count > 0: + exact_match_rate = (total_matches / line_count) * 100 + range_match_rate = ((total_matches + total_range_matches) / line_count) * 100 + ideal_match_rate = (total_ideal_matches / line_count) * 100 + stress_match_rate = (total_stress_matches / line_count) * 100 + avg_stress_percentage = (total_stress_percentage / line_count) * 100 + + result += f"\n**Summary:**\n" + result += f"- Ideal or near-ideal syllable match rate: {exact_match_rate:.1f}%\n" + result += f"- Genre-appropriate syllable range match rate: {range_match_rate:.1f}%\n" + result += f"- Perfect stress pattern match rate: {stress_match_rate:.1f}%\n" + result += f"- Average stress pattern accuracy: {avg_stress_percentage:.1f}%\n" + result += f"- Overall rhythmic accuracy: {((range_match_rate + avg_stress_percentage) / 2):.1f}%\n" + + # Analyze sentence flow across lines + sentence_flow_analysis = analyze_sentence_flow(lines) + result += f"\n**Sentence Flow Analysis:**\n" + result += f"- Connected thought groups: {sentence_flow_analysis['connected_groups']} detected\n" + result += f"- Average lines per thought: {sentence_flow_analysis['avg_lines_per_group']:.1f}\n" + result += f"- Flow quality: {sentence_flow_analysis['flow_quality']}\n" + + # Add guidance on ideal distribution for syllables and sentence flow + result += f"\n**Syllable & Flow Guidance:**\n" + result += f"- Aim for {min([t.get('min_expected', 3) for t in lyric_templates])}-{max([t.get('max_expected', 7) for t in lyric_templates])} syllables per line\n" + result += f"- Break complete thoughts across 2-3 lines for natural flow\n" + result += f"- Connect your lyrics with sentence fragments that flow across lines\n" + result += f"- Use conjunctions, prepositions, and dependent clauses to connect lines\n" + + # Add genre-specific notes + result += f"\n**Genre Notes ({genre}):**\n" + + # Add appropriate genre notes based on genre + if genre.lower() == "pop": + result += "- Pop lyrics work well with thoughts spanning 2-3 musical phrases\n" + result += "- Create flow by connecting lines with transitions like 'as', 'when', 'through'\n" + elif genre.lower() == "rock": + result += "- Rock lyrics benefit from short phrases that build into complete thoughts\n" + result += "- Use line breaks strategically to emphasize key words\n" + elif genre.lower() == "country": + result += "- Country lyrics tell stories that flow naturally across multiple lines\n" + result += "- Connect narrative elements across phrases for authentic storytelling\n" + elif genre.lower() == "disco": + result += "- Disco lyrics work well with phrases that create rhythmic momentum\n" + result += "- Use line transitions that maintain energy and flow\n" + elif genre.lower() == "metal": + result += "- Metal lyrics can create intensity by breaking phrases at dramatic points\n" + result += "- Connect lines to build tension and release across measures\n" + else: + result += "- This genre works well with connected thoughts across multiple lines\n" + result += "- Aim for natural speech flow rather than complete thoughts per line\n" - # Early return if no beats detected - if not beat_times or len(beat_times) < 2: + return result + +def analyze_sentence_flow(lines): + """Analyze how well the lyrics create sentence flow across multiple lines""" + if not lines or len(lines) < 
2: return { - "tempo": float(tempo) if tempo is not None else 120.0, - "beat_times": beat_times, - "subbeat_times": [] + "connected_groups": 0, + "avg_lines_per_group": 0, + "flow_quality": "Insufficient lines to analyze" } - for i in range(len(beat_times) - 1): - # Get current and next beat time - try: - current_beat = float(beat_times[i]) - next_beat = float(beat_times[i + 1]) - except (IndexError, ValueError, TypeError): - continue - - # Calculate time interval between beats - interval = (next_beat - current_beat) / subdivision - - # Add the main beat - subbeat_times.append({ - "time": float(current_beat), - "type": "main", - "strength": 1.0, - "beat_index": i - }) + # Simplified analysis looking for grammatical clues of sentence continuation + continuation_starters = [ + 'and', 'but', 'or', 'nor', 'for', 'yet', 'so', # Coordinating conjunctions + 'as', 'when', 'while', 'before', 'after', 'since', 'until', 'because', 'although', 'though', # Subordinating conjunctions + 'with', 'without', 'through', 'throughout', 'beyond', 'beneath', 'under', 'over', 'into', 'onto', # Prepositions + 'to', 'from', 'by', 'at', 'in', 'on', 'of', # Common prepositions + 'where', 'how', 'who', 'whom', 'whose', 'which', 'that', # Relative pronouns + 'if', 'then', # Conditional connectors + ] + + # Check for lines that likely continue a thought from previous line + connected_lines = [] + potential_groups = [] + current_group = [0] # Start with first line + + for i in range(1, len(lines)): + # Check if line starts with a continuation word + words = lines[i].lower().split() - # Add subbeats - for j in range(1, subdivision): - subbeat_time = current_beat + j * interval - # Calculate strength based on position - # For 4/4 time, beat 3 is stronger than beats 2 and 4 - if j == subdivision // 2 and subdivision == 4: - strength = 0.8 # Stronger subbeat (e.g., beat 3 in 4/4) - else: - strength = 0.5 # Weaker subbeat + # Empty line or no words + if not words: + if len(current_group) > 1: # Only consider groups of 2+ lines + potential_groups.append(current_group.copy()) + current_group = [i] + continue - subbeat_times.append({ - "time": float(subbeat_time), - "type": "sub", - "strength": float(strength), - "beat_index": i, - "subbeat_index": j - }) - - # Add the last main beat - if beat_times: - try: - subbeat_times.append({ - "time": float(beat_times[-1]), - "type": "main", - "strength": 1.0, - "beat_index": len(beat_times) - 1 - }) - except (ValueError, TypeError): - # Skip if conversion fails - pass + # Check first word for continuation clues + first_word = words[0].strip(',.!?;:') + if first_word in continuation_starters: + connected_lines.append(i) + current_group.append(i) + # Check for absence of capitalization as continuation clue + elif not first_word[0].isupper() and first_word[0].isalpha(): + connected_lines.append(i) + current_group.append(i) + # Check if current line is very short (likely part of a continued thought) + elif len(words) <= 3 and i < len(lines) - 1: + # Look ahead to see if next line could be a continuation + if i+1 < len(lines): + next_words = lines[i+1].lower().split() + if next_words and next_words[0] in continuation_starters: + connected_lines.append(i) + current_group.append(i) + else: + # This might end a group + if len(current_group) > 1: # Only consider groups of 2+ lines + potential_groups.append(current_group.copy()) + current_group = [i] + else: + # This likely starts a new thought + if len(current_group) > 1: # Only consider groups of 2+ lines + 
potential_groups.append(current_group.copy()) + current_group = [i] + + # Add the last group if it has multiple lines + if len(current_group) > 1: + potential_groups.append(current_group) + + # Calculate metrics + connected_groups = len(potential_groups) + + if connected_groups > 0: + avg_lines_per_group = sum(len(group) for group in potential_groups) / connected_groups + + # Determine flow quality + if connected_groups >= len(lines) / 3 and avg_lines_per_group >= 2.5: + flow_quality = "Excellent - multiple connected thoughts across lines" + elif connected_groups >= len(lines) / 4 and avg_lines_per_group >= 2: + flow_quality = "Good - some connected thoughts across lines" + elif connected_groups > 0: + flow_quality = "Fair - limited connection between lines" + else: + flow_quality = "Poor - mostly independent lines" + else: + avg_lines_per_group = 0 + flow_quality = "Poor - no connected thoughts detected" return { - "tempo": float(tempo) if tempo is not None else 120.0, - "beat_times": beat_times, - "subbeat_times": subbeat_times + "connected_groups": connected_groups, + "avg_lines_per_group": avg_lines_per_group, + "flow_quality": flow_quality } -def map_beats_to_seconds(subbeat_times, duration, fps=1.0): - """ - Map beats and subbeats to second-level intervals. - - Parameters: - subbeat_times: List of dictionaries containing beat and subbeat information - duration: Total duration of the audio in seconds - fps: Frames per second (default: 1.0 for one-second intervals) - - Returns: - List of dictionaries, each containing beats within a time window - """ - # Safety check for input parameters - if not isinstance(subbeat_times, list): - print("Warning: subbeat_times is not a list") - subbeat_times = [] - - try: - duration = float(duration) - except (ValueError, TypeError): - print("Warning: duration is not convertible to float, defaulting to 30") - duration = 30.0 - - # Calculate number of time windows - num_windows = int(duration * fps) + 1 - - # Initialize time windows - time_windows = [] - - for i in range(num_windows): - # Calculate window boundaries - start_time = i / fps - end_time = (i + 1) / fps - - # Find beats and subbeats within this window - window_beats = [] - - for beat in subbeat_times: - # Safety check for beat object - if not isinstance(beat, dict): - continue - - # Safely access beat time - try: - beat_time = float(beat.get("time", 0)) - except (ValueError, TypeError): - continue - - if start_time <= beat_time < end_time: - # Safely extract beat properties with defaults - beat_type = beat.get("type", "sub") - if not isinstance(beat_type, str): - beat_type = "sub" - - # Safely handle strength - try: - strength = float(beat.get("strength", 0.5)) - except (ValueError, TypeError): - strength = 0.5 - - # Add beat to this window - window_beats.append({ - "time": beat_time, - "type": beat_type, - "strength": strength, - "relative_pos": (beat_time - start_time) / (1/fps) # Position within window (0-1) - }) - - # Add window to list - time_windows.append({ - "second": i, - "start": start_time, - "end": end_time, - "beats": window_beats - }) - - return time_windows - -def create_second_level_templates(sec_map, tempo, genre=None): +def enforce_syllable_limits(lines, max_syllables=6): """ - Create syllable templates for each second-level window. 
-def map_beats_to_seconds(subbeat_times, duration, fps=1.0):
-    """
-    Map beats and subbeats to second-level intervals.
-
-    Parameters:
-    subbeat_times: List of dictionaries containing beat and subbeat information
-    duration: Total duration of the audio in seconds
-    fps: Frames per second (default: 1.0 for one-second intervals)
-
-    Returns:
-    List of dictionaries, each containing beats within a time window
-    """
-    # Safety check for input parameters
-    if not isinstance(subbeat_times, list):
-        print("Warning: subbeat_times is not a list")
-        subbeat_times = []
-
-    try:
-        duration = float(duration)
-    except (ValueError, TypeError):
-        print("Warning: duration is not convertible to float, defaulting to 30")
-        duration = 30.0
-
-    # Calculate number of time windows
-    num_windows = int(duration * fps) + 1
-
-    # Initialize time windows
-    time_windows = []
-
-    for i in range(num_windows):
-        # Calculate window boundaries
-        start_time = i / fps
-        end_time = (i + 1) / fps
-
-        # Find beats and subbeats within this window
-        window_beats = []
-
-        for beat in subbeat_times:
-            # Safety check for beat object
-            if not isinstance(beat, dict):
-                continue
-
-            # Safely access beat time
-            try:
-                beat_time = float(beat.get("time", 0))
-            except (ValueError, TypeError):
-                continue
-
-            if start_time <= beat_time < end_time:
-                # Safely extract beat properties with defaults
-                beat_type = beat.get("type", "sub")
-                if not isinstance(beat_type, str):
-                    beat_type = "sub"
-
-                # Safely handle strength
-                try:
-                    strength = float(beat.get("strength", 0.5))
-                except (ValueError, TypeError):
-                    strength = 0.5
-
-                # Add beat to this window
-                window_beats.append({
-                    "time": beat_time,
-                    "type": beat_type,
-                    "strength": strength,
-                    "relative_pos": (beat_time - start_time) / (1/fps)  # Position within window (0-1)
-                })
-
-        # Add window to list
-        time_windows.append({
-            "second": i,
-            "start": start_time,
-            "end": end_time,
-            "beats": window_beats
-        })
-
-    return time_windows
-
-def create_second_level_templates(sec_map, tempo, genre=None):
+def enforce_syllable_limits(lines, max_syllables=6):
     """
-    Create syllable templates for each second-level window.
-
-    Parameters:
-    sec_map: List of second-level time windows with beat information
-    tempo: Tempo in BPM
-    genre: Optional genre for genre-specific adjustments
-
-    Returns:
-    List of template strings, one for each second
+    Enforce syllable limits by splitting lines that are too long.
+    Returns a modified list of lines where no line exceeds max_syllables
+    (a single word longer than the limit is kept on its own line).
     """
-    # Helper function to map tempo to base syllable count
-    def tempo_to_syllable_base(tempo):
-        """Continuous function mapping tempo to syllable base count"""
-        # Sigmoid-like function that smoothly transitions between syllable counts
-        if tempo > 180:
-            return 1.0
-        elif tempo > 140:
-            return 1.0 + (180 - tempo) * 0.02  # Gradual increase 1.0 → 1.8
-        elif tempo > 100:
-            return 1.8 + (140 - tempo) * 0.01  # Gradual increase 1.8 → 2.2
-        elif tempo > 70:
-            return 2.2 + (100 - tempo) * 0.02  # Gradual increase 2.2 → 2.8
-        else:
-            return 2.8 + max(0, (70 - tempo) * 0.04)  # Continue increasing for very slow tempos
-
-    # Calculate base syllable count from tempo
-    base_syllables = tempo_to_syllable_base(tempo)
-
-    # Apply genre-specific adjustments
-    genre_factor = 1.0
-    if genre:
-        genre_lower = genre.lower()
-        if any(term in genre_lower for term in ["rap", "hip hop", "hip-hop"]):
-            genre_factor = 1.4  # Much higher syllable density for rap
-        elif any(term in genre_lower for term in ["folk", "country", "ballad"]):
-            genre_factor = 0.8  # Lower density for folk styles
+    if not lines:
+        return []

-    # Create templates for each second
-    templates = []
+    result_lines = []

-    for window in sec_map:
-        beats = window["beats"]
-
-        # If no beats in this second, create a default template
-        if not beats:
-            templates.append("w(0.5):1")
+    for line in lines:
+        words = line.split()
+        if not words:
             continue
-
-        # Create beat patterns for this second
-        beat_patterns = []
-
-        for beat in beats:
-            # Ensure we're dealing with a dictionary and that it has a "strength" key
-            if not isinstance(beat, dict):
-                continue  # Skip this beat if it's not a dictionary

-            # Safely get beat type and strength
-            if "type" not in beat or not isinstance(beat["type"], str):
-                beat_type = "w"  # Default to weak if type is missing or not a string
-            else:
-                beat_type = "S" if beat["type"] == "main" else "m" if beat.get("strength", 0) >= 0.7 else "w"
+        # Count syllables in the line
+        syllable_count = sum(beat_analyzer.count_syllables(word) for word in words)
+
+        # If within limits, keep the line as is
+        if syllable_count <= max_syllables:
+            result_lines.append(line)
+            continue

-            # Safely get strength value with fallback
-            try:
-                strength = float(beat.get("strength", 0.5))
-            except (ValueError, TypeError):
-                strength = 0.5  # Default if conversion fails
+        # Line is too long - we need to split it
+        current_line = []
+        current_syllables = 0
+
+        for word in words:
+            word_syllables = beat_analyzer.count_syllables(word)

-            # Adjust syllable count based on beat type and strength
-            if beat_type == "S":
-                syllable_factor = 1.2  # More syllables for strong beats
-            elif beat_type == "m":
-                syllable_factor = 1.0  # Normal for medium beats
+            # If adding this word would exceed the limit, start a new line
+            if current_syllables + word_syllables > max_syllables and current_line:
+                result_lines.append(" ".join(current_line))
+                current_line = [word]
+                current_syllables = word_syllables
             else:
-                syllable_factor = 0.8  # Fewer for weak beats
-
-            # Calculate final syllable count
-            syllable_count = base_syllables * syllable_factor * genre_factor
-
-            # Round to half-syllable precision
-            syllable_count = round(syllable_count * 2) / 2
-
-            # Ensure reasonable limits
-            syllable_count = max(0.5, min(4, syllable_count))
+                # Add the word to the current line
+                current_line.append(word)
+                current_syllables += word_syllables
+
+        # Don't forget the last line if there are words left
+        if current_line:
+            result_lines.append(" ".join(current_line))
+
+    return result_lines
+
+# Create Gradio interface
+def create_interface():
+    with gr.Blocks(title="Music Analysis & Lyrics Generator") as demo:
+        gr.Markdown("# Music Analysis & Lyrics Generator")
+        gr.Markdown("Upload a music file or record audio to analyze it and generate matching lyrics")
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                audio_input = gr.Audio(
+                    label="Upload or Record Audio",
+                    type="filepath",
+                    sources=["upload", "microphone"]
+                )
+                analyze_btn = gr.Button("Analyze and Generate Lyrics", variant="primary")
-            # Format with embedded strength value
-            strength_pct = round(strength * 100) / 100
-            beat_patterns.append(f"{beat_type}({strength_pct}):{syllable_count}")
+            with gr.Column(scale=2):
+                with gr.Tab("Analysis"):
+                    analysis_output = gr.Textbox(label="Music Analysis Results", lines=10)
+
+                    with gr.Row():
+                        tempo_output = gr.Number(label="Tempo (BPM)")
+                        time_sig_output = gr.Textbox(label="Time Signature")
+                        emotion_output = gr.Textbox(label="Primary Emotion")
+                        theme_output = gr.Textbox(label="Primary Theme")
+                        genre_output = gr.Textbox(label="Primary Genre")
+
+                with gr.Tab("Generated Lyrics"):
+                    lyrics_output = gr.Textbox(label="Generated Lyrics", lines=20)
+
+                with gr.Tab("Beat Matching"):
+                    beat_match_output = gr.Markdown(label="Beat & Syllable Matching Analysis")
+
+        # Set up event handlers
+        analyze_btn.click(
+            fn=process_audio,
+            inputs=[audio_input],
+            outputs=[analysis_output, lyrics_output, tempo_output, time_sig_output,
+                     emotion_output, theme_output, genre_output, beat_match_output]
+        )
-        # Join patterns with dashes - ensure we have at least one pattern
-        if not beat_patterns:
-            templates.append("w(0.5):1")  # Default if no valid patterns were created
-        else:
-            second_template = "-".join(beat_patterns)
-            templates.append(second_template)
+        # Format supported genres for display
+        supported_genres_md = "\n".join([f"- {genre.capitalize()}" for genre in beat_analyzer.supported_genres])
+
+        gr.Markdown(f"""
+        ## How it works
+        1. Upload or record a music file
+        2. The system analyzes tempo, beats, time signature and other musical features
+        3. It detects emotion, theme, and music genre
+        4. Using beat patterns and syllable stress analysis, it generates lyrics aligned to the beat
+        5. Each line of the lyrics is matched to the beat pattern of the corresponding musical phrase
+
+        ## Supported Genres
+        **Note:** Lyrics generation is currently only supported for the following genres:
+        {supported_genres_md}
+
+        These genres have consistent syllable-to-beat patterns that work well with our algorithm.
+        For other genres, only music analysis will be provided.
+        """)
-    return templates
-
-def detect_sections(y, sr):
-    """
-    Detect musical segments without classifying them by type (verse, chorus, etc.).
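# ---- [Editor's illustration - not part of the patch] ----
# Rough behavior of the enforce_syllable_limits() helper added above, shown
# here with a naive vowel-group counter standing in for
# beat_analyzer.count_syllables (whose exact rules live in beat_analysis.py):

import re

def naive_count(word):
    # Count runs of vowels as syllables; crude but adequate for a demo
    return max(1, len(re.findall(r'[aeiouy]+', word.lower())))

def split_line(line, max_syllables=6):
    out, cur, cur_syll = [], [], 0
    for word in line.split():
        s = naive_count(word)
        if cur_syll + s > max_syllables and cur:
            out.append(" ".join(cur))
            cur, cur_syll = [word], s
        else:
            cur.append(word)
            cur_syll += s
    if cur:
        out.append(" ".join(cur))
    return out

print(split_line("every shadow follows me across the silent water"))
# -> ['every shadow', 'follows me across the', 'silent water']
# ---- [End illustration] ----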
- - Parameters: - y: Audio time series - sr: Sample rate - - Returns: - A list of section dictionaries with start time, end time, and duration - """ - # Step 1: Extract rich feature set for comprehensive analysis - # ---------------------------------------------------------------------- - hop_length = 512 # Common hop length for feature extraction - - # Spectral features - S = np.abs(librosa.stft(y, hop_length=hop_length)) - contrast = librosa.feature.spectral_contrast(S=S, sr=sr) - - # Harmonic features with CQT-based chroma (better for harmonic analysis) - chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length) - - # Timbral features - mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=hop_length) - - # Energy features - rms = librosa.feature.rms(y=y, hop_length=hop_length) - - # Harmonic-percussive source separation for better rhythm analysis - y_harmonic, y_percussive = librosa.effects.hpss(y) - - # Step 2: Adaptive determination of segment count based on song complexity - # ---------------------------------------------------------------------- - duration = librosa.get_duration(y=y, sr=sr) - - # Feature preparation for adaptive segmentation - # Stack features with proper normalization (addressing the scale issue) - feature_stack = np.vstack([ - librosa.util.normalize(contrast), - librosa.util.normalize(chroma), - librosa.util.normalize(mfcc), - librosa.util.normalize(rms) - ]) - - # Transpose to get time as first dimension - feature_matrix = feature_stack.T - - # Step 3: Feature fusion using dimensionality reduction - # ---------------------------------------------------------------------- - from sklearn.decomposition import PCA - - # Handle very short audio files - n_components = min(8, feature_matrix.shape[0], feature_matrix.shape[1]) - - if feature_matrix.shape[0] > n_components and feature_matrix.shape[1] > 0: - try: - pca = PCA(n_components=n_components) - reduced_features = pca.fit_transform(feature_matrix) - except Exception as e: - print(f"PCA failed, falling back to original features: {e}") - # Fallback to simpler approach if PCA fails - reduced_features = feature_matrix - else: - # Not enough data for PCA - reduced_features = feature_matrix - - # Step 4: Adaptive determination of optimal segment count - # ---------------------------------------------------------------------- - - # Initialize range of segment counts to try - min_segments = max(2, int(duration / 60)) # At least 2 segments, roughly 1 per minute - max_segments = min(10, int(duration / 20)) # At most 10 segments, roughly 1 per 20 seconds - - # Ensure reasonable bounds - min_segments = max(2, min(min_segments, 4)) - max_segments = max(min_segments + 1, min(max_segments, 8)) - - # Try different segment counts and evaluate with silhouette score - best_segments = min_segments - best_score = -1 - - from sklearn.metrics import silhouette_score - from sklearn.cluster import AgglomerativeClustering - - # Only do this analysis if we have enough data - if reduced_features.shape[0] > max_segments: - for n_segments in range(min_segments, max_segments + 1): - try: - # Perform agglomerative clustering - clustering = AgglomerativeClustering(n_clusters=n_segments) - labels = clustering.fit_predict(reduced_features) - - # Calculate silhouette score if we have enough samples - if len(np.unique(labels)) > 1 and len(labels) > n_segments + 1: - score = silhouette_score(reduced_features, labels) - - if score > best_score: - best_score = score - best_segments = n_segments - except Exception as e: - 
print(f"Clustering with {n_segments} segments failed: {e}") - continue - - # Use the optimal segment count for final segmentation - n_segments = best_segments - - # Step 5: Final segmentation using the optimal segment count - # ---------------------------------------------------------------------- - - # Method 1: Use agglomerative clustering on the reduced features - try: - clustering = AgglomerativeClustering(n_clusters=n_segments) - labels = clustering.fit_predict(reduced_features) - - # Convert cluster labels to boundaries by finding where labels change - boundaries = [0] # Start with the beginning - - for i in range(1, len(labels)): - if labels[i] != labels[i-1]: - boundaries.append(i) - - boundaries.append(len(labels)) # Add the end - - # Convert to frames - bounds_frames = np.array(boundaries) - - except Exception as e: - print(f"Final clustering failed: {e}") - # Fallback to librosa's agglomerative clustering on original features - bounds_frames = librosa.segment.agglomerative(feature_stack, n_segments) - - # Step 6: Convert boundaries to time and create sections - # ---------------------------------------------------------------------- - bounds_times = librosa.frames_to_time(bounds_frames, sr=sr, hop_length=hop_length) - - # Create sections from the boundaries - sections = [] - - for i in range(len(bounds_times) - 1): - start = bounds_times[i] - end = bounds_times[i+1] - duration = end - start - - # Skip extremely short sections - if duration < 4 and i > 0 and i < len(bounds_times) - 2: - continue - - # Add section to the list (without classifying as verse/chorus/etc) - sections.append({ - "type": "segment", # Generic type instead of verse/chorus/etc - "start": start, - "end": end, - "duration": duration - }) - - # Filter out any remaining extremely short sections - sections = [s for s in sections if s["duration"] >= 5] - - return sections - -def create_flexible_syllable_templates(beats_info, genre=None, phrase_mode='default'): - """ - Create enhanced syllable templates based on beat patterns with improved musical intelligence. 
- - Parameters: - beats_info: Dictionary containing beat analysis data - genre: Optional genre to influence template creation - phrase_mode: 'default' uses provided phrases, 'auto' forces recalculation - - Returns: - String of syllable templates with embedded strength values and flexible timing - """ - import numpy as np - from sklearn.cluster import KMeans - - # Convert any numpy values to native Python types for safety - directly handle conversions - # Process the dictionary to convert numpy values to Python native types - if isinstance(beats_info, dict): - processed_beats_info = {} - for k, v in beats_info.items(): - if isinstance(v, np.ndarray): - if v.size == 1: - processed_beats_info[k] = float(v.item()) - else: - processed_beats_info[k] = [float(x) if isinstance(x, np.number) else x for x in v] - elif isinstance(v, np.number): - processed_beats_info[k] = float(v) - elif isinstance(v, list): - processed_beats_info[k] = [float(x) if isinstance(x, np.number) else x for x in v] - else: - processed_beats_info[k] = v - beats_info = processed_beats_info - - # Extract basic beat information - beat_times = beats_info.get("beat_times", []) - beat_strengths = beats_info.get("beat_strengths", [1.0] * len(beat_times)) - tempo = beats_info.get("tempo", 120) - time_signature = beats_info.get("time_signature", 4) - - # Early return for insufficient data - if len(beat_times) < 2: - return "S(1.0):1-w(0.5):1|S(1.0):1-w(0.5):1" # Default fallback pattern - - # Step 1: Improved adaptive thresholding using k-means clustering - # ---------------------------------------------------------------------- - if len(beat_strengths) >= 6: # Need enough data points for clustering - # Reshape for k-means - X = np.array(beat_strengths).reshape(-1, 1) - - # Use k-means with 3 clusters for Strong, Medium, Weak classification - kmeans = KMeans(n_clusters=3, random_state=0, n_init=10).fit(X) - - # Find the centroid values and sort them - centroids = sorted([float(c[0]) for c in kmeans.cluster_centers_]) - - # Map to thresholds (using the midpoints between centroids) - if len(centroids) >= 3: - medium_threshold = (centroids[0] + centroids[1]) / 2 - strong_threshold = (centroids[1] + centroids[2]) / 2 - else: - # Fallback if clustering doesn't work well - medium_threshold = np.percentile(beat_strengths, 33) - strong_threshold = np.percentile(beat_strengths, 66) - else: - # For limited data, use percentile-based approach - medium_threshold = np.percentile(beat_strengths, 33) - strong_threshold = np.percentile(beat_strengths, 66) - - # Step 2: Create or refine phrases based on mode - # ---------------------------------------------------------------------- - phrases = beats_info.get("phrases", []) - - if phrase_mode == 'auto' or not phrases: - # Create phrases based on time signature and beat strengths - phrases = [] - current_phrase = [] - - for i in range(len(beat_times)): - current_phrase.append(i) - - # Check for natural phrase endings - if (i + 1) % time_signature == 0 or i == len(beat_times) - 1: - if len(current_phrase) >= 2: # Ensure minimum phrase length - phrases.append(current_phrase) - current_phrase = [] - - # Add any remaining beats - if current_phrase and len(current_phrase) >= 2: - phrases.append(current_phrase) - - # Step 3: Improved continuous tempo-to-syllable mapping function - # ---------------------------------------------------------------------- - def tempo_to_syllable_base(tempo): - """Continuous function mapping tempo to syllable base count with scientific curve""" - # Sigmoid-like function with 
more scientific parameters - # Using logistic function: L/(1+e^(-k(x-x0))) to create smooth transitions - if tempo < 40: # Very slow tempos - return 3.5 # Maximum syllables for extremely slow tempos - elif tempo > 200: # Very fast tempos - return 0.8 # Minimum syllables for extremely fast tempos - else: - # Scientific logistic function for middle range (40-200 BPM) - L = 3.5 # Upper limit - k = 0.04 # Steepness of curve - x0 = 120 # Midpoint (inflection point at normal tempo) - return L / (1 + np.exp(k * (tempo - x0))) - - # Step 4: Generate enhanced templates with flexible timing - # ---------------------------------------------------------------------- - syllable_templates = [] - - for phrase in phrases: - # Skip empty phrases - if not phrase: - continue - - # Extract beat strengths for this phrase - phrase_strengths = [beat_strengths[i] for i in phrase if i < len(beat_strengths)] - if not phrase_strengths: - phrase_strengths = [1.0] * len(phrase) - - # Apply improved adaptive thresholding for stress pattern detection - stress_pattern = [] - for i, strength in enumerate(phrase_strengths): - # Consider both strength and metrical position with improved weighting - metrical_position = i % time_signature - - # Apply improved position boosting based on musical theory - # In common time signatures, first beat gets strong emphasis, - # third beat gets moderate emphasis (in 4/4) - if metrical_position == 0: # Downbeat (first beat) - position_boost = 0.18 # Stronger boost for downbeats - elif time_signature == 4 and metrical_position == 2: # Third beat in 4/4 - position_boost = 0.1 # Moderate boost for third beat - elif time_signature == 3 and metrical_position == 1: # Second beat in 3/4 - position_boost = 0.05 # Slight boost for second beat in 3/4 - else: - position_boost = 0 # No boost for other beats - - effective_strength = strength + position_boost - - if effective_strength >= strong_threshold: - stress_pattern.append(("S", effective_strength)) # Strong beat with strength - elif effective_strength >= medium_threshold: - stress_pattern.append(("m", effective_strength)) # Medium beat with strength - else: - stress_pattern.append(("w", effective_strength)) # Weak beat with strength - - # Step 5: Calculate syllable counts using improved continuous function - # ---------------------------------------------------------------------- - detailed_template = [] - - for i, (stress_type, strength) in enumerate(stress_pattern): - # Get base syllable count from tempo with more nuanced mapping - base_syllables = tempo_to_syllable_base(tempo) - - # Adjust based on both stress type AND metrical position - metrical_position = i % time_signature - position_factor = 1.2 if metrical_position == 0 else 1.0 - - # More nuanced adjustment based on stress type - if stress_type == "S": - syllable_factor = 1.2 * position_factor # Emphasize strong beats more - elif stress_type == "m": - syllable_factor = 1.0 * position_factor # Medium beats - else: - syllable_factor = 0.8 # Weak beats - - # Apply improved genre-specific adjustments with more granular factors - genre_factor = 1.0 - if genre: - genre = genre.lower() - if "rap" in genre or "hip" in genre: - genre_factor = 1.5 # Significantly higher syllable density for rap - elif "folk" in genre or "country" in genre or "ballad" in genre: - genre_factor = 0.7 # Lower density for folk styles - elif "metal" in genre or "rock" in genre: - genre_factor = 1.1 # Slightly higher density for rock/metal - elif "jazz" in genre: - genre_factor = 1.2 # Higher density for jazz (complex 
rhythms) - elif "classical" in genre: - genre_factor = 0.9 # More moderate for classical - - # Calculate adjusted syllable count with scientific weighting - raw_count = base_syllables * syllable_factor * genre_factor - - # Use more precise rounding that preserves subtle differences - # Round to quarters rather than halves for more precision - rounded_count = round(raw_count * 4) / 4 - - # Limit to reasonable range (0.5 to 4) with improved bounds - syllable_count = max(0.5, min(4, rounded_count)) - - # Format with embedded strength value for reversibility - # Convert strength to 2-decimal precision percentage - strength_pct = round(strength * 100) / 100 - detailed_template.append(f"{stress_type}({strength_pct}):{syllable_count}") - - # Join beat templates for this phrase - phrase_template = "-".join(detailed_template) - syllable_templates.append(phrase_template) - - # Step 6: Ensure valid output with improved defaults - # ---------------------------------------------------------------------- - if not syllable_templates: - # Create sensible defaults based on time signature that reflect musical theory - if time_signature == 3: # 3/4 time - waltz pattern - syllable_templates = ["S(0.95):2-w(0.4):1-w(0.35):1"] # 3/4 default - elif time_signature == 2: # 2/4 time - march pattern - syllable_templates = ["S(0.95):1.5-w(0.4):1"] # 2/4 default - else: # 4/4 time - common time - syllable_templates = ["S(0.95):2-w(0.4):1-m(0.7):1.5-w(0.35):1"] # 4/4 default - - # Join all phrase templates with the original separator for compatibility - return "|".join(syllable_templates) - -def format_syllable_templates_for_prompt(syllable_templates, arrow="→", line_wrap=10, - structured_output=False, beat_types=None): - """ - Convert technical syllable templates into clear, human-readable instructions with - enhanced flexibility and customization options. 
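# ---- [Editor's illustration - not part of the patch] ----
# The removed template builder above emits strings that pack beat type,
# strength, and syllable count together, e.g. "S(0.95):2-w(0.4):1", with "|"
# separating phrases. A small sketch of how such a string can be decoded
# (parse_template is an illustrative name, not part of the codebase):

import re

BEAT_RE = re.compile(r'([SmwXL])\(([\d.]+)\):([\d.]+)')

def parse_template(template):
    phrases = []
    for phrase in template.split('|'):
        beats = [
            {"type": t, "strength": float(s), "syllables": float(c)}
            for t, s, c in BEAT_RE.findall(phrase)
        ]
        phrases.append(beats)
    return phrases

print(parse_template("S(0.95):2-w(0.4):1"))
# -> [[{'type': 'S', 'strength': 0.95, 'syllables': 2.0},
#      {'type': 'w', 'strength': 0.4, 'syllables': 1.0}]]
# ---- [End illustration] ----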
- - Parameters: - syllable_templates: String or list of templates - arrow: Symbol to use between beats (default: "→") - line_wrap: Number of beats before automatic line wrapping (0 = no wrapping) - structured_output: If True, return structured data instead of text - beat_types: Custom mapping for beat types (default: None, uses standard mapping) - - Returns: - Human-readable instructions or structured data depending on parameters - """ - if not syllable_templates: - return {} if structured_output else "" - - # Define standard beat type mapping (extensible) - default_beat_types = { - "S": {"name": "STRONG", "description": "stressed syllable"}, - "m": {"name": "medium", "description": "medium-stressed syllable"}, - "w": {"name": "weak", "description": "unstressed syllable"}, - "X": {"name": "EXTRA", "description": "extra strong syllable"}, - "L": {"name": "legato", "description": "connected/tied syllable"} - } - - # Use custom mapping if provided, otherwise use default - beat_types = beat_types or default_beat_types - - # Initialize structured output if requested - structured_data = {"lines": [], "explanations": []} if structured_output else None - - # Improved format detection - more robust than just checking for "|" - is_enhanced_format = False - - # Check if it's a string with enhanced format patterns - if isinstance(syllable_templates, str): - # Look for enhanced format patterns - check for beat type indicators - if any(bt + "(" in syllable_templates or bt + ":" in syllable_templates or bt + "[" in syllable_templates - for bt in beat_types.keys()): - is_enhanced_format = True - # Secondary check for the "|" delimiter between phrases - elif "|" in syllable_templates: - is_enhanced_format = True - - # Initialize the output with a brief explanatory header - output = [] - - if is_enhanced_format: - # Split into individual phrase templates - phrases = syllable_templates.split("|") if "|" in syllable_templates else [syllable_templates] - - # Process each phrase into human-readable instructions - for i, phrase in enumerate(phrases): - # Check for special annotations - has_swing = "(swing)" in phrase - if has_swing: - phrase = phrase.replace("(swing)", "") # Remove annotation for processing - - beats = phrase.split("-") - beat_instructions = [] - - # Process each beat in the phrase - for j, beat in enumerate(beats): - # Extract beat type and information - beat_info = {"original": beat, "type": None, "count": None, "strength": None} - - # Handle enhanced format with embedded strength values: S(0.95):2 - if "(" in beat and ")" in beat and ":" in beat: - parts = beat.split(":") - beat_type = parts[0].split("(")[0] # Extract beat type - strength = parts[0].split("(")[1].rstrip(")") # Extract strength value - count = parts[1] # Extract syllable count - - beat_info["type"] = beat_type - beat_info["count"] = count - beat_info["strength"] = strength - - # Handle simpler format: S2, m1, w1 - elif any(beat.startswith(bt) for bt in beat_types.keys()) and len(beat) > 1: - beat_type = beat[0] - count = beat[1:] - - beat_info["type"] = beat_type - beat_info["count"] = count - - # Fallback for any other format - else: - beat_instructions.append(beat) - continue - - # Format the beat instruction based on type - if beat_info["type"] in beat_types: - type_name = beat_types[beat_info["type"]]["name"] - if beat_info["strength"]: - beat_instructions.append(f"{type_name}({beat_info['count']}) [{beat_info['strength']}]") - else: - beat_instructions.append(f"{type_name}({beat_info['count']})") - else: - # Unknown beat 
type, use as-is - beat_instructions.append(beat) - - # Handle line wrapping for readability - if line_wrap > 0 and len(beat_instructions) > line_wrap: - wrapped_instructions = [] - for k in range(0, len(beat_instructions), line_wrap): - section = beat_instructions[k:k+line_wrap] - wrapped_instructions.append(f"{arrow} ".join(section)) - line_desc = f"\n {arrow} ".join(wrapped_instructions) - else: - line_desc = f" {arrow} ".join(beat_instructions) - - # Add swing notation if present - if has_swing: - line_desc += " [with swing feel]" - - # Add to output - line_output = f"Line {i+1}: {line_desc}" - output.append(line_output) - - if structured_output: - structured_data["lines"].append({ - "line_number": i+1, - "beats": [{"original": beats[j], - "type": beat_info.get("type"), - "count": beat_info.get("count"), - "strength": beat_info.get("strength")} - for j, beat_info in enumerate([b for b in beats if isinstance(b, dict)])], - "has_swing": has_swing - }) - - # Add explanation of notation after the lines - explanation = [ - "\n📝 UNDERSTANDING THE NOTATION:" - ] - - # Add descriptions for each beat type that was actually used - used_beat_types = set() - for phrase in phrases: - for beat in phrase.split("-"): - for bt in beat_types.keys(): - if beat.startswith(bt): - used_beat_types.add(bt) - - for bt in used_beat_types: - if bt in beat_types: - name = beat_types[bt]["name"] - desc = beat_types[bt]["description"] - explanation.append(f"- {name}(n): Place a {desc} here, plus (n-1) unstressed syllables") - - explanation.extend([ - f"- {arrow}: Indicates flow from one beat to the next", - "- [0.xx]: Beat strength value (higher = more emphasis needed)" - ]) - - output.extend(explanation) - - if structured_output: - structured_data["explanations"] = explanation - - # Add examples for half-syllable values if they appear in the templates - has_half_syllables = any((".5" in beat) for phrase in phrases for beat in phrase.split("-")) - if has_half_syllables: - half_syllable_examples = [ - "\n🎵 HALF-SYLLABLE EXAMPLES:", - "- STRONG(1.5): One stressed syllable followed by an unstressed half-syllable", - " Example: \"LOVE you\" where \"LOVE\" is stressed and \"you\" is quick", - "- medium(2.5): One medium syllable plus one-and-a-half unstressed syllables", - " Example: \"Wait for the\" where \"Wait\" is medium-stressed and \"for the\" is quick" - ] - output.extend(half_syllable_examples) - - if structured_output: - structured_data["half_syllable_examples"] = half_syllable_examples - - # Add swing explanation if needed - if any("swing" in phrase for phrase in phrases): - swing_guide = [ - "\n🎶 SWING RHYTHM GUIDE:", - "- In swing, syllables should be unevenly timed (long-short pattern)", - "- Example: \"SUM-mer TIME\" in swing feels like \"SUM...mer-TIME\" with delay" - ] - output.extend(swing_guide) - - if structured_output: - structured_data["swing_guide"] = swing_guide - - # Handle the original format or segment dictionaries - else: - formatted_lines = [] - - if isinstance(syllable_templates, list): - for i, template in enumerate(syllable_templates): - if isinstance(template, dict) and "syllable_template" in template: - line = f"Line {i+1}: {template['syllable_template']} syllables" - formatted_lines.append(line) - - if structured_output: - structured_data["lines"].append({ - "line_number": i+1, - "syllable_count": template["syllable_template"] - }) - elif isinstance(template, str): - line = f"Line {i+1}: {template} syllables" - formatted_lines.append(line) - - if structured_output: - 
structured_data["lines"].append({ - "line_number": i+1, - "syllable_count": template - }) - - output = formatted_lines - else: - output = [str(syllable_templates)] - - if structured_output: - structured_data["raw_content"] = str(syllable_templates) - - # Add general application advice - application_tips = [ - "\n💡 APPLICATION TIPS:", - "1. Strong beats need naturally stressed syllables (like the START of \"RE-mem-ber\")", - "2. Place important words on strong beats for natural emphasis", - "3. Vowel sounds work best for sustained or emphasized syllables", - "4. Keep consonant clusters (like \"str\" or \"thr\") on weak beats" - ] - output.extend(application_tips) - - if structured_output: - structured_data["application_tips"] = application_tips - return structured_data - - return "\n".join(output) - -def verify_flexible_syllable_counts(lyrics, templates, second_level_templates=None): - """ - Enhanced verification of syllable counts and stress patterns with precise alignment analysis - for both phrase-level and second-level templates. - """ - import re - import pronouncing - import numpy as np - import functools - from itertools import chain - - print(f"DEBUG: In verify_flexible_syllable_counts, type of lyrics={type(lyrics)}") - print(f"DEBUG: Type of templates={type(templates)}") - - # Ensure lyrics is a string - if not isinstance(lyrics, str): - print(f"DEBUG: lyrics is not a string, it's {type(lyrics)}") - # Convert to string if possible - try: - lyrics = str(lyrics) - except Exception as e: - print(f"DEBUG: Cannot convert lyrics to string: {str(e)}") - return "Error: Cannot process non-string lyrics" - - # Ensure templates is a list - if not isinstance(templates, list): - print(f"DEBUG: templates is not a list, it's {type(templates)}") - # If it's not a list, create a single-item list - if templates is not None: - templates = [templates] - else: - templates = [] - - # Split lyrics into lines - lines = [line.strip() for line in lyrics.split("\n") if line.strip()] - - # Initialize tracking variables - verification_notes = [] - detailed_analysis = [] - stress_misalignments = [] - total_mismatch_count = 0 - - # Process each lyric line against its template - for i, line in enumerate(lines): - if i >= len(templates): - break - - template = templates[i] - print(f"DEBUG: Processing template {i+1}, type={type(template)}") - - # Extract the template string from different possible formats - template_str = None - if isinstance(template, dict) and "syllable_template" in template: - template_str = template["syllable_template"] - elif isinstance(template, str): - template_str = template - else: - print(f"DEBUG: Skipping template {i+1}, not a string or dict with syllable_template") - continue - - if not isinstance(template_str, str): - print(f"DEBUG: template_str is not a string, it's {type(template_str)}") - continue - - # Handle multiple phrases in template - process ALL phrases, not just the first - template_phrases = [template_str] - if "|" in template_str: - template_phrases = template_str.split("|") - - # Check against all phrases and find the best match - best_match_diff = float('inf') - best_match_phrase = None - best_phrase_beats = None - actual_count = count_syllables(line) - - for phrase_idx, phrase in enumerate(template_phrases): - # Extract beat patterns and expected syllable counts from template - beats_info = [] - total_expected = 0 - - # Enhanced template parsing - if "-" in phrase: - beat_templates = phrase.split("-") - - # Parse each beat template - for beat in beat_templates: - 
beat_info = {"original": beat, "type": None, "count": 1, "strength": None} - - # Handle templates with embedded strength values: S(0.95):2 - if "(" in beat and ")" in beat and ":" in beat: - parts = beat.split(":") - beat_type = parts[0].split("(")[0] - try: - strength = float(parts[0].split("(")[1].rstrip(")")) - except ValueError: - strength = 1.0 - - # Handle potential float syllable counts - try: - count = float(parts[1]) - # Convert to int if it's a whole number - if count == int(count): - count = int(count) - except ValueError: - count = 1 - - beat_info.update({ - "type": beat_type, - "count": count, - "strength": strength - }) - - # Handle simple format: S2, m1, w1 - elif any(beat.startswith(x) for x in ["S", "m", "w", "X", "L"]): - beat_type = beat[0] - - # Extract count, supporting float values - try: - count_str = beat[1:] - count = float(count_str) - if count == int(count): - count = int(count) - except ValueError: - count = 1 - - beat_info.update({ - "type": beat_type, - "count": count - }) - - # Legacy format - just numbers - else: - try: - count = float(beat) - if count == int(count): - count = int(count) - beat_info["count"] = count - except ValueError: - pass - - beats_info.append(beat_info) - total_expected += beat_info["count"] - - # Compare this phrase to actual syllable count - phrase_diff = abs(actual_count - total_expected) - - # Adaptive threshold based on expected syllables - expected_ratio = 0.15 if total_expected > 10 else 0.25 - phrase_threshold = max(1, round(total_expected * expected_ratio)) - - # If this is the best match so far, store it - if phrase_diff < best_match_diff: - best_match_diff = phrase_diff - best_match_phrase = phrase - best_phrase_beats = beats_info - - # For very simple templates without "-" - else: - try: - total_expected = float(phrase) - phrase_diff = abs(actual_count - total_expected) - if phrase_diff < best_match_diff: - best_match_diff = phrase_diff - best_match_phrase = phrase - best_phrase_beats = [{"count": total_expected}] - except ValueError: - pass - - # If we found a reasonable match, proceed with analysis - if best_match_phrase and best_phrase_beats: - total_expected = sum(beat["count"] for beat in best_phrase_beats) - - # Calculate adaptive threshold based on expected syllables - expected_ratio = 0.15 if total_expected > 10 else 0.25 - threshold = max(1, round(total_expected * expected_ratio)) - - # Check if total syllable count is significantly off - if total_expected > 0 and best_match_diff > threshold: - verification_notes.append(f"Line {i+1}: Expected {total_expected} syllables, got {actual_count}") - total_mismatch_count += 1 - - # Extract words and perform detailed alignment analysis - words = re.findall(r'\b[a-zA-Z]+\b', line.lower()) - - # Get syllable count and stress for each word - word_analysis = [] - cumulative_syllables = 0 - - for word in words: - syllable_count = count_syllables_for_word(word) - - # Get stress pattern - stress_pattern = get_word_stress(word) - - word_analysis.append({ - "word": word, - "syllables": syllable_count, - "stress_pattern": stress_pattern, - "position": cumulative_syllables - }) - - cumulative_syllables += syllable_count - - # Analyze alignment with beats - only if there are beat types - if best_phrase_beats and any(b.get("type") == "S" for b in best_phrase_beats if "type" in b): - # Identify positions where strong syllables should fall - strong_positions = [] - current_pos = 0 - - for beat in best_phrase_beats: - if beat.get("type") == "S": - strong_positions.append(current_pos) - 
current_pos += beat.get("count", 1) - - # Check if strong syllables align with strong beats - alignment_issues = [] - - for pos in strong_positions: - # Find which word contains this position - misaligned_word = None - - for word_info in word_analysis: - word_start = word_info["position"] - word_end = word_start + word_info["syllables"] - - if word_start <= pos < word_end: - # Check if a stressed syllable falls on this position - syllable_in_word = pos - word_start - - # Get stress pattern for this word - stress = word_info["stress_pattern"] - - # If we have stress information and this syllable isn't stressed - if stress and syllable_in_word < len(stress) and stress[syllable_in_word] != '1': - misaligned_word = word_info["word"] - alignment_issues.append(f"'{word_info['word']}' (unstressed syllable on strong beat)") - stress_misalignments.append({ - "line": i+1, - "word": word_info["word"], - "position": pos, - "suggestion": get_stress_aligned_alternatives(word_info["word"], syllable_in_word) - }) - break - - if alignment_issues: - verification_notes.append(f" → Stress misalignments: {', '.join(alignment_issues)}") - - # Generate a visual alignment map for better understanding - alignment_map = generate_alignment_visualization(line, best_phrase_beats, word_analysis) - if alignment_map: - detailed_analysis.append(f"Line {i+1} Alignment Analysis:\n{alignment_map}") - else: - # If no matching template was found - verification_notes.append(f"Line {i+1}: Unable to find matching template pattern") - - # Add second-level verification if templates are provided - if second_level_templates: - verification_notes.append("\n=== SECOND-LEVEL VERIFICATION ===\n") - - # Check each second against corresponding line - for i, template in enumerate(second_level_templates): - if i >= len(lines): - break - - line = lines[i] - - # Skip section headers - if line.startswith('[') and ']' in line: - continue - - actual_count = count_syllables(line) - - # Parse template to get expected syllable count - total_expected = 0 - beat_patterns = [] - - # Handle templates with beat patterns like "S(0.95):2-w(0.4):1" - if isinstance(template, str) and "-" in template: - for beat in template.split("-"): - if ":" in beat: - try: - count_part = beat.split(":")[1] - count = float(count_part) - total_expected += count - - # Extract beat type for alignment check - beat_type = beat.split("(")[0] if "(" in beat else beat[0] - beat_patterns.append((beat_type, count)) - except (IndexError, ValueError): - pass - - # Compare actual vs expected count - if total_expected > 0: - # Calculate adaptive threshold based on expected syllables - expected_ratio = 0.2 # More strict at second level - threshold = max(0.5, round(total_expected * expected_ratio)) - - difference = abs(actual_count - total_expected) - - if difference > threshold: - verification_notes.append(f"Second {i+1}: Expected {total_expected} syllables, got {actual_count}") - total_mismatch_count += 1 - - # Check for stress misalignment in this second - words = re.findall(r'\b[a-zA-Z]+\b', line.lower()) - word_analysis = [] - cumulative_syllables = 0 - - for word in words: - syllable_count = count_syllables_for_word(word) - stress_pattern = get_word_stress(word) - - word_analysis.append({ - "word": word, - "syllables": syllable_count, - "stress_pattern": stress_pattern, - "position": cumulative_syllables - }) - - cumulative_syllables += syllable_count - - # Check if stressed syllables align with strong beats - if beat_patterns: - strong_positions = [] - current_pos = 0 - - for 
beat_type, count in beat_patterns: - if beat_type == "S": - strong_positions.append(current_pos) - current_pos += count - - # Look for misalignments - for pos in strong_positions: - for word_info in word_analysis: - word_start = word_info["position"] - word_end = word_start + word_info["syllables"] - - if word_start <= pos < word_end: - # Check if a stressed syllable falls on this position - syllable_in_word = int(pos - word_start) - stress = word_info["stress_pattern"] - - if stress and syllable_in_word < len(stress) and stress[syllable_in_word] != '1': - verification_notes.append(f" → In second {i+1}, '{word_info['word']}' has unstressed syllable on strong beat") - break - - # Only add detailed analysis if we have rhythm mismatches - if verification_notes: - lyrics += "\n\n[Note: Potential rhythm mismatches detected in these lines:]\n" - lyrics += "\n".join(verification_notes) - - if detailed_analysis: - lyrics += "\n\n[Detailed Alignment Analysis:]\n" - lyrics += "\n\n".join(detailed_analysis) - - lyrics += "\n\n[How to fix rhythm mismatches:]\n" - lyrics += "1. Make sure stressed syllables (like 'LO' in 'LOV-er') fall on STRONG beats\n" - lyrics += "2. Adjust syllable counts to match the template (add/remove words or use different words)\n" - lyrics += "3. Try using words where natural stress aligns with musical rhythm\n" - - # Add specific word substitution suggestions if we found stress misalignments - if stress_misalignments: - lyrics += "\n[Specific word replacement suggestions:]\n" - for issue in stress_misalignments[:5]: # Limit to first 5 issues - if issue["suggestion"]: - lyrics += f"Line {issue['line']}: Consider replacing '{issue['word']}' with: {issue['suggestion']}\n" - - return lyrics - -def generate_alignment_visualization(line, beats_info, word_analysis): - """Generate a visual representation of syllable alignment with beats.""" - if not beats_info or not word_analysis: - return None - - # Create a syllable breakdown with stress information - syllable_breakdown = [] - syllable_stresses = [] - - for word_info in word_analysis: - word = word_info["word"] - syllables = word_info["syllables"] - stress = word_info["stress_pattern"] or "" - - # Extend stress pattern if needed - while len(stress) < syllables: - stress += "0" - - # Get syllable breakdown - parts = naive_syllable_split(word, syllables) - - for i, part in enumerate(parts): - syllable_breakdown.append(part) - if i < len(stress): - syllable_stresses.append(stress[i]) - else: - syllable_stresses.append("0") - - # Create beat pattern - beat_types = [] - current_pos = 0 - - for beat in beats_info: - beat_type = beat.get("type", "-") - count = beat.get("count", 1) - - # Handle whole numbers and half syllables - if isinstance(count, int): - beat_types.extend([beat_type] * count) - else: - # For half syllables, round up and use markers - whole_part = int(count) - frac_part = count - whole_part - - if whole_part > 0: - beat_types.extend([beat_type] * whole_part) - - if frac_part > 0: - beat_types.append(f"{beat_type}½") - - # Ensure we have enough beat types - while len(beat_types) < len(syllable_breakdown): - beat_types.append("-") - - # Trim beat types if too many - beat_types = beat_types[:len(syllable_breakdown)] - - # Generate the visualization with highlighted misalignments - result = [] - - # First line: syllable breakdown with stress indicators - syllable_display = [] - for i, syllable in enumerate(syllable_breakdown): - if i < len(syllable_stresses) and syllable_stresses[i] == "1": - 
syllable_display.append(syllable.upper()) # Uppercase for stressed syllables - else: - syllable_display.append(syllable.lower()) # Lowercase for unstressed - - result.append(" - ".join(syllable_display)) - - # Second line: beat indicators with highlighting for misalignments - beat_indicators = [] - for i, (syllable, beat_type) in enumerate(zip(syllable_stresses, beat_types)): - if beat_type == "S" or beat_type.startswith("S"): - if syllable == "1": - beat_indicators.append("↑") # Aligned strong beat - else: - beat_indicators.append("❌") # Misaligned strong beat - elif beat_type == "m" or beat_type.startswith("m"): - beat_indicators.append("•") # Medium beat - elif beat_type == "w" or beat_type.startswith("w"): - beat_indicators.append("·") # Weak beat - else: - beat_indicators.append(" ") - - result.append(" ".join(beat_indicators)) - - # Third line: beat types - result.append(" - ".join(beat_types)) - - return "\n".join(result) - -@functools.lru_cache(maxsize=256) -def naive_syllable_split(word, syllable_count): - """Naively split a word into the specified number of syllables, with caching for performance.""" - if syllable_count <= 1: - return [word] - - # Common syllable break patterns - vowels = "aeiouy" - consonants = "bcdfghjklmnpqrstvwxz" - - # Find potential split points - splits = [] - for i in range(1, len(word) - 1): - if word[i] in consonants and word[i-1] in vowels: - splits.append(i) - elif word[i] in vowels and word[i-1] in consonants and word[i+1] in consonants: - splits.append(i+1) - - # Ensure we have enough split points - while len(splits) < syllable_count - 1: - for i in range(1, len(word)): - if i not in splits: - splits.append(i) - break - - # Sort and limit - splits.sort() - splits = splits[:syllable_count - 1] - - # Split the word - result = [] - prev = 0 - for pos in splits: - result.append(word[prev:pos]) - prev = pos - - result.append(word[prev:]) - return result - -def get_stress_aligned_alternatives(word, position_to_stress): - """Suggest alternative words with proper stress at the required position.""" - # This would ideally use a more sophisticated dictionary lookup, - # but here's a simple implementation with common word patterns - syllable_count = count_syllables_for_word(word) - - # Common synonyms/replacements by syllable count with stress position - if syllable_count == 2: - if position_to_stress == 0: # Need stress on first syllable - first_stress = ["love-ly", "won-der", "beau-ty", "danc-ing", "dream-ing", - "heart-beat", "sun-light", "moon-light", "star-light"] - return ", ".join(first_stress[:3]) - else: # Need stress on second syllable - second_stress = ["be-LIEVE", "a-BOVE", "a-ROUND", "to-DAY", "a-LIVE", - "a-LONE", "be-HOLD", "re-TURN", "de-LIGHT"] - return ", ".join(second_stress[:3]) - elif syllable_count == 3: - if position_to_stress == 0: # First syllable stress - return "MEM-o-ry, WON-der-ful, BEAU-ti-ful" - elif position_to_stress == 1: # Second syllable stress - return "a-MAZE-ing, to-GE-ther, for-EV-er" - else: # Third syllable stress - return "un-der-STAND, o-ver-COME, ne-ver-MORE" - - # For other cases, just provide general guidance - return f"a word with stress on syllable {position_to_stress + 1}" - -def generate_lyrics(genre, duration, emotion_results, song_structure=None, lyrics_requirements=None): - """ - Generate lyrics based on the genre, emotion, and structure analysis with enhanced rhythmic alignment. 
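# ---- [Editor's illustration - not part of the patch] ----
# A note on the removed naive_syllable_split() above: it is memoized with
# functools.lru_cache but returns a list, so repeated calls hand back the same
# mutable object; callers that modify the result would corrupt the cache.
# Returning a tuple, as in this sketch (illustrative name and logic), avoids
# that pitfall:

import functools

@functools.lru_cache(maxsize=256)
def cached_split(word, n):
    # Crude fixed-width stand-in for naive_syllable_split(word, n)
    step = max(1, len(word) // n)
    return tuple(word[i:i + step] for i in range(0, len(word), step))

print(cached_split("remember", 3))        # ('re', 'me', 'mb', 'er') -- naive cut
print(cached_split.cache_info().hits)     # 0 after the first (miss) call
# ---- [End illustration] ----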
- - This improved version uses advanced template creation, better formatting, and verification with - potential refinement for lyrics that perfectly match the musical rhythm patterns. - - Parameters: - genre: Musical genre of the audio - duration: Duration of the audio in seconds - emotion_results: Dictionary containing emotional analysis results - song_structure: Optional dictionary containing song structure analysis - lyrics_requirements: Optional user-provided requirements for the lyrics - - Returns: - Generated lyrics aligned with the rhythm patterns of the music - """ - # Safety check for strings - def is_safe_dict_access(obj, key): - """Safe dictionary key access with type checking""" - if not isinstance(obj, dict): - print(f"WARNING: Attempted to access key '{key}' on non-dictionary object of type {type(obj)}") - return False - return key in obj - - # Ensure emotion_results is a dictionary with the expected structure - if not isinstance(emotion_results, dict): - emotion_results = { - "emotion_analysis": {"primary_emotion": "Unknown"}, - "theme_analysis": {"primary_theme": "Unknown"}, - "rhythm_analysis": {"tempo": 0}, - "tonal_analysis": {"key": "Unknown", "mode": ""}, - "summary": {"tempo": 0, "key": "Unknown", "mode": "", "primary_emotion": "Unknown", "primary_theme": "Unknown"} - } - - # Ensure song_structure is properly structured - if song_structure is not None and not isinstance(song_structure, dict): - print(f"WARNING: song_structure is not a dict, it's {type(song_structure)}") - song_structure = None - - print(f"DEBUG: Starting generate_lyrics with genre={genre}, duration={duration}") - print(f"DEBUG: Type of song_structure={type(song_structure)}") - print(f"DEBUG: Type of emotion_results={type(emotion_results)}") - - # Helper function to safely access dictionary with string keys - def safe_dict_get(d, key, default=None): - """Safely get a value from a dictionary, handling non-dictionary objects.""" - if not isinstance(d, dict): - print(f"WARNING: Attempted to access key '{key}' in non-dictionary object of type {type(d)}") - return default - return d.get(key, default) - - # Extract emotion and theme data with safe defaults - primary_emotion = safe_dict_get(safe_dict_get(emotion_results, "emotion_analysis", {}), "primary_emotion", "Unknown") - primary_theme = safe_dict_get(safe_dict_get(emotion_results, "theme_analysis", {}), "primary_theme", "Unknown") - - # Extract numeric values safely with fallbacks - try: - tempo = float(safe_dict_get(safe_dict_get(emotion_results, "rhythm_analysis", {}), "tempo", 0.0)) - except (ValueError, TypeError): - tempo = 0.0 - - key = safe_dict_get(safe_dict_get(emotion_results, "tonal_analysis", {}), "key", "Unknown") - mode = safe_dict_get(safe_dict_get(emotion_results, "tonal_analysis", {}), "mode", "") - - # Format syllable templates for the prompt - syllable_guidance = "" - templates_for_verification = [] - - # Create a structure visualization to help with lyrics-music matching - structure_visualization = "=== MUSIC-LYRICS STRUCTURE MATCHING ===\n\n" - structure_visualization += f"Song Duration: {duration:.1f} seconds\n" - structure_visualization += f"Tempo: {tempo:.1f} BPM\n\n" - - # Add second-level template guidance if available - if song_structure and is_safe_dict_access(song_structure, "second_level") and is_safe_dict_access(song_structure.get("second_level", {}), "templates"): - print(f"DEBUG: Using second-level templates") - second_level_templates = song_structure.get("second_level", {}).get("templates", []) - - # Create second-level 
guidance - second_level_guidance = "\nSECOND-BY-SECOND RHYTHM INSTRUCTIONS:\n" - second_level_guidance += "Each line below corresponds to ONE SECOND of audio. Follow these rhythm patterns EXACTLY:\n\n" - - # Format each second's template - formatted_second_templates = [] - for i, template in enumerate(second_level_templates): - if i < min(60, len(second_level_templates)): # Limit to 60 seconds to avoid overwhelming the LLM - formatted_template = format_syllable_templates_for_prompt(template, arrow="→", line_wrap=0) - formatted_second_templates.append(f"Second {i+1}: {formatted_template}") - - second_level_guidance += "\n".join(formatted_second_templates) - - # Add critical instructions for second-level alignment - second_level_guidance += "\n\nCRITICAL: Create ONE LINE of lyrics for EACH SECOND, following the exact rhythm pattern." - second_level_guidance += "\nIf a second has no beats, use it for a breath or pause in the lyrics." - second_level_guidance += "\nThe first line of your lyrics MUST match Second 1, the second line matches Second 2, and so on." - - # Add to syllable guidance - syllable_guidance = second_level_guidance - - # Store templates for verification - templates_for_verification = second_level_templates - - elif song_structure: - print(f"DEBUG: Checking flexible structure") - # Try to use flexible structure if available - if is_safe_dict_access(song_structure, "flexible_structure"): - print(f"DEBUG: Using flexible structure") - flexible = song_structure.get("flexible_structure", {}) - if is_safe_dict_access(flexible, "segments") and len(flexible.get("segments", [])) > 0: - print(f"DEBUG: Found segments in flexible structure") - # Get the segments - segments = flexible.get("segments", []) - - # Add structure visualization - structure_visualization += f"Total segments: {len(segments)}\n" - structure_visualization += "Each segment represents one musical phrase for which you should write ONE line of lyrics.\n\n" - - # Process each segment to create enhanced rhythmic templates - enhanced_templates = [] - - for i, segment in enumerate(segments): - if i < 30: # Extend limit to 30 lines to handle longer songs - # Get the beat information for this segment - segment_start = segment["start"] - segment_end = segment["end"] - - # Add segment info to visualization - structure_visualization += f"Segment {i+1}: {segment_start:.1f}s - {segment_end:.1f}s (duration: {segment_end-segment_start:.1f}s)\n" - - # Find beats within this segment - segment_beats = [] - - # Add type checking for beat_times access - print(f"DEBUG: Checking beat_times in flexible structure") - if is_safe_dict_access(flexible, "beats") and is_safe_dict_access(flexible.get("beats", {}), "beat_times"): - beat_times = flexible.get("beats", {}).get("beat_times", []) - if isinstance(beat_times, list): - beat_strengths = flexible.get("beats", {}).get("beat_strengths", []) - - for j, beat_time in enumerate(beat_times): - if segment_start <= beat_time < segment_end: - # Add this beat to the segment - segment_beats.append(j) - - # Create segment-specific beat info - segment_beats_info = { - "beat_times": [beat_times[j] for j in segment_beats if j < len(beat_times)], - "tempo": flexible.get("beats", {}).get("tempo", 120) - } - - if beat_strengths and isinstance(beat_strengths, list): - segment_beats_info["beat_strengths"] = [ - beat_strengths[j] for j in segment_beats - if j < len(beat_strengths) - ] - - # Create a phrase structure for this segment - segment_beats_info["phrases"] = [segment_beats] - - # Generate enhanced template 
with genre awareness and auto phrasing - print(f"DEBUG: Creating flexible syllable template for segment {i+1}") - enhanced_template = create_flexible_syllable_templates( - segment_beats_info, - genre=genre, - phrase_mode='auto' if i == 0 else 'default' - ) - enhanced_templates.append(enhanced_template) - templates_for_verification.append(enhanced_template) - - # Add template to visualization - structure_visualization += f" Template: {enhanced_template}\n" - else: - print(f"DEBUG: beat_times is not a list, it's {type(beat_times)}") - else: - print(f"DEBUG: beats or beat_times not found in flexible structure") - # Skip segment if we don't have beat information - continue - - # Use these templates to determine rhythm patterns, without classifying as verse/chorus - pattern_groups = {} - - for i, template in enumerate(enhanced_templates): - # Create simplified version for pattern matching - simple_pattern = template.replace("(", "").replace(")", "").replace(":", "") - - # Check if this pattern is similar to any we've seen - found_match = False - for group, patterns in pattern_groups.items(): - if any(simple_pattern == p.replace("(", "").replace(")", "").replace(":", "") for p in patterns): - pattern_groups[group].append(template) - found_match = True - break - - if not found_match: - # New pattern type - group_name = f"Group_{len(pattern_groups) + 1}" - pattern_groups[group_name] = [template] - - # Format templates with improved formatting for the prompt - syllable_guidance = "CRITICAL RHYTHM INSTRUCTIONS:\n" - syllable_guidance += "Each line of lyrics MUST match exactly with one musical phrase/segment.\n" - syllable_guidance += "Follow these rhythm patterns for each line (STRONG beats need stressed syllables):\n\n" - - # Add formatted templates without section labels - formatted_templates = [] - for i, template in enumerate(enhanced_templates): - formatted_templates.append(format_syllable_templates_for_prompt([template], arrow="→", line_wrap=8)) - - syllable_guidance += "\n".join(formatted_templates) - - # Store info for later use in traditional sections approach - use_sections = True - - # Use the detected section structure for traditional approach - if verse_lines > 0: - verse_lines = min(verse_lines, total_lines // 2) # Ensure reasonable limits - else: - verse_lines = total_lines // 2 - - if chorus_lines > 0: - chorus_lines = min(chorus_lines, total_lines // 3) - else: - chorus_lines = total_lines // 3 - - if bridge_lines > 0: - bridge_lines = min(bridge_lines, total_lines // 6) - else: - bridge_lines = 0 - - # Fallback to traditional sections if needed - elif song_structure and is_safe_dict_access(song_structure, "syllables") and song_structure.get("syllables"): - syllable_guidance = "RHYTHM PATTERN INSTRUCTIONS:\n" - syllable_guidance += "Follow these syllable patterns for each section. 
Each line should match ONE phrase:\n\n" - - # Count sections for visualization - section_counts = {"verse": 0, "chorus": 0, "bridge": 0, "intro": 0, "outro": 0} - - for section in song_structure.get("syllables", []): - if not isinstance(section, dict): - continue - - section_type = section.get("type", "verse") - section_counts[section_type] = section_counts.get(section_type, 0) + 1 - - if is_safe_dict_access(section, "syllable_template"): - # Process to create enhanced template - if is_safe_dict_access(song_structure, "beats") and is_safe_dict_access(song_structure.get("beats", {}), "beat_times"): - section_beats_info = { - "beat_times": [beat for beat in song_structure.get("beats", {}).get("beat_times", []) - if section.get("start", 0) <= beat < section.get("end", 0)], - "tempo": song_structure.get("beats", {}).get("tempo", 120) - } - - if is_safe_dict_access(song_structure.get("beats", {}), "beat_strengths"): - section_beats_info["beat_strengths"] = [ - strength for i, strength in enumerate(song_structure.get("beats", {}).get("beat_strengths", [])) - if i < len(song_structure.get("beats", {}).get("beat_times", [])) and - section.get("start", 0) <= song_structure.get("beats", {}).get("beat_times", [])[i] < section.get("end", 0) - ] - - # Create a phrase structure for this section - section_beats_info["phrases"] = [list(range(len(section_beats_info["beat_times"])))] - - # Create a phrase structure for this section - section_beats_info["phrases"] = [list(range(len(section_beats_info["beat_times"])))] - - # Generate enhanced template with genre awareness - enhanced_template = create_flexible_syllable_templates( - section_beats_info, - genre=genre, - phrase_mode='auto' if section['type'] == 'verse' else 'default' - ) - - syllable_guidance += f"[{section['type'].capitalize()}]:\n" - syllable_guidance += format_syllable_templates_for_prompt( - enhanced_template, - arrow="→", - line_wrap=6 - ) + "\n\n" - templates_for_verification.append(section) - elif "syllable_count" in section: - syllable_guidance += f"[{section['type'].capitalize()}]: ~{section['syllable_count']} syllables total\n" - - # Create structure visualization - structure_visualization += "Using traditional section-based structure:\n" - for section_type, count in section_counts.items(): - if count > 0: - structure_visualization += f"{section_type.capitalize()}: {count} sections\n" - - # Set traditional section counts - verse_lines = max(2, section_counts.get("verse", 0) * 4) - chorus_lines = max(2, section_counts.get("chorus", 0) * 4) - bridge_lines = max(0, section_counts.get("bridge", 0) * 2) - - # Use sections approach - use_sections = True - - # If we couldn't get specific templates, use general guidance - if not syllable_guidance: - syllable_guidance = "RHYTHM ALIGNMENT INSTRUCTIONS:\n\n" - syllable_guidance += "1. Align stressed syllables with strong beats (usually beats 1 and 3 in 4/4 time)\n" - syllable_guidance += "2. Use unstressed syllables on weak beats (usually beats 2 and 4 in 4/4 time)\n" - syllable_guidance += "3. 
Use appropriate syllable counts based on tempo:\n" - syllable_guidance += " - Fast tempo (>120 BPM): 4-6 syllables per line\n" - syllable_guidance += " - Medium tempo (90-120 BPM): 6-8 syllables per line\n" - syllable_guidance += " - Slow tempo (<90 BPM): 8-10 syllables per line\n" - - # Create basic structure visualization - structure_visualization += "Using estimated structure (no detailed analysis available):\n" - - # Calculate rough section counts based on duration - estimated_lines = max(8, int(duration / 10)) - structure_visualization += f"Estimated total lines: {estimated_lines}\n" - - # Set traditional section counts based on duration - verse_lines = estimated_lines // 2 - chorus_lines = estimated_lines // 3 - bridge_lines = estimated_lines // 6 if estimated_lines > 12 else 0 - - # Use sections approach - use_sections = True - - # Add examples of syllable-beat alignment with enhanced format - syllable_guidance += "\nEXAMPLES OF PERFECT RHYTHM ALIGNMENT:\n" - syllable_guidance += "Pattern: S(0.95):1 → w(0.4):1 → m(0.7):1 → w(0.3):1\n" - syllable_guidance += "Lyric: 'HEAR the MU-sic PLAY'\n" - syllable_guidance += " ↑ ↑ ↑ ↑\n" - syllable_guidance += " S w m w <- BEAT TYPE\n\n" - - syllable_guidance += "Pattern: S(0.9):2 → w(0.3):1 → S(0.85):1 → w(0.4):2\n" - syllable_guidance += "Lyric: 'DANC-ing TO the RHYTHM of LOVE'\n" - syllable_guidance += " ↑ ↑ ↑ ↑ ↑ ↑\n" - syllable_guidance += " S S w S w w <- BEAT TYPE\n\n" - - syllable_guidance += "Pattern: S(0.92):1 → m(0.65):2 → S(0.88):1 → w(0.35):1\n" - syllable_guidance += "Lyric: 'TIME keeps FLOW-ing ON and ON'\n" - syllable_guidance += " ↑ ↑ ↑ ↑ ↑ ↑\n" - syllable_guidance += " S m m S w w <- BEAT TYPE\n\n" - - # Add genre-specific guidance based on the detected genre - genre_guidance = "" - if any(term in genre.lower() for term in ["rap", "hip-hop", "hip hop"]): - genre_guidance += "\nSPECIFIC GUIDANCE FOR RAP/HIP-HOP RHYTHMS:\n" - genre_guidance += "- Use more syllables per beat for rapid-fire sections\n" - genre_guidance += "- Create internal rhymes within lines, not just at line endings\n" - genre_guidance += "- Emphasize the first beat of each bar with strong consonants\n" - elif any(term in genre.lower() for term in ["electronic", "edm", "techno", "house", "dance"]): - genre_guidance += "\nSPECIFIC GUIDANCE FOR ELECTRONIC MUSIC RHYTHMS:\n" - genre_guidance += "- Use repetitive phrases that build and release tension\n" - genre_guidance += "- Match syllables precisely to the beat grid\n" - genre_guidance += "- Use short, percussive words on strong beats\n" - elif any(term in genre.lower() for term in ["rock", "metal", "punk", "alternative"]): - genre_guidance += "\nSPECIFIC GUIDANCE FOR ROCK RHYTHMS:\n" - genre_guidance += "- Use powerful, emotive words on downbeats\n" - genre_guidance += "- Create contrast between verse and chorus energy levels\n" - genre_guidance += "- Emphasize hooks with simple, memorable phrases\n" - elif any(term in genre.lower() for term in ["folk", "country", "acoustic", "ballad"]): - genre_guidance += "\nSPECIFIC GUIDANCE FOR FOLK/ACOUSTIC RHYTHMS:\n" - genre_guidance += "- Focus on storytelling with clear narrative flow\n" - genre_guidance += "- Use natural speech patterns that flow conversationally\n" - genre_guidance += "- Place important words at the start of phrases\n" - - # Add genre guidance to the main guidance - syllable_guidance += genre_guidance - - # Store the syllable guidance for later use - syllable_guidance_text = syllable_guidance - - # Determine if we should use traditional sections or 
second-level alignment - use_sections = True - use_second_level = False - - if song_structure and "second_level" in song_structure and song_structure["second_level"]: - use_second_level = True - # If we have second-level templates, prioritize those over traditional sections - if isinstance(song_structure["second_level"], dict) and "templates" in song_structure["second_level"]: - templates = song_structure["second_level"]["templates"] - if isinstance(templates, list) and len(templates) > 0: - use_sections = False - elif song_structure and "flexible_structure" in song_structure and song_structure["flexible_structure"]: - # If we have more than 4 segments, it's likely not a traditional song structure - if "segments" in song_structure["flexible_structure"]: - segments = song_structure["flexible_structure"]["segments"] - if len(segments) > 4: - use_sections = False - - # Create enhanced prompt with better rhythm alignment instructions - if use_second_level: - # Second-level approach with per-second alignment - content = f""" -You are a talented songwriter who specializes in {genre} music. -Write original lyrics that match the rhythm of a {genre} music segment that is {duration:.1f} seconds long. - -IMPORTANT: DO NOT include any thinking process, explanations, or analysis before the lyrics. Start directly with the song lyrics. - -Music analysis has detected the following qualities: -- Tempo: {tempo:.1f} BPM -- Key: {key} {mode} -- Primary emotion: {primary_emotion} -- Primary theme: {primary_theme} - -{syllable_guidance} - -CRITICAL PRINCIPLES FOR RHYTHMIC ALIGNMENT: -1. STRESSED syllables MUST fall on STRONG beats (marked with STRONG in the pattern) -2. Natural word stress patterns must match the beat strength (strong words on strong beats) -3. Line breaks should occur at phrase endings for natural breathing -4. Consonant clusters should be avoided on fast notes and strong beats -5. Open vowels (a, e, o) work better for sustained notes and syllables -6. Pay attention to strength values in the pattern (higher values like 0.95 need stronger emphasis) -7. For half-syllable positions (like S1.5 or m2.5), use short, quick syllables or words with weak vowels - -The lyrics should: -- Perfectly capture the essence and style of {genre} music -- Express the {primary_emotion} emotion and {primary_theme} theme -- Be completely original -- Maintain a consistent theme throughout -- Match the audio segment duration of {duration:.1f} seconds - -Each line of lyrics must follow the corresponding segment's rhythm pattern EXACTLY. - -IMPORTANT: Start immediately with the lyrics. DO NOT include any thinking process, analysis, or explanation before presenting the lyrics. - -IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]" -where you analyze how well the lyrics align with the musical rhythm. This section MUST appear -even if there are no rhythm issues. Include the following in your analysis: -1. Syllable counts for each line and how they match the rhythm pattern -2. Where stressed syllables align with strong beats -3. Any potential misalignments or improvements - -Your lyrics: -""" - - # Add user requirements if provided - if lyrics_requirements and lyrics_requirements.strip(): - content += f""" -USER REQUIREMENTS: -{lyrics_requirements.strip()} - -The lyrics MUST incorporate these user requirements while still following the rhythm patterns. -""" - - content += """ -Each line of lyrics must follow the corresponding segment's rhythm pattern EXACTLY. 
- -IMPORTANT: Start immediately with the lyrics. DO NOT include any thinking process, analysis, or explanation before presenting the lyrics. - -IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]" -where you analyze how well the lyrics align with the musical rhythm. This section MUST appear -even if there are no rhythm issues. Include the following in your analysis: -1. Syllable counts for each line and how they match the rhythm pattern -2. Where stressed syllables align with strong beats -3. Any potential misalignments or improvements - -Your lyrics: -""" - elif use_sections: - # Traditional approach with sections - content = f""" -You are a talented songwriter who specializes in {genre} music. -Write original {genre} song lyrics for a song that is {duration:.1f} seconds long. - -IMPORTANT: DO NOT include any thinking process, explanations, or analysis before the lyrics. Start directly with the song lyrics. - -Music analysis has detected the following qualities in the music: -- Tempo: {tempo:.1f} BPM -- Key: {key} {mode} -- Primary emotion: {primary_emotion} -- Primary theme: {primary_theme} - -{syllable_guidance} - -CRITICAL PRINCIPLES FOR RHYTHMIC ALIGNMENT: -1. STRESSED syllables MUST fall on STRONG beats (marked with STRONG in the pattern) -2. Natural word stress patterns must match the beat strength (strong words on strong beats) -3. Line breaks should occur at phrase endings for natural breathing -4. Consonant clusters should be avoided on fast notes and strong beats -5. Open vowels (a, e, o) work better for sustained notes and syllables -6. Pay attention to strength values in the pattern (higher values like 0.95 need stronger emphasis) -7. For half-syllable positions (like S1.5 or m2.5), use short, quick syllables or words with weak vowels - -The lyrics should: -- Perfectly capture the essence and style of {genre} music -- Express the {primary_emotion} emotion and {primary_theme} theme -- Follow the structure patterns provided above -- Be completely original -- Match the song duration of {duration:.1f} seconds -""" - - # Add user requirements if provided - if lyrics_requirements and lyrics_requirements.strip(): - content += f""" -USER REQUIREMENTS: -{lyrics_requirements.strip()} - -The lyrics MUST incorporate these user requirements while still following the rhythm patterns. -""" - - content += """ -IMPORTANT: Start immediately with the lyrics. DO NOT include any thinking process, analysis, or explanation before presenting the lyrics. - -IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]" -where you analyze how well the lyrics align with the musical rhythm. This section MUST appear -even if there are no rhythm issues. Include the following in your analysis: -1. Syllable counts for each line and how they match the rhythm pattern -2. Where stressed syllables align with strong beats -3. Any potential misalignments or improvements - -Your lyrics: -""" - else: - # Flexible approach without traditional sections - content = f""" -You are a talented songwriter who specializes in {genre} music. -Write original lyrics that match the rhythm of a {genre} music segment that is {duration:.1f} seconds long. - -IMPORTANT: DO NOT include any thinking process, explanations, or analysis before the lyrics. Start directly with the song lyrics. 
- -Music analysis has detected the following qualities: -- Tempo: {tempo:.1f} BPM -- Key: {key} {mode} -- Primary emotion: {primary_emotion} -- Primary theme: {primary_theme} - -{syllable_guidance} - -CRITICAL PRINCIPLES FOR RHYTHMIC ALIGNMENT: -1. STRESSED syllables MUST fall on STRONG beats (marked with STRONG in the pattern) -2. Natural word stress patterns must match the beat strength (strong words on strong beats) -3. Line breaks should occur at phrase endings for natural breathing -4. Consonant clusters should be avoided on fast notes and strong beats -5. Open vowels (a, e, o) work better for sustained notes and syllables -6. Pay attention to strength values in the pattern (higher values like 0.95 need stronger emphasis) -7. For half-syllable positions (like S1.5 or m2.5), use short, quick syllables or words with weak vowels - -The lyrics should: -- Perfectly capture the essence and style of {genre} music -- Express the {primary_emotion} emotion and {primary_theme} theme -- Be completely original -- Maintain a consistent theme throughout -- Match the audio segment duration of {duration:.1f} seconds -""" - - # Add user requirements if provided - if lyrics_requirements and lyrics_requirements.strip(): - content += f""" -USER REQUIREMENTS: -{lyrics_requirements.strip()} - -The lyrics MUST incorporate these user requirements while still following the rhythm patterns. -""" - - content += """ -Include any section labels like [Verse] or [Chorus] as indicated in the rhythm patterns above. -Each line of lyrics must follow the corresponding segment's rhythm pattern EXACTLY. - -IMPORTANT: Start immediately with the lyrics. DO NOT include any thinking process, analysis, or explanation before presenting the lyrics. - -IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]" -where you analyze how well the lyrics align with the musical rhythm. This section MUST appear -even if there are no rhythm issues. Include the following in your analysis: -1. Syllable counts for each line and how they match the rhythm pattern -2. Where stressed syllables align with strong beats -3. Any potential misalignments or improvements - -Your lyrics: -""" - - # Format as a chat message for the LLM - messages = [ - {"role": "system", "content": "You are a professional songwriter. Create lyrics that match the specified rhythm patterns exactly. Start with the lyrics immediately without any explanation or thinking. 
Be concise and direct."},
-        {"role": "user", "content": content}
-    ]
-    
-    # Apply standard chat template without thinking enabled
-    text = llm_tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    
-    # Generate lyrics using the LLM
-    model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
-    
-    # Configure generation parameters based on model capability
-    generation_params = {
-        "do_sample": True,
-        "temperature": 0.5,       # Lower for more consistent and direct output
-        "top_p": 0.85,            # Slightly lower for more predictable responses
-        "top_k": 50,
-        "repetition_penalty": 1.2,
-        "max_new_tokens": 2048,
-        "num_return_sequences": 1
-    }
-    
-    # Add specific stop sequences to prevent excessive explanation
-    if hasattr(llm_model.generation_config, "stopping_criteria"):
-        thinking_stops = ["Let me think", "First, I need to", "Let's analyze", "I'll approach this", "Step 1:", "To start,"]
-        for stop in thinking_stops:
-            if stop not in llm_model.generation_config.stopping_criteria:
-                llm_model.generation_config.stopping_criteria.append(stop)
-    
-    # Generate output
-    generated_ids = llm_model.generate(
-        **model_inputs,
-        **generation_params
-    )
-    
-    # Extract output tokens
-    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
-    
-    # Get the raw output and strip any thinking process
-    lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
-    
-    # Enhanced thinking process removal - handle multiple formats
-    # First check for standard thinking tags
-    if "<think>" in lyrics and "</think>" in lyrics:
-        lyrics = lyrics.split("</think>")[1].strip()
-    
-    # Check for alternative thinking indicators with improved detection
-    thinking_markers = [
-        "<think>", "</think>",
-        "[thinking]", "[/thinking]",
-        "I'll think step by step:",
-        "First, I need to understand",
-        "Let me think about",
-        "Let's tackle this query",
-        "Okay, let's tackle this query",
-        "First, I need to understand the requirements",
-        "Looking at the rhythm patterns"
-    ]
-    
-    # First try to find clear section breaks
-    for marker in thinking_markers:
-        if marker in lyrics:
-            parts = lyrics.split(marker)
-            if len(parts) > 1:
-                lyrics = parts[-1].strip()  # Take the last part after any thinking marker
-    
-    # Look for long analytical sections followed by clear lyrics
-    analytical_patterns = [
-        "Let me analyze",
-        "I need to understand",
-        "The tempo is",
-        "First, let's look at",
-        "Wait, maybe",
-        "Considering the emotional tone",
-        "Starting with the first line",
-        "Let me check the examples"
-    ]
-    
-    # Check if lyrics begin with any analytical patterns
-    for pattern in analytical_patterns:
-        if lyrics.startswith(pattern):
-            # Try to find where the actual lyrics start - look for common lyrics markers
-            lyrics_markers = [
-                "\n\n[Verse",
-                "\n\n[Chorus",
-                "\n\nVerse",
-                "\n\nChorus",
-                "\n\n[Verse 1]",
-                "\n\n[Intro]"
-            ]
-            
-            for marker in lyrics_markers:
-                if marker in lyrics:
-                    lyrics = lyrics[lyrics.index(marker):].strip()
-                    break
-    
-    # One last effort to clean up - if the text is very long and contains obvious thinking
-    # before getting to actual lyrics, try to find a clear starting point
-    if len(lyrics.split()) > 100 and "\n\n" in lyrics:
-        paragraphs = lyrics.split("\n\n")
-        for i, paragraph in enumerate(paragraphs):
-            # Look for typical song structure indicators in a paragraph
-            if any(marker in paragraph for marker in ["[Verse", "[Chorus", "Verse 1", "Chorus:"]):
-                lyrics = "\n\n".join(paragraphs[i:])
-                break
-    
-    # Clean up any remaining thinking artifacts at the beginning
-    lines = 
lyrics.split('\n') - clean_lines = [] - lyrics_started = False - - for line in lines: - # Skip initial commentary/thinking lines until we hit what looks like lyrics - if not lyrics_started: - if (line.strip().startswith('[') and ']' in line) or not any(thinking in line.lower() for thinking in ["i think", "let me", "maybe", "perhaps", "alternatively", "checking"]): - lyrics_started = True - - if lyrics_started: - clean_lines.append(line) - - # Only use the cleaning logic if we found some actual lyrics - if clean_lines: - lyrics = '\n'.join(clean_lines) - - # Special handling for second-level templates - second_level_verification = None - if song_structure and "second_level" in song_structure and song_structure["second_level"]: - if isinstance(song_structure["second_level"], dict) and "templates" in song_structure["second_level"]: - second_level_verification = song_structure["second_level"]["templates"] - if not isinstance(second_level_verification, list): - second_level_verification = None - - # Verify syllable counts with enhanced verification - pass second-level templates if available - if templates_for_verification: - # Convert any NumPy values to native types before verification - directly handle conversions - # Simple conversion for basic templates (non-recursive) - if isinstance(templates_for_verification, list): - safe_templates = [] - for template in templates_for_verification: - if isinstance(template, dict): - processed_template = {} - for k, v in template.items(): - if isinstance(v, np.ndarray): - if v.size == 1: - processed_template[k] = float(v.item()) - else: - processed_template[k] = [float(x) if isinstance(x, np.number) else x for x in v] - elif isinstance(v, np.number): - processed_template[k] = float(v) - else: - processed_template[k] = v - safe_templates.append(processed_template) - else: - safe_templates.append(template) - else: - safe_templates = templates_for_verification - - # Wrap verification in try-except to handle any potential string indices errors - try: - print(f"DEBUG: Calling verify_flexible_syllable_counts") - print(f"DEBUG: Type of lyrics: {type(lyrics)}") - print(f"DEBUG: Type of safe_templates: {type(safe_templates)}") - print(f"DEBUG: Type of second_level_verification: {type(second_level_verification)}") - - verified_lyrics = verify_flexible_syllable_counts(lyrics, safe_templates, second_level_verification) - print(f"DEBUG: Type of verified_lyrics: {type(verified_lyrics)}") - - except Exception as e: - print(f"ERROR in verify_flexible_syllable_counts: {str(e)}") - # Return the original lyrics if verification fails - return { - "lyrics": lyrics if isinstance(lyrics, str) else str(lyrics), - "rhythm_analysis": f"Error in rhythm analysis: {str(e)}", - "syllable_analysis": "Error performing syllable analysis", - "prompt_template": "Error generating prompt template" - } - - if isinstance(verified_lyrics, str) and "[Note: Potential rhythm mismatches" in verified_lyrics and "Detailed Alignment Analysis" in verified_lyrics: - # Extract the original lyrics (before the notes section) - original_lyrics = lyrics.split("[Note:")[0].strip() if isinstance(lyrics, str) else str(lyrics) - - # Extract the analysis - analysis = verified_lyrics.split("[Note:")[1] if "[Note:" in verified_lyrics else "" - - # If we have serious alignment issues, consider a refinement step - if "stress misalignments" in analysis and len(templates_for_verification) > 0: - # Add a refinement prompt with the specific analysis - refinement_prompt = f""" -You need to fix rhythm issues in these 
lyrics. Here's the analysis of the problems: - -{analysis} - -Revise the lyrics to perfectly match the rhythm pattern while maintaining the theme. -Focus on fixing the stress misalignments by placing stressed syllables on STRONG beats. - -Original lyrics: -{original_lyrics} - -Improved lyrics with fixed rhythm: -""" - # Format as a chat message for refinement - refinement_messages = [ - {"role": "user", "content": refinement_prompt} - ] - - # Use standard template for refinement (no thinking mode needed) - refinement_text = llm_tokenizer.apply_chat_template( - refinement_messages, - tokenize=False, - add_generation_prompt=True - ) - - try: - # Generate refined lyrics with more focus on rhythm alignment - refinement_inputs = llm_tokenizer([refinement_text], return_tensors="pt").to(llm_model.device) - - # Use stricter parameters for refinement - refinement_params = { - "do_sample": True, - "temperature": 0.4, # Lower temperature for more precise refinement - "top_p": 0.9, - "repetition_penalty": 1.3, - "max_new_tokens": 1024 - } - - refined_ids = llm_model.generate( - **refinement_inputs, - **refinement_params - ) - - # Extract refined lyrics - refined_output_ids = refined_ids[0][len(refinement_inputs.input_ids[0]):].tolist() - refined_lyrics = llm_tokenizer.decode(refined_output_ids, skip_special_tokens=True).strip() - - # Verify the refined lyrics - try: - refined_verified_lyrics = verify_flexible_syllable_counts(refined_lyrics, safe_templates, second_level_verification) - - # Only use refined lyrics if they're better (fewer notes) - if "[Note: Potential rhythm mismatches" not in refined_verified_lyrics: - lyrics = refined_lyrics - elif refined_verified_lyrics.count("misalignments") < verified_lyrics.count("misalignments"): - lyrics = refined_verified_lyrics - else: - lyrics = verified_lyrics - except Exception as e: - print(f"Error in refined lyrics verification: {str(e)}") - lyrics = verified_lyrics - except Exception as e: - print(f"Error in lyrics refinement: {str(e)}") - lyrics = verified_lyrics - else: - # Minor issues, just use the verification notes - lyrics = verified_lyrics - else: - # No significant issues detected - lyrics = verified_lyrics - - # Check if we have the [RHYTHM_ANALYSIS_SECTION] tag - if "[RHYTHM_ANALYSIS_SECTION]" in lyrics: - # Split at our custom marker - parts = lyrics.split("[RHYTHM_ANALYSIS_SECTION]") - clean_lyrics = parts[0].strip() - rhythm_analysis = parts[1].strip() - - # Add our standard marker for compatibility with existing code - lyrics = clean_lyrics + "\n\n[Note: Rhythm Analysis]\n" + rhythm_analysis - - # For backwards compatibility - if we have the old format, still handle it - elif "[Note: Potential rhythm mismatches" in lyrics: - # Keep it as is, the existing parsing code can handle this format - pass - else: - # No analysis found, add a minimal one - lyrics = lyrics + "\n\n[Note: Rhythm Analysis]\nNo rhythm issues detected. All syllables align well with the beat pattern." 
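# --- Editor's sketch (not part of the diff above) ---------------------------
# A minimal, self-contained illustration of the marker normalisation the
# removed block performs: the LLM is asked to append a
# "[RHYTHM_ANALYSIS_SECTION]" tag, which downstream parsers expect as
# "[Note: Rhythm Analysis]". The helper name is hypothetical; only the
# marker strings come from the code above.
def normalize_rhythm_marker(lyrics: str) -> str:
    if "[RHYTHM_ANALYSIS_SECTION]" in lyrics:
        # Split off the model's analysis and re-tag it with the standard marker
        clean, analysis = lyrics.split("[RHYTHM_ANALYSIS_SECTION]", 1)
        return clean.strip() + "\n\n[Note: Rhythm Analysis]\n" + analysis.strip()
    if "[Note: Potential rhythm mismatches" in lyrics:
        return lyrics  # legacy format; the existing parsing code handles it
    # No analysis found: append a minimal note so downstream splitting still works
    return lyrics + "\n\n[Note: Rhythm Analysis]\nNo rhythm issues detected."
# -----------------------------------------------------------------------------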
- - # Before returning, add syllable analysis and prompt template - if isinstance(lyrics, str): - # Extract clean lyrics and analysis - if "[Note: Rhythm Analysis]" in lyrics: - clean_lyrics = lyrics.split("[Note: Rhythm Analysis]")[0].strip() - rhythm_analysis = lyrics.split("[Note: Rhythm Analysis]")[1] - elif "[Note: Potential rhythm mismatches" in lyrics: - clean_lyrics = lyrics.split("[Note:")[0].strip() - rhythm_analysis = "[Note:" + lyrics.split("[Note:")[1] - else: - clean_lyrics = lyrics - rhythm_analysis = "No rhythm analysis available" - - # Create syllable analysis - syllable_analysis = "=== SYLLABLE ANALYSIS ===\n\n" - if templates_for_verification: - syllable_analysis += "Template Analysis:\n" - for i, template in enumerate(templates_for_verification): - if i < min(len(templates_for_verification), 30): # Limit to 30 to avoid overwhelming output - syllable_analysis += f"Line {i+1}:\n" - if isinstance(template, dict): - if "syllable_template" in template: - syllable_analysis += f" Template: {template['syllable_template']}\n" - if "syllable_count" in template: - syllable_analysis += f" Expected syllables: {template['syllable_count']}\n" - elif isinstance(template, str): - syllable_analysis += f" Template: {template}\n" - syllable_analysis += "\n" - - if len(templates_for_verification) > 30: - syllable_analysis += f"... and {len(templates_for_verification) - 30} more lines\n\n" - - # Add second-level analysis if available - if second_level_verification: - syllable_analysis += "\nSecond-Level Template Analysis:\n" - for i, template in enumerate(second_level_verification): - if i < min(len(second_level_verification), 30): # Limit to 30 seconds - syllable_analysis += f"Second {i+1}: {template}\n" - - if len(second_level_verification) > 30: - syllable_analysis += f"... 
and {len(second_level_verification) - 30} more seconds\n"
-        
-        # Add structure visualization to syllable analysis
-        syllable_analysis += "\n" + structure_visualization
-        
-        # Create prompt template
-        prompt_template = "=== PROMPT TEMPLATE ===\n\n"
-        prompt_template += "Genre: " + genre + "\n"
-        prompt_template += f"Duration: {duration:.1f} seconds\n"
-        prompt_template += f"Tempo: {tempo:.1f} BPM\n"
-        prompt_template += f"Key: {key} {mode}\n"
-        prompt_template += f"Primary Emotion: {primary_emotion}\n"
-        prompt_template += f"Primary Theme: {primary_theme}\n\n"
-        prompt_template += "Syllable Guidance:\n" + syllable_guidance_text
-        
-        # Return all components
-        return {
-            "lyrics": clean_lyrics,
-            "rhythm_analysis": rhythm_analysis,
-            "syllable_analysis": syllable_analysis,
-            "prompt_template": prompt_template
-        }
-    
-    return {
-        "lyrics": lyrics,
-        "rhythm_analysis": "No rhythm analysis available",
-        "syllable_analysis": "No syllable analysis available",
-        "prompt_template": "No prompt template available"
-    }
-
-def process_audio(audio_file, lyrics_requirements=None):
-    """Main function to process audio file, classify genre, and generate lyrics with enhanced rhythm analysis."""
-    if audio_file is None:
-        return "Please upload an audio file.", None, None
-        
-    try:
-        print("Step 1/5: Extracting audio features...")
-        # Extract audio features
-        audio_data = extract_audio_features(audio_file)
-        
-        print("Step 2/5: Verifying audio contains music...")
-        # First check if it's music
-        try:
-            is_music, ast_results = detect_music(audio_data)
-        except Exception as e:
-            print(f"Error in music detection: {str(e)}")
-            # ast_results is unbound if detect_music raised, so return an empty list
-            return f"Error in music detection: {str(e)}", None, []
-        
-        if not is_music:
-            return "The uploaded audio does not appear to be music. Please upload a music file.", None, ast_results
-        
-        print("Step 3/5: Classifying music genre...")
-        # Classify genre
-        try:
-            top_genres = classify_genre(audio_data)
-            # Format genre results using utility function
-            genre_results = format_genre_results(top_genres)
-            if not isinstance(top_genres, list) or len(top_genres) == 0:
-                # Fallback if we don't have valid top_genres
-                top_genres = [("rock", 1.0)]
-        except Exception as e:
-            print(f"Error in genre classification: {str(e)}")
-            top_genres = [("rock", 1.0)]  # Ensure we have a default even when exception happens
-            return f"Error in genre classification: {str(e)}", None, ast_results
-        
-        # Initialize default values
-        ast_results = ast_results if ast_results else []
-        song_structure = None
-        emotion_results = {
-            "emotion_analysis": {"primary_emotion": "Unknown"},
-            "theme_analysis": {"primary_theme": "Unknown"},
-            "rhythm_analysis": {"tempo": 0},
-            "tonal_analysis": {"key": "Unknown", "mode": ""},
-            "summary": {"tempo": 0, "key": "Unknown", "mode": "", "primary_emotion": "Unknown", "primary_theme": "Unknown"}
-        }
-        
-        print("Step 4/5: Analyzing music emotions, themes, and structure...")
-        # Analyze music emotions and themes
-        try:
-            emotion_results = music_analyzer.analyze_music(audio_file)
-        except Exception as e:
-            print(f"Error in emotion analysis: {str(e)}")
-            # Continue with default emotion_results
-        
-        # Calculate detailed song structure for better lyrics alignment
-        try:
-            # Load audio data
-            y, sr = load_audio(audio_file, SAMPLE_RATE)
-            
-            # Analyze beats and phrases for music-aligned lyrics
-            beats_info = detect_beats(y, sr)
-            sections_info = detect_sections(y, sr)
-            
-            # Create structured segments for precise line-by-line matching
-            segments = []
-            
-            # Try to break audio into meaningful segments based on 
sections - # Each segment will correspond to one line of lyrics - if sections_info and len(sections_info) > 1: - min_segment_duration = 1.5 # Minimum 1.5 seconds per segment - - for section in sections_info: - section_start = section["start"] - section_end = section["end"] - section_duration = section["duration"] - - # For very short sections, add as a single segment - if section_duration < min_segment_duration * 1.5: - segments.append({ - "start": section_start, - "end": section_end - }) - else: - # Calculate ideal number of segments for this section - # based on its duration - aiming for 2-4 second segments - ideal_segment_duration = 3.0 # Target 3 seconds per segment - segment_count = max(1, int(section_duration / ideal_segment_duration)) - - # Create evenly-spaced segments within this section - segment_duration = section_duration / segment_count - for i in range(segment_count): - segment_start = section_start + i * segment_duration - segment_end = segment_start + segment_duration - segments.append({ - "start": segment_start, - "end": segment_end - }) - # If no good sections found, create segments based on beats - elif beats_info and len(beats_info["beat_times"]) > 4: - beats = beats_info["beat_times"] - time_signature = beats_info.get("time_signature", 4) - - # Target one segment per musical measure (typically 4 beats) - measure_size = time_signature - for i in range(0, len(beats), measure_size): - if i + 1 < len(beats): # Need at least 2 beats for a meaningful segment - measure_start = beats[i] - # If we have enough beats for the full measure - if i + measure_size < len(beats): - measure_end = beats[i + measure_size] - else: - # Use available beats and extrapolate for the last measure - if i > 0: - beat_interval = beats[i] - beats[i-1] - measure_end = beats[-1] + (beat_interval * (measure_size - (len(beats) - i))) - else: - measure_end = audio_data["duration"] - - segments.append({ - "start": measure_start, - "end": measure_end - }) - # Last resort: simple time-based segments - else: - # Create segments of approximately 3 seconds each - segment_duration = 3.0 - total_segments = max(4, int(audio_data["duration"] / segment_duration)) - segment_duration = audio_data["duration"] / total_segments - - for i in range(total_segments): - segment_start = i * segment_duration - segment_end = segment_start + segment_duration - segments.append({ - "start": segment_start, - "end": segment_end - }) - - # Create flexible structure with the segments - flexible_structure = { - "beats": beats_info, - "segments": segments - } - - # Create song structure object - song_structure = { - "beats": beats_info, - "sections": sections_info, - "flexible_structure": flexible_structure, - "syllables": [] - } - - # Add syllable counts to each section - for section in sections_info: - # Create syllable templates for sections - section_beats_info = { - "beat_times": [beat for beat in beats_info["beat_times"] - if section["start"] <= beat < section["end"]], - "tempo": beats_info.get("tempo", 120) - } - if "beat_strengths" in beats_info: - section_beats_info["beat_strengths"] = [ - strength for i, strength in enumerate(beats_info["beat_strengths"]) - if i < len(beats_info["beat_times"]) and - section["start"] <= beats_info["beat_times"][i] < section["end"] - ] - - # Get a syllable count based on section duration and tempo - syllable_count = int(section["duration"] * (beats_info.get("tempo", 120) / 60) * 1.5) - - section_info = { - "type": section["type"], - "start": section["start"], - "end": section["end"], - 
"duration": section["duration"], - "syllable_count": syllable_count, - "beat_count": len(section_beats_info["beat_times"]) - } - - # Try to create a more detailed syllable template - if len(section_beats_info["beat_times"]) >= 2: - # Ensure top_genres is a list with at least one element - if isinstance(top_genres, list) and len(top_genres) > 0 and isinstance(top_genres[0], tuple): - genre_name = top_genres[0][0] - else: - genre_name = "unknown" # Default genre if top_genres is invalid - - section_info["syllable_template"] = create_flexible_syllable_templates( - section_beats_info, - genre=genre_name - ) - - song_structure["syllables"].append(section_info) - - # Add second-level beat analysis - try: - # Get enhanced beat information with subbeats - subbeat_info = detect_beats_and_subbeats(y, sr, subdivision=4) - - # Map beats to second-level windows - sec_map = map_beats_to_seconds( - subbeat_info["subbeat_times"], - audio_data["duration"] - ) - - # Create second-level templates - # Ensure top_genres is a list with at least one element - genre_name = "unknown" - if isinstance(top_genres, list) and len(top_genres) > 0 and isinstance(top_genres[0], tuple): - genre_name = top_genres[0][0] - - second_level_templates = create_second_level_templates( - sec_map, - subbeat_info["tempo"], - genre_name # Use top genre with safety check - ) - - # Add to song structure - song_structure["second_level"] = { - "sec_map": sec_map, - "templates": second_level_templates - } - - except Exception as e: - print(f"Error in second-level beat analysis: {str(e)}") - # Continue without second-level data - - except Exception as e: - print(f"Error analyzing song structure: {str(e)}") - # Continue without song structure - - print("Step 5/5: Generating rhythmically aligned lyrics...") - # Generate lyrics based on top genre, emotion analysis, and song structure - try: - # Ensure top_genres is a list with at least one element before accessing - primary_genre = "unknown" - if isinstance(top_genres, list) and len(top_genres) > 0 and isinstance(top_genres[0], tuple): - primary_genre, _ = top_genres[0] - - # CRITICAL FIX: Create a sanitized version of song_structure to prevent string indices error - sanitized_song_structure = None - if song_structure: - sanitized_song_structure = {} - - # Safely copy beats data - if "beats" in song_structure and isinstance(song_structure["beats"], dict): - sanitized_song_structure["beats"] = song_structure["beats"] - - # Safely copy sections data - if "sections" in song_structure and isinstance(song_structure["sections"], list): - sanitized_song_structure["sections"] = song_structure["sections"] - - # Safely handle flexible structure - if "flexible_structure" in song_structure and isinstance(song_structure["flexible_structure"], dict): - flex_struct = song_structure["flexible_structure"] - sanitized_flex = {} - - # Safely handle segments - if "segments" in flex_struct and isinstance(flex_struct["segments"], list): - sanitized_flex["segments"] = flex_struct["segments"] - - # Safely handle beats - if "beats" in flex_struct and isinstance(flex_struct["beats"], dict): - sanitized_flex["beats"] = flex_struct["beats"] - - sanitized_song_structure["flexible_structure"] = sanitized_flex - - # Safely handle syllables - if "syllables" in song_structure and isinstance(song_structure["syllables"], list): - sanitized_song_structure["syllables"] = song_structure["syllables"] - - # Safely handle second-level - if "second_level" in song_structure and isinstance(song_structure["second_level"], dict): - 
second_level = song_structure["second_level"] - sanitized_second = {} - - if "templates" in second_level and isinstance(second_level["templates"], list): - sanitized_second["templates"] = second_level["templates"] - - if "sec_map" in second_level and isinstance(second_level["sec_map"], list): - sanitized_second["sec_map"] = second_level["sec_map"] - - sanitized_song_structure["second_level"] = sanitized_second - - try: - print("Calling generate_lyrics function...") - # Pass lyrics_requirements to generate_lyrics function - lyrics_result = generate_lyrics(primary_genre, audio_data["duration"], emotion_results, - sanitized_song_structure, lyrics_requirements) - print(f"Type of lyrics_result: {type(lyrics_result)}") - - # Handle both old and new return formats with robust type checking - if isinstance(lyrics_result, dict) and all(k in lyrics_result for k in ["lyrics"]): - lyrics = lyrics_result.get("lyrics", "No lyrics generated") - rhythm_analysis = lyrics_result.get("rhythm_analysis", "No rhythm analysis available") - syllable_analysis = lyrics_result.get("syllable_analysis", "No syllable analysis available") - prompt_template = lyrics_result.get("prompt_template", "No prompt template available") - else: - # Convert to string regardless of the type - lyrics = str(lyrics_result) if lyrics_result is not None else "No lyrics generated" - rhythm_analysis = "No detailed rhythm analysis available" - syllable_analysis = "No syllable analysis available" - prompt_template = "No prompt template available" - except Exception as inner_e: - print(f"Inner error in lyrics generation: {str(inner_e)}") - # Create a simplified fallback result with just the error message - lyrics = f"Error generating lyrics: {str(inner_e)}" - rhythm_analysis = "Error in rhythm analysis" - syllable_analysis = "Error in syllable analysis" - prompt_template = "Error in prompt template generation" - - except Exception as e: - print(f"Outer error in lyrics generation: {str(e)}") - lyrics = f"Error generating lyrics: {str(e)}" - rhythm_analysis = "No rhythm analysis available" - syllable_analysis = "No syllable analysis available" - prompt_template = "No prompt template available" - # Prepare results dictionary with additional rhythm analysis - results = { - "genre_results": genre_results, - "lyrics": lyrics, - "rhythm_analysis": rhythm_analysis, - "syllable_analysis": syllable_analysis, - "prompt_template": prompt_template, - "ast_results": ast_results - } - - return results - - except Exception as e: - error_msg = f"Error processing audio: {str(e)}" - print(error_msg) - return error_msg, None, [] - -def format_complete_beat_timeline(audio_file, lyrics=None): - """Creates a complete formatted timeline showing all beat timings and their syllable patterns without truncation""" - if audio_file is None: - return "Please upload an audio file to see beat timeline." 
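    # Editor's note: the timeline string assembled below has seven parts, in
    # order: (1) a tempo/meter header with confidence figures, (2) a per-beat
    # table mapping each beat to a STRONG/MEDIUM/WEAK class and a syllable
    # weight, (3) an ASCII beat timeline at two characters per second,
    # (4) measure markers, (5) per-phrase syllable templates, (6) a
    # second-level (per-second) script pairing beat patterns with lyric
    # lines, and (7) a lyrics-beat alignment report with word-stress analysis.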
- - try: - # Extract audio data - y, sr = load_audio(audio_file, SAMPLE_RATE) - - # Get beat information - beats_info = detect_beats(y, sr) - - # Helper function to convert numpy values to floats - FIXED - def ensure_float(value): - if isinstance(value, np.ndarray) or isinstance(value, np.number): - return float(value) - return value - - # Format the timeline with enhanced scientific headers - timeline = "=== BEAT & SYLLABLE TIMELINE ===\n\n" - - tempo = ensure_float(beats_info['tempo']) - tempo_confidence = ensure_float(beats_info.get('tempo_confidence', 90.0)) - time_sig_confidence = ensure_float(beats_info.get('time_sig_confidence', 85.0)) - beat_periodicity = ensure_float(beats_info.get('beat_periodicity', 60 / tempo)) - - timeline += f"Tempo: {tempo:.1f} BPM (±{tempo_confidence:.1f}%)\n" - timeline += f"Time Signature: {beats_info['time_signature']}/4 (Confidence: {time_sig_confidence:.1f}%)\n" - timeline += f"Beat Periodicity: {beat_periodicity:.3f}s\n" - timeline += f"Beat Entropy: {beats_info.get('beat_entropy', 'N/A')}\n" - timeline += f"Total Beats: {beats_info['beat_count']}\n" - - # Add musicological context based on tempo classification - if tempo < 60: - tempo_class = "Largo (very slow)" - elif tempo < 76: - tempo_class = "Adagio (slow)" - elif tempo < 108: - tempo_class = "Andante (walking pace)" - elif tempo < 132: - tempo_class = "Moderato (moderate)" - elif tempo < 168: - tempo_class = "Allegro (fast)" - else: - tempo_class = "Presto (very fast)" - - timeline += f"Tempo Classification: {tempo_class}\n\n" - - # Create an enhanced table header with better column descriptions - timeline += "| Beat # | Time (s) | Beat Strength | Syllable Pattern |\n" - timeline += "|--------|----------|--------------|------------------|\n" - - # Add beat-by-beat information with improved classification - for i, (time, strength) in enumerate(zip(beats_info['beat_times'], beats_info['beat_strengths'])): - # Convert numpy values to Python float if needed - time = ensure_float(time) - strength = ensure_float(strength) - - # More scientific determination of beat type based on both strength and metrical position - metrical_position = i % beats_info['time_signature'] - - if metrical_position == 0: # Downbeat (first beat of measure) - beat_type = "STRONG" - syllable_value = 1.5 - elif metrical_position == beats_info['time_signature'] // 2 and beats_info['time_signature'] > 2: - # Secondary strong beat (e.g., beat 3 in 4/4 time) - beat_type = "MEDIUM" if strength < 0.8 else "STRONG" - syllable_value = 1.0 if strength < 0.8 else 1.5 - else: - # Other beats - classified by actual strength value - if strength >= 0.8: - beat_type = "STRONG" - syllable_value = 1.5 - elif strength >= 0.5: - beat_type = "MEDIUM" - syllable_value = 1.0 - else: - beat_type = "WEAK" - syllable_value = 1.0 - - # Determine pattern letter based on beat type for consistency - if beat_type == "STRONG": - pattern = "S" - elif beat_type == "MEDIUM": - pattern = "m" - else: - pattern = "w" - - # Add row to table with the correct beat classification - timeline += f"| {i+1:<6} | {time:.2f}s | {beat_type:<12} | {pattern}:{syllable_value} |\n" - - # No truncation - show all beats - - # Add a visual timeline of beats - timeline += "\n=== VISUAL BEAT TIMELINE ===\n\n" - timeline += "Each character represents 0.5 seconds. 
Beats are marked as:\n" - timeline += "S = Strong beat | m = Medium beat | w = Weak beat | · = No beat\n\n" - - # Calculate total duration and create time markers - if 'beat_times' in beats_info and len(beats_info['beat_times']) > 0: - # Get the max value safely - max_beat_time = max([ensure_float(t) for t in beats_info['beat_times']]) - total_duration = max_beat_time + 2 # Add 2 seconds of padding - else: - total_duration = 30 # Default duration if no beats found - - time_markers = "" - for i in range(0, int(total_duration) + 1, 5): - time_markers += f"{i:<5}" - timeline += time_markers + " (seconds)\n" - - # Create a ruler for easier time tracking - ruler = "" - for i in range(0, int(total_duration) + 1): - if i % 5 == 0: - ruler += "+" - else: - ruler += "-" - ruler += "-" * 9 # Each second is 10 characters wide - timeline += ruler + "\n" - - # Create a visualization of beats with symbols - beat_line = ["·"] * int(total_duration * 2) # 2 characters per second - - for i, time in enumerate(beats_info['beat_times']): - if i >= len(beats_info['beat_strengths']): - break - - # Convert to float if it's a numpy array - time_val = ensure_float(time) - - # Determine position in the timeline - pos = int(time_val * 2) # Convert to position in the beat_line - if pos >= len(beat_line): - continue - - # Determine beat type based on strength and position - strength = beats_info['beat_strengths'][i] - # Convert to float if it's a numpy array - strength = ensure_float(strength) - - if i % beats_info['time_signature'] == 0: - beat_line[pos] = "S" # Strong beat at start of measure - elif strength >= 0.8: - beat_line[pos] = "S" # Strong beat - elif i % beats_info['time_signature'] == beats_info['time_signature'] // 2 and beats_info['time_signature'] > 3: - beat_line[pos] = "m" # Medium beat (3rd beat in 4/4) - elif strength >= 0.5: - beat_line[pos] = "m" # Medium beat - else: - beat_line[pos] = "w" # Weak beat - - # Format and add to timeline - beat_visualization = "" - for i in range(0, len(beat_line), 10): - beat_visualization += "".join(beat_line[i:i+10]) - if i + 10 < len(beat_line): - beat_visualization += " " # Add space every 5 seconds - timeline += beat_visualization + "\n\n" - - # Add measure markers - timeline += "=== MEASURE MARKERS ===\n\n" - - # Create a list to track measure start times - measure_starts = [] - for i, time in enumerate(beats_info['beat_times']): - if i % beats_info['time_signature'] == 0: # Start of measure - # Convert to float if it's a numpy array - time_val = ensure_float(time) - measure_starts.append((i // beats_info['time_signature'] + 1, time_val)) - - # Format measure information - if measure_starts: - timeline += "| Measure # | Start Time | Duration |\n" - timeline += "|-----------|------------|----------|\n" - - for i in range(len(measure_starts)): - measure_num, start_time = measure_starts[i] - - # Calculate end time (start of next measure or end of song) - if i < len(measure_starts) - 1: - end_time = measure_starts[i+1][1] - elif 'beat_times' in beats_info and len(beats_info['beat_times']) > 0: - # Get the last beat time and convert to float if needed - last_beat = beats_info['beat_times'][-1] - end_time = ensure_float(last_beat) - else: - end_time = start_time + 2.0 # Default 2 seconds if no next measure - - duration = end_time - start_time - - timeline += f"| {measure_num:<9} | {start_time:.2f}s | {duration:.2f}s |\n" - - # No truncation - show all measures - - # Add phrase information - if 'phrases' in beats_info and beats_info['phrases']: - timeline += "\n=== 
MUSICAL PHRASES ===\n\n" - for i, phrase in enumerate(beats_info['phrases']): - # Show all phrases, not just the first 10 - if not phrase: - continue - - # Safely check phrase indices - if not (len(phrase) > 0 and len(beats_info['beat_times']) > 0): - continue - - start_beat = min(phrase[0], len(beats_info['beat_times'])-1) - end_beat = min(phrase[-1], len(beats_info['beat_times'])-1) - - # Convert to float if needed - phrase_start = ensure_float(beats_info['beat_times'][start_beat]) - phrase_end = ensure_float(beats_info['beat_times'][end_beat]) - - timeline += f"Phrase {i+1}: Beats {start_beat+1}-{end_beat+1} ({phrase_start:.2f}s - {phrase_end:.2f}s)\n" - - # Create syllable template for this phrase with simplified numpy handling - phrase_beats = { - "beat_times": [ensure_float(beats_info['beat_times'][j]) - for j in phrase if j < len(beats_info['beat_times'])], - "beat_strengths": [ensure_float(beats_info['beat_strengths'][j]) - for j in phrase if j < len(beats_info['beat_strengths'])], - "tempo": ensure_float(beats_info['tempo']), - "time_signature": beats_info['time_signature'], - "phrases": [list(range(len(phrase)))] - } - - template = create_flexible_syllable_templates(phrase_beats) - timeline += f" Syllable Template: {template}\n" - - # Create a visual representation of this phrase - if phrase_start < total_duration and phrase_end < total_duration: - # Create a timeline for this phrase - phrase_visualization = ["·"] * int(total_duration * 2) - - # Mark the phrase boundaries - start_pos = int(phrase_start * 2) - end_pos = int(phrase_end * 2) - - if start_pos < len(phrase_visualization): - phrase_visualization[start_pos] = "[" - - if end_pos < len(phrase_visualization): - phrase_visualization[end_pos] = "]" - - # Mark the beats in this phrase - for j in phrase: - if j < len(beats_info['beat_times']): - beat_time = ensure_float(beats_info['beat_times'][j]) - beat_pos = int(beat_time * 2) - - if beat_pos < len(phrase_visualization) and beat_pos != start_pos and beat_pos != end_pos: - # Determine beat type - if j % beats_info['time_signature'] == 0: - phrase_visualization[beat_pos] = "S" - elif j % beats_info['time_signature'] == beats_info['time_signature'] // 2: - phrase_visualization[beat_pos] = "m" - else: - phrase_visualization[beat_pos] = "w" - - # Format and add visualization - phrase_visual = "" - for k in range(0, len(phrase_visualization), 10): - phrase_visual += "".join(phrase_visualization[k:k+10]) - if k + 10 < len(phrase_visualization): - phrase_visual += " " - - timeline += f" Timeline: {phrase_visual}\n\n" - - # Add second-level script display - try: - # Get second-level beat information - subbeat_info = detect_beats_and_subbeats(y, sr, subdivision=4) - duration = librosa.get_duration(y=y, sr=sr) - - # Map to seconds - sec_map = map_beats_to_seconds(subbeat_info["subbeat_times"], duration) - - # Create templates - templates = create_second_level_templates(sec_map, subbeat_info["tempo"]) - - # Add to timeline - timeline += "\n=== SECOND-LEVEL SCRIPT ===\n\n" - timeline += "Each line below represents ONE SECOND of audio with matching lyric content.\n" - timeline += "| Second | Beat Pattern | Lyric Content |\n" - timeline += "|--------|-------------|---------------|\n" - - # Get clean lyrics (without analysis notes) - clean_lyrics = lyrics - if isinstance(lyrics, str): - if "[Note: Rhythm Analysis]" in lyrics: - clean_lyrics = lyrics.split("[Note: Rhythm Analysis]")[0].strip() - elif "[Note: Potential rhythm mismatches" in lyrics: - clean_lyrics = 
lyrics.split("[Note:")[0].strip() - - # Get lyric lines - lines = clean_lyrics.strip().split('\n') if clean_lyrics else [] - - for i, template in enumerate(templates): - # Get corresponding lyric line if available - lyric = lines[i] if i < len(lines) else "" - if lyric.startswith('[') and ']' in lyric: - lyric = "" # Skip section headers - - # Format nicely for display - timeline += f"| {i+1:<6} | {template:<30} | {lyric[:40]} |\n" - - # Add ASCII visualization of second-level beats - timeline += "\n=== SECOND-LEVEL VISUALIZATION ===\n\n" - timeline += "Each row represents ONE SECOND. Beat types:\n" - timeline += "S = Strong beat | m = Medium beat | w = Weak beat | · = No beat\n\n" - - for i, window in enumerate(sec_map): - beats = window["beats"] - - # Create ASCII visualization - beat_viz = ["·"] * 20 # 20 columns for visualization - - for beat in beats: - # Calculate position in visualization - pos = int(beat["relative_pos"] * 19) # Map 0-1 to 0-19 - if 0 <= pos < len(beat_viz): - # Set marker based on beat type - if beat["type"] == "main": - beat_viz[pos] = "S" - elif beat["strength"] >= 0.7: - beat_viz[pos] = "m" - else: - beat_viz[pos] = "w" - - # Get corresponding lyric - lyric = lines[i] if i < len(lines) else "" - if lyric.startswith('[') and ']' in lyric: - lyric = "" - - # Format visualization line - viz_line = f"Second {i+1:2d}: [" + "".join(beat_viz) + "]" - if lyric: - viz_line += f" → {lyric[:40]}" - - timeline += viz_line + "\n" - - except Exception as e: - timeline += f"\n[Error generating second-level analysis: {str(e)}]" - - # Add a section showing alignment if lyrics were generated - if lyrics and isinstance(lyrics, str): - timeline += "\n=== LYRICS-BEAT ALIGNMENT ===\n\n" - # Remove rhythm analysis notes from lyrics if present - if "[Note:" in lyrics: - clean_lyrics = lyrics.split("[Note:")[0].strip() - else: - clean_lyrics = lyrics - - lines = clean_lyrics.strip().split('\n') - - # Show alignment for ALL lines, not just the first 10 - for i, line in enumerate(lines): - if not line.strip() or line.startswith('['): - continue - - timeline += f"Line: \"{line}\"\n" - - # Count syllables - syllable_count = count_syllables(line) - timeline += f" Syllables: {syllable_count}\n" - - # Create adaptive phrase matching - if we don't have a direct phrase match, - # try to find the closest matching phrase by time or measure - matching_phrase = None - if 'phrases' in beats_info and beats_info['phrases']: - # First try direct index matching - if i < len(beats_info['phrases']) and beats_info['phrases'][i]: - matching_phrase = beats_info['phrases'][i] - else: - # If no direct match, try to find a phrase by musical position - # Calculate which section of the song we're in - if len(beats_info['phrases']) > 0: - section_size = max(1, len(beats_info['phrases']) // 4) - section_index = min(i // section_size, 3) # Limit to 4 sections - section_start = section_index * section_size - section_end = min(section_start + section_size, len(beats_info['phrases'])) - - # Try to find a phrase within this section - candidate_phrases = [phrase for j, phrase in enumerate(beats_info['phrases']) - if section_start <= j < section_end and phrase] - - if candidate_phrases: - matching_phrase = candidate_phrases[min(i % section_size, len(candidate_phrases)-1)] - elif beats_info['phrases']: - # Fallback to cycling through available phrases - phrase_index = i % len(beats_info['phrases']) - if beats_info['phrases'][phrase_index]: - matching_phrase = beats_info['phrases'][phrase_index] - - # Show timing and 
detailed alignment if we found a matching phrase - if matching_phrase and len(matching_phrase) > 0 and len(beats_info['beat_times']) > 0: - # Safely check if phrase has elements and indices are valid - if len(matching_phrase) > 0 and len(beats_info['beat_times']) > 0: - start_beat = min(matching_phrase[0], len(beats_info['beat_times'])-1) - end_beat = min(matching_phrase[-1], len(beats_info['beat_times'])-1) - - start_time = ensure_float(beats_info['beat_times'][start_beat]) - end_time = ensure_float(beats_info['beat_times'][end_beat]) - - timeline += f" Timing: {start_time:.2f}s - {end_time:.2f}s\n" - - # Create an enhanced visualization of syllable alignment - timeline += " Alignment: " - - # Create a timeline focused on just this phrase - phrase_duration = end_time - start_time - syllable_viz = [] - - # Initialize with beat markers for this phrase using improved algorithm - for j, beat_idx in enumerate(matching_phrase): - if beat_idx < len(beats_info['beat_times']): - beat_time = ensure_float(beats_info['beat_times'][beat_idx]) - - # Handle edge case where phrase_duration is very small - if phrase_duration > 0.001: # Avoid division by very small numbers - # Use non-linear mapping for more musical alignment - # This accounts for natural speech rhythms not being strictly linear - normalized_pos = (beat_time - start_time) / phrase_duration - # Apply slight curve to map syllable positions more naturally - curved_pos = min(1.0, normalized_pos * (1.0 + 0.1 * (normalized_pos - 0.5))) - relative_pos = int(curved_pos * syllable_count) - else: - relative_pos = j # Default to sequential if duration is too small - - # Ensure we have enough space - while len(syllable_viz) <= relative_pos: - syllable_viz.append("·") - - # Determine beat type with metrical context - metrical_pos = beat_idx % beats_info['time_signature'] - beat_strength = beats_info['beat_strengths'][beat_idx] if beat_idx < len(beats_info['beat_strengths']) else 0 - - if metrical_pos == 0 or beat_strength >= 0.8: - syllable_viz[relative_pos] = "S" # Strong beat - elif metrical_pos == beats_info['time_signature'] // 2 or beat_strength >= 0.5: - syllable_viz[relative_pos] = "m" # Medium beat - else: - syllable_viz[relative_pos] = "w" # Weak beat - - # Fill in any gaps - while len(syllable_viz) < syllable_count: - syllable_viz.append("·") - - # Trim if too long - syllable_viz = syllable_viz[:syllable_count] - - # Add alignment visualization with word stress analysis - timeline += "".join(syllable_viz) + "\n" - - # Add word stress analysis - words = re.findall(r'\b[a-zA-Z]+\b', line.lower()) - if words: - word_stresses = [] - cumulative_syllables = 0 - - for word in words: - syllable_count_word = count_syllables_for_word(word) - stress_pattern = get_word_stress(word) - - # Ensure stress pattern is as long as syllable count - while len(stress_pattern) < syllable_count_word: - stress_pattern += "0" - - for j in range(syllable_count_word): - stress_char = "S" if j < len(stress_pattern) and stress_pattern[j] == "1" else "_" - word_stresses.append(stress_char) - - cumulative_syllables += syllable_count_word - - # Add word stress information - timeline += " Word stress: " + "".join(word_stresses) + "\n" - - # Check if stressed syllables align with strong beats - alignment_score = 0 - alignment_issues = [] - - for j, (stress, beat) in enumerate(zip(word_stresses, syllable_viz)): - if (stress == "S" and beat == "S") or (stress != "S" and beat != "S"): - alignment_score += 1 - elif stress == "S" and beat != "S": - alignment_issues.append(f"Syllable 
{j+1} has stress but weak beat") - elif stress != "S" and beat == "S": - alignment_issues.append(f"Syllable {j+1} has no stress but strong beat") - - if word_stresses: - alignment_percent = (alignment_score / len(word_stresses)) * 100 - timeline += f" Stress alignment: {alignment_percent:.1f}% match\n" - - if alignment_issues and len(alignment_issues) <= 3: - timeline += " Issues: " + "; ".join(alignment_issues) + "\n" - else: - timeline += " No matching phrase found for alignment\n" - - timeline += "\n" - - return timeline - - except Exception as e: - print(f"Error generating complete beat timeline: {str(e)}") - return f"Error generating complete beat timeline: {str(e)}" - -def display_results(audio_file, lyrics_requirements=None): - """Process audio file and return formatted results for display in the UI.""" - # Default error response - error_response = ("Please upload an audio file.", - "No emotion analysis available.", - "No audio classification available.", - "No lyrics generated.", - "No beat timeline available.") - - if audio_file is None: - return error_response - - try: - # Process audio and get results - pass user requirements - results = process_audio(audio_file, lyrics_requirements) - - # Check if we got an error message - if isinstance(results, str) and "Error" in results: - return results, *error_response[1:] - elif isinstance(results, tuple) and isinstance(results[0], str) and "Error" in results[0]: - return results[0], *error_response[1:] - - # Extract results - if isinstance(results, dict): - # New format - genre_results = results.get("genre_results", "Genre classification failed") - lyrics = results.get("lyrics", "Lyrics generation failed") - ast_results = results.get("ast_results", []) - else: - # Old tuple format - genre_results, lyrics, ast_results = results - - # Get clean lyrics (without analysis notes) - clean_lyrics = lyrics - if isinstance(lyrics, str): - if "[Note: Rhythm Analysis]" in lyrics: - clean_lyrics = lyrics.split("[Note: Rhythm Analysis]")[0].strip() - elif "[Note: Potential rhythm mismatches" in lyrics: - clean_lyrics = lyrics.split("[Note:")[0].strip() - - # Generate beat timeline - use the complete timeline function that shows all beats - beat_timeline = format_complete_beat_timeline(audio_file, clean_lyrics) - - # Format emotion analysis results - emotion_text = "No emotion analysis available." - try: - emotion_results = music_analyzer.analyze_music(audio_file) - emotion_text = (f"Tempo: {emotion_results['summary']['tempo']:.1f} BPM\n" - f"Key: {emotion_results['summary']['key']} {emotion_results['summary']['mode']}\n" - f"Primary Emotion: {emotion_results['summary']['primary_emotion']}\n" - f"Primary Theme: {emotion_results['summary']['primary_theme']}") - - # Keep basic beat analysis without section information - y, sr = load_audio(audio_file, SAMPLE_RATE) - beats_info = detect_beats(y, sr) - - # Add beat analysis info - emotion_text += f"\n\nBeat Analysis:\n" - emotion_text += f"- Tempo: {beats_info.get('tempo', 0):.1f} BPM\n" - emotion_text += f"- Time Signature: {beats_info.get('time_signature', 4)}/4\n" - emotion_text += f"- Total Beats: {beats_info.get('beat_count', 0)}\n" - - except Exception as e: - print(f"Error in emotion analysis: {str(e)}") - - # Format audio classification results - ast_text = "No valid audio classification results available." 
-        if ast_results and isinstance(ast_results, list):
-            ast_text = "Audio Classification Results:\n"
-            for result in ast_results[:5]: # Show top 5 results
-                ast_text += f"{result['label']}: {result['score']*100:.2f}%\n"
-
-        # Return all results
-        return genre_results, emotion_text, ast_text, clean_lyrics, beat_timeline
-
-    except Exception as e:
-        error_msg = f"Error: {str(e)}"
-        print(error_msg)
-        return error_msg, *error_response[1:]
-
-# Create enhanced Gradio interface with tabs for better organization
-with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
-    gr.Markdown("# Music Genre Classifier & Lyrics Generator")
-    gr.Markdown("Upload a music file to classify its genre, analyze its emotions, and generate perfectly aligned lyrics.")
-
-    with gr.Row():
-        with gr.Column(scale=1):
-            audio_input = gr.Audio(label="Upload Music", type="filepath")
-
-            # Add the new lyrics requirements input
-            lyrics_requirements_input = gr.Textbox(
-                label="Lyrics Requirements (optional)",
-                placeholder="Enter specific themes, topics, words, or styles you want in the lyrics",
-                lines=3
-            )
-
-            submit_btn = gr.Button("Analyze & Generate", variant="primary")
-
-            # Add genre info box
-            with gr.Accordion("About Music Genres", open=False):
-                gr.Markdown("""
-                The system recognizes various music genres including:
-                - Pop, Rock, Hip-Hop, R&B
-                - Electronic, Dance, Techno, House
-                - Jazz, Blues, Classical
-                - Folk, Country, Acoustic
-                - Metal, Punk, Alternative
-                - And many others!
-
-                For best results, use high-quality audio files (MP3, WAV, FLAC) with at least 10 seconds of music.
-                """)
-
-        with gr.Column(scale=2):
-            # Use tabs for better organization of outputs
-            with gr.Tabs():
-                with gr.TabItem("Analysis Results"):
-                    genre_output = gr.Textbox(label="Detected Genres", lines=4)
-
-                    # Create 2 columns for emotion and audio classification
-                    with gr.Row():
-                        with gr.Column():
-                            emotion_output = gr.Textbox(label="Emotion & Structure Analysis", lines=8)
-                        with gr.Column():
-                            ast_output = gr.Textbox(label="Audio Classification", lines=8)
-
-                with gr.TabItem("Generated Lyrics"):
-                    lyrics_output = gr.Textbox(label="Lyrics", lines=18)
-
-                with gr.TabItem("Beat & Syllable Timeline"):
-                    beat_timeline_output = gr.Textbox(label="Beat Timings & Syllable Patterns", lines=40)
-
-    # Connect the button to the display function with updated inputs
-    submit_btn.click(
-        fn=display_results,
-        inputs=[audio_input, lyrics_requirements_input],
-        outputs=[genre_output, emotion_output, ast_output, lyrics_output, beat_timeline_output]
-    )
-
-    # Enhanced explanation of how the system works
-    with gr.Accordion("How it works", open=False):
-        gr.Markdown("""
-        ## Advanced Lyrics Generation Process
-
-        1. **Audio Analysis**: The system analyzes your uploaded music file using multiple machine learning models.
-
-        2. **Genre Classification**: A specialized neural network identifies the musical genre, detecting subtle patterns in the audio.
-
-        3. **Emotional Analysis**: The system examines harmonic, rhythmic, and timbral features to determine the emotional qualities of the music.
-
-        4. **Rhythm Mapping**: Advanced beat detection algorithms create a detailed rhythmic map of the music, identifying:
-           - Strong and weak beats
-           - Natural phrase boundaries
-           - Time signature and tempo variations
-           - Beat subdivisions (half and quarter beats)
-
-        5. **Second-Level Alignment**: The system maps beats and subbeats to each second of audio, creating precise templates for perfect alignment.
-
-        6. **Syllable Template Creation**: For each second of audio, the system generates precise syllable templates that reflect:
-           - Beat stress patterns (strong, medium, weak)
-           - Appropriate syllable counts based on tempo
-           - Genre-specific rhythmic qualities
-           - Half-beat and quarter-beat subdivisions
-
-        7. **Lyrics Generation**: Using the detected genre, emotion, rhythm patterns, and your custom requirements, a large language model generates lyrics that:
-           - Match the emotional quality of the music
-           - Follow the precise syllable templates for each second
-           - Align stressed syllables with strong beats
-           - Maintain genre-appropriate style and themes
-           - Incorporate your specific requirements and preferences
-
-        8. **Rhythm Verification**: The system verifies the generated lyrics, analyzing:
-           - Syllable count accuracy
-           - Stress alignment with strong beats
-           - Word stress patterns
-           - Second-by-second alignment precision
-
-        9. **Refinement**: If significant rhythm mismatches are detected, the system can automatically refine the lyrics for better alignment.
-
-        This multi-step process creates lyrics that feel naturally connected to the music, as if they were written specifically for it.
-        """)
+    return demo

 # Launch the app
-demo.launch()
\ No newline at end of file
+demo = create_interface()
+
+if __name__ == "__main__":
+    demo.launch()
+else:
+    # For Hugging Face Spaces
+    app = demo
\ No newline at end of file
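Two details of the removed timeline code are easy to lose in a diff this large, so brief standalone restatements follow. First, the stress/beat alignment score: a syllable counts as aligned when a stressed syllable ("S") lands on a strong beat ("S"), or an unstressed syllable lands on a medium/weak beat, and the result is reported as a percentage of syllables. The sketch below is illustrative only; the helper name stress_alignment is hypothetical and is not part of app.py.

# Minimal, illustrative restatement of the stress/beat alignment scoring
# performed inline by the removed timeline code; not part of app.py.

def stress_alignment(word_stresses, beat_pattern):
    """Percentage of syllables whose stress agrees with the beat strength.

    A syllable is aligned when a stressed syllable ("S") falls on a strong
    beat ("S"), or an unstressed syllable falls on a medium/weak beat.
    """
    if not word_stresses:
        return 0.0
    aligned = sum(
        1 for stress, beat in zip(word_stresses, beat_pattern)
        if (stress == "S") == (beat == "S")
    )
    return aligned / len(word_stresses) * 100

# Stresses S _ S against beats S w m: 2 of 3 syllables agree.
print(f"{stress_alignment(list('S_S'), list('Swm')):.1f}% match")  # 66.7% match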
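Second, the non-linear mapping that assigned each beat a syllable slot in the alignment visualization: it normalizes the beat time within the phrase, applies a gentle curve that nudges positions past the phrase midpoint slightly forward (capped at 1.0), then quantizes to a slot index. Again a minimal illustrative sketch: syllable_slot is a hypothetical name, and where the removed code fell back to the beat's sequential index for near-zero phrase durations, this standalone version simply returns slot 0.

# Minimal, illustrative restatement of the curved beat-time-to-syllable-slot
# mapping from the removed visualization code; helper name is hypothetical.

def syllable_slot(beat_time, start, end, n_syllables):
    """Map a beat time within [start, end] to a syllable slot index."""
    duration = end - start
    if duration <= 0.001:   # degenerate phrase; the removed code fell back
        return 0            # to the beat's sequential index here
    pos = (beat_time - start) / duration  # linear position in [0, 1]
    # Gentle curve: positions past the midpoint are pushed slightly forward,
    # positions before it pulled slightly back, capped at 1.0.
    curved = min(1.0, pos * (1.0 + 0.1 * (pos - 0.5)))
    return int(curved * n_syllables)

# A beat 1.2 s into a 2.0 s phrase with 8 syllable slots lands in slot 4:
print(syllable_slot(1.2, 0.0, 2.0, 8))  # 4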