diff --git "a/lastapp.py" "b/lastapp.py" new file mode 100644--- /dev/null +++ "b/lastapp.py" @@ -0,0 +1,2729 @@ +import os +import io +import gradio as gr +import torch +import numpy as np +import re +import pronouncing # Add this to requirements.txt for syllable counting +import functools # Add this for lru_cache functionality +from transformers import ( + AutoModelForAudioClassification, + AutoFeatureExtractor, + AutoTokenizer, + pipeline, + AutoModelForCausalLM, + BitsAndBytesConfig +) +from huggingface_hub import login +from utils import ( + load_audio, + extract_audio_duration, + extract_mfcc_features, + calculate_lyrics_length, + format_genre_results, + ensure_cuda_availability, + preprocess_audio_for_model +) +from emotionanalysis import MusicAnalyzer +import librosa + +# Login to Hugging Face Hub if token is provided +if "HF_TOKEN" in os.environ: + login(token=os.environ["HF_TOKEN"]) + +# Constants +GENRE_MODEL_NAME = "dima806/music_genres_classification" +MUSIC_DETECTION_MODEL = "MIT/ast-finetuned-audioset-10-10-0.4593" +LLM_MODEL_NAME = "Qwen/Qwen3-14B" +SAMPLE_RATE = 22050 # Standard sample rate for audio processing + +# Check CUDA availability (for informational purposes) +CUDA_AVAILABLE = ensure_cuda_availability() + +# Create music detection pipeline +print(f"Loading music detection model: {MUSIC_DETECTION_MODEL}") +try: + music_detector = pipeline( + "audio-classification", + model=MUSIC_DETECTION_MODEL, + device=0 if CUDA_AVAILABLE else -1 + ) + print("Successfully loaded music detection pipeline") +except Exception as e: + print(f"Error creating music detection pipeline: {str(e)}") + # Fallback to manual loading + try: + music_processor = AutoFeatureExtractor.from_pretrained(MUSIC_DETECTION_MODEL) + music_model = AutoModelForAudioClassification.from_pretrained(MUSIC_DETECTION_MODEL) + print("Successfully loaded music detection model and feature extractor") + except Exception as e2: + print(f"Error loading music detection model components: {str(e2)}") + raise RuntimeError(f"Could not load music detection model: {str(e2)}") + +# Create genre classification pipeline +print(f"Loading audio classification model: {GENRE_MODEL_NAME}") +try: + genre_classifier = pipeline( + "audio-classification", + model=GENRE_MODEL_NAME, + device=0 if CUDA_AVAILABLE else -1 + ) + print("Successfully loaded audio classification pipeline") +except Exception as e: + print(f"Error creating pipeline: {str(e)}") + # Fallback to manual loading + try: + genre_processor = AutoFeatureExtractor.from_pretrained(GENRE_MODEL_NAME) + genre_model = AutoModelForAudioClassification.from_pretrained(GENRE_MODEL_NAME) + print("Successfully loaded audio classification model and feature extractor") + except Exception as e2: + print(f"Error loading model components: {str(e2)}") + raise RuntimeError(f"Could not load genre classification model: {str(e2)}") + +# Load LLM with appropriate quantization for T4 GPU +bnb_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.float16, +) + +llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME) +llm_model = AutoModelForCausalLM.from_pretrained( + LLM_MODEL_NAME, + device_map="auto", + quantization_config=bnb_config, + torch_dtype=torch.float16, +) + +# Create LLM pipeline +llm_pipeline = pipeline( + "text-generation", + model=llm_model, + tokenizer=llm_tokenizer, + max_new_tokens=512, +) + +# Initialize music emotion analyzer +music_analyzer = MusicAnalyzer() + +# New function: Count syllables in text +def 
count_syllables(text): + """Count syllables in a given text using the pronouncing library.""" + words = re.findall(r'\b[a-zA-Z]+\b', text.lower()) + syllable_count = 0 + + for word in words: + # Get pronunciations for the word + pronunciations = pronouncing.phones_for_word(word) + if pronunciations: + # Count syllables in the first pronunciation + syllable_count += pronouncing.syllable_count(pronunciations[0]) + else: + # Fallback: estimate syllables based on vowel groups + vowels = "aeiouy" + count = 0 + prev_is_vowel = False + + for char in word: + is_vowel = char.lower() in vowels + if is_vowel and not prev_is_vowel: + count += 1 + prev_is_vowel = is_vowel + + if word.endswith('e'): + count -= 1 + if word.endswith('le') and len(word) > 2 and word[-3] not in vowels: + count += 1 + if count == 0: + count = 1 + + syllable_count += count + + return syllable_count + +def extract_audio_features(audio_file): + """Extract audio features from an audio file.""" + try: + # Load the audio file using utility function + y, sr = load_audio(audio_file, SAMPLE_RATE) + + if y is None or sr is None: + raise ValueError("Failed to load audio data") + + # Get audio duration in seconds + duration = extract_audio_duration(y, sr) + + # Extract MFCCs for genre classification (may not be needed with the pipeline) + mfccs_mean = extract_mfcc_features(y, sr, n_mfcc=20) + + return { + "features": mfccs_mean, + "duration": duration, + "waveform": y, + "sample_rate": sr, + "path": audio_file # Keep path for the pipeline + } + except Exception as e: + print(f"Error extracting audio features: {str(e)}") + raise ValueError(f"Failed to extract audio features: {str(e)}") + +def classify_genre(audio_data): + """Classify the genre of the audio using the loaded model.""" + try: + # First attempt: Try using the pipeline if available + if 'genre_classifier' in globals(): + results = genre_classifier(audio_data["path"]) + # Transform pipeline results to our expected format + top_genres = [(result["label"], result["score"]) for result in results[:3]] + return top_genres + + # Second attempt: Use manually loaded model components + elif 'genre_processor' in globals() and 'genre_model' in globals(): + # Process audio input with feature extractor + inputs = genre_processor( + audio_data["waveform"], + sampling_rate=audio_data["sample_rate"], + return_tensors="pt" + ) + + with torch.no_grad(): + outputs = genre_model(**inputs) + predictions = outputs.logits.softmax(dim=-1) + + # Get the top 3 genres + values, indices = torch.topk(predictions, 3) + + # Map indices to genre labels + genre_labels = genre_model.config.id2label + + top_genres = [] + for i, (value, index) in enumerate(zip(values[0], indices[0])): + genre = genre_labels[index.item()] + confidence = value.item() + top_genres.append((genre, confidence)) + + return top_genres + + else: + raise ValueError("No genre classification model available") + + except Exception as e: + print(f"Error in genre classification: {str(e)}") + # Fallback: return a default genre if everything fails + return [("rock", 1.0)] + +def detect_music(audio_data): + """Detect if the audio is music using the MIT AST model.""" + try: + # First attempt: Try using the pipeline if available + if 'music_detector' in globals(): + results = music_detector(audio_data["path"]) + # Look for music-related classes in the results + music_confidence = 0.0 + for result in results: + label = result["label"].lower() + if any(music_term in label for music_term in ["music", "song", "singing", "instrument"]): + 
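+# --- Illustrative sketch (hypothetical words, not part of the app's flow) ---
+# The syllable counter above prefers the CMU dictionary via `pronouncing` and
+# only uses the vowel-group heuristic for out-of-vocabulary words:
+#   count_syllables("beautiful") -> 3   (dictionary hit via pronouncing)
+#   count_syllables("snorfle")   -> 2   (fallback: silent-'e' and '-le' fixes)
+#   count_syllables("vibin")     -> 2   (fallback: two vowel groups)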
music_confidence = max(music_confidence, result["score"])
+            return music_confidence >= 0.2, results
+
+        # Second attempt: Use manually loaded model components
+        elif 'music_processor' in globals() and 'music_model' in globals():
+            # Process audio input with feature extractor
+            inputs = music_processor(
+                audio_data["waveform"],
+                sampling_rate=audio_data["sample_rate"],
+                return_tensors="pt"
+            )
+
+            with torch.no_grad():
+                outputs = music_model(**inputs)
+                predictions = outputs.logits.softmax(dim=-1)
+
+            # Get the top predictions
+            values, indices = torch.topk(predictions, 5)
+
+            # Map indices to labels
+            labels = music_model.config.id2label
+
+            # Check for music-related classes
+            music_confidence = 0.0
+            results = []
+
+            for i, (value, index) in enumerate(zip(values[0], indices[0])):
+                label = labels[index.item()].lower()
+                score = value.item()
+                results.append({"label": label, "score": score})
+
+                if any(music_term in label for music_term in ["music", "song", "singing", "instrument"]):
+                    music_confidence = max(music_confidence, score)
+
+            return music_confidence >= 0.2, results
+
+        else:
+            raise ValueError("No music detection model available")
+
+    except Exception as e:
+        print(f"Error in music detection: {str(e)}")
+        return False, []
+
+def detect_beats(y, sr):
+    """Enhanced beat detection with adaptive threshold analysis and improved time signature detection."""
+    # STEP 1: Improved pre-processing with robustness for quiet sections
+    # (Flooring the raw waveform with np.clip would flatten every negative
+    # sample, so the anti-NaN floor is applied to the onset envelopes below.)
+
+    # Separate harmonic and percussive components
+    y_harmonic, y_percussive = librosa.effects.hpss(y)
+
+    # Generate multiple onset envelopes with smoothing for stability
+    onset_env_full = librosa.onset.onset_strength(y=y, sr=sr)
+    onset_env_perc = librosa.onset.onset_strength(y=y_percussive, sr=sr)
+
+    # Apply a small floor to handle quiet sections and avoid NaN
+    onset_env_full = np.maximum(onset_env_full, 1e-6)
+    onset_env_perc = np.maximum(onset_env_perc, 1e-6)
+
+    # Create weighted combination
+    combined_onset = onset_env_full * 0.3 + onset_env_perc * 0.7
+
+    # STEP 2: Multi-strategy tempo and beat detection
+    tempo_candidates = []
+    beat_candidates = []
+
+    # Strategy 1: Standard detection
+    tempo1, beats1 = librosa.beat.beat_track(
+        onset_envelope=combined_onset,
+        sr=sr,
+        tightness=100  # librosa's default tightness
+    )
+    tempo_candidates.append(tempo1)
+    beat_candidates.append(beats1)
+
+    # Strategy 2: Lower starting BPM helps find different time signatures
+    # (beat_track accepts start_bpm but has no std_bpm parameter)
+    tempo2, beats2 = librosa.beat.beat_track(
+        onset_envelope=combined_onset,
+        sr=sr,
+        tightness=100,
+        start_bpm=60
+    )
+    tempo_candidates.append(tempo2)
+    beat_candidates.append(beats2)
+
+    # Select the best strategy based on consistency
+    beat_consistency = []
+    for beats in beat_candidates:
+        if len(beats) <= 1:
+            beat_consistency.append(0)
+            continue
+
+        times = librosa.frames_to_time(beats, sr=sr)
+        intervals = np.diff(times)
+
+        # More consistent beats have lower variance in intervals
+        if np.mean(intervals) > 0:
+            consistency = 1.0 / (1.0 + np.std(intervals) / np.mean(intervals))
+            beat_consistency.append(consistency)
+        else:
+            beat_consistency.append(0)
+
+    best_idx = np.argmax(beat_consistency) if beat_consistency else 0
+    tempo = tempo_candidates[best_idx]
+    beat_frames = beat_candidates[best_idx]
+
+    # STEP 3: Performance optimization 
with vectorized operations + beat_times = librosa.frames_to_time(beat_frames, sr=sr) + + # Vectorized extraction of beat strengths instead of loop + beat_strengths = [] + if len(beat_frames) > 0: + # Filter out beat frames that exceed the onset envelope length + valid_frames = [frame for frame in beat_frames if frame < len(combined_onset)] + if valid_frames: + # Vectorized extraction of valid beat strengths + beat_strengths = combined_onset[valid_frames].tolist() + + # Handle any remaining beats + avg_strength = np.mean(beat_strengths) if beat_strengths else 1.0 + beat_strengths.extend([avg_strength] * (len(beat_times) - len(beat_strengths))) + else: + beat_strengths = [1.0] * len(beat_times) + else: + beat_strengths = [1.0] * len(beat_times) + + # STEP 4: Calculate intervals between beats + intervals = np.diff(beat_times).tolist() if len(beat_times) > 1 else [] + + # STEP 5: Improved time signature detection for various patterns + # Start with default assumption + time_signature = 4 + + if len(beat_strengths) > 8: + # Use autocorrelation to find periodicity in beat strengths + if len(beat_strengths) > 4: + # Normalize beat strengths for better pattern detection + norm_strengths = np.array(beat_strengths) + if np.max(norm_strengths) > 0: + norm_strengths = norm_strengths / np.max(norm_strengths) + + # Compute autocorrelation to find periodic patterns (N) + ac = librosa.autocorrelate(norm_strengths, max_size=len(norm_strengths)//2) + + # Find peaks in autocorrelation (indicates periodicity) + if len(ac) > 3: # Need enough data for peak picking + # Find peaks after lag 0 + peaks = librosa.util.peak_pick(ac[1:], pre_max=1, post_max=1, pre_avg=1, post_avg=1, delta=0.1, wait=1) + peaks = peaks + 1 # Adjust for the removed lag 0 + + if len(peaks) > 0: + # Get the first significant peak position (cycle length N) + N = peaks[0] + + # Map common cycle lengths to time signatures + if 2 <= N <= 3: + time_signature = N # Direct mapping for simple cases + elif N == 6: + time_signature = 3 # Could be 6/8 or 3/4 with subdivisions + elif N == 8: + time_signature = 4 # Could be 4/4 with subdivisions + elif N == 5 or N == 7: + time_signature = N # Odd time signatures like 5/4 or 7/8 + # Otherwise, keep default 4 + + # Use adaptive thresholds for pattern detection instead of fixed values + if len(beat_strengths) > 3: + # Calculate z-scores to identify statistically significant strong beats + strengths_array = np.array(beat_strengths) + mean_strength = np.mean(strengths_array) + std_strength = np.std(strengths_array) + + if std_strength > 0: + z_scores = (strengths_array - mean_strength) / std_strength + + # Count beats with z-score > 1 in groups of 3 (for 3/4 time) + strong_beat_pattern = [] + for i in range(0, len(z_scores) - 2, 3): + # First beat should be significantly stronger (z > 1) + # Second and third beats should be weaker + if z_scores[i] > 1 and z_scores[i+1] < 0.5 and z_scores[i+2] < 0.5: + strong_beat_pattern.append(1) + else: + strong_beat_pattern.append(0) + + # Check if we have a clear 3/4 pattern + if strong_beat_pattern and len(strong_beat_pattern) >= 3: + three_pattern_probability = sum(strong_beat_pattern) / len(strong_beat_pattern) + if three_pattern_probability > 0.6: + time_signature = 3 + + # STEP 6: Enhanced phrase detection with adaptive thresholds + phrases = [] + current_phrase = [] + + if len(beat_times) > 0: + # Calculate adaptive thresholds using percentiles instead of fixed ratios + if len(beat_strengths) > 4: + # Define thresholds based on distribution rather than fixed 
values + strong_threshold = np.percentile(beat_strengths, 75) # Top 25% are "strong" beats + # For gaps, calculate significant deviation using z-scores if we have intervals + if intervals: + mean_interval = np.mean(intervals) + std_interval = np.std(intervals) + # A significant gap is > 1.5 standard deviations above mean + significant_gap = mean_interval + (1.5 * std_interval) if std_interval > 0 else mean_interval * 1.3 + else: + significant_gap = 0 + else: + # Fallback for limited data + strong_threshold = np.max(beat_strengths) * 0.8 if beat_strengths else 1.0 + significant_gap = 0 + + # Identify phrase boundaries + for i in range(len(beat_times)): + current_phrase.append(i) + + # Check for phrase boundary conditions + if i < len(beat_times) - 1: + # Strong beat coming up (using adaptive threshold) + is_stronger_next = False + if i < len(beat_strengths) - 1: + is_stronger_next = beat_strengths[i+1] > strong_threshold and beat_strengths[i+1] > beat_strengths[i] * 1.1 + + # Significant gap (using adaptive threshold) + is_longer_gap = False + if i < len(beat_times) - 1 and intervals and i < len(intervals): + is_longer_gap = intervals[i] > significant_gap + + # Measure boundary based on time signature + is_measure_boundary = (i + 1) % time_signature == 0 and i > 0 + + # Combined decision for phrase boundary + if ((is_stronger_next or is_longer_gap) and len(current_phrase) >= 2) or \ + (is_measure_boundary and len(current_phrase) >= time_signature): + phrases.append(current_phrase) + current_phrase = [] + + # Add the last phrase if not empty + if current_phrase and len(current_phrase) >= 2: + phrases.append(current_phrase) + + # Ensure we have at least one phrase + if not phrases and len(beat_times) >= 2: + # Default to grouping by measures based on detected time signature + for i in range(0, len(beat_times), time_signature): + end = min(i + time_signature, len(beat_times)) + if end - i >= 2: # Ensure at least 2 beats per phrase + phrases.append(list(range(i, end))) + + # Return in the original format for compatibility + return { + "tempo": tempo, + "beat_frames": beat_frames, + "beat_times": beat_times, + "beat_count": len(beat_times), + "beat_strengths": beat_strengths, + "intervals": intervals, + "time_signature": time_signature, + "phrases": phrases + } + +def detect_sections(y, sr): + """ + Advanced detection of musical sections with adaptive segmentation and improved classification. 
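+# --- Illustrative sketch (hypothetical helper, not called by the app):
+# exercising detect_beats on a synthetic click track. A 120 BPM click should
+# come back with a tempo close to 120 and the default 4/4 time signature.
+def _demo_detect_beats(sr=22050, bpm=120, seconds=8.0):
+    y = np.zeros(int(sr * seconds))
+    click = np.sin(2 * np.pi * 880 * np.arange(200) / sr)  # ~9 ms tone burst
+    step = int(sr * 60.0 / bpm)  # samples between beats
+    for start in range(0, len(y) - len(click), step):
+        y[start:start + len(click)] = click
+    info = detect_beats(y, sr)
+    tempo = float(np.atleast_1d(info["tempo"])[0])
+    print(f"tempo={tempo:.1f} BPM, {info['beat_count']} beats, "
+          f"time signature={info['time_signature']}/4")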
+ + Parameters: + y: Audio time series + sr: Sample rate + + Returns: + A list of section dictionaries with type, start time, end time, and duration + """ + # Step 1: Extract rich feature set for comprehensive analysis + # ---------------------------------------------------------------------- + hop_length = 512 # Common hop length for feature extraction + + # Spectral features + S = np.abs(librosa.stft(y, hop_length=hop_length)) + contrast = librosa.feature.spectral_contrast(S=S, sr=sr) + + # Harmonic features with CQT-based chroma (better for harmonic analysis) + chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length) + + # Timbral features + mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=hop_length) + + # Energy features + rms = librosa.feature.rms(y=y, hop_length=hop_length) + + # Harmonic-percussive source separation for better rhythm analysis + y_harmonic, y_percussive = librosa.effects.hpss(y) + percussive_rms = librosa.feature.rms(y=y_percussive, hop_length=hop_length) + + # Step 2: Adaptive determination of segment count based on song complexity + # ---------------------------------------------------------------------- + duration = librosa.get_duration(y=y, sr=sr) + + # Feature preparation for adaptive segmentation + # Stack features with proper normalization (addressing the scale issue) + feature_stack = np.vstack([ + librosa.util.normalize(contrast), + librosa.util.normalize(chroma), + librosa.util.normalize(mfcc), + librosa.util.normalize(rms) + ]) + + # Transpose to get time as first dimension + feature_matrix = feature_stack.T + + # Step 3: Feature fusion using dimensionality reduction (addressing simple summation issue) + # ---------------------------------------------------------------------- + + # Apply PCA to reduce dimensionality while preserving relationships + from sklearn.decomposition import PCA + + # Handle very short audio files + n_components = min(8, feature_matrix.shape[0], feature_matrix.shape[1]) + + if feature_matrix.shape[0] > n_components and feature_matrix.shape[1] > 0: + try: + pca = PCA(n_components=n_components) + reduced_features = pca.fit_transform(feature_matrix) + except Exception as e: + print(f"PCA failed, falling back to original features: {e}") + # Fallback to simpler approach if PCA fails + reduced_features = feature_matrix + else: + # Not enough data for PCA + reduced_features = feature_matrix + + # Step 4: Adaptive determination of optimal segment count + # ---------------------------------------------------------------------- + + # Initialize range of segment counts to try + min_segments = max(2, int(duration / 60)) # At least 2 segments, roughly 1 per minute + max_segments = min(10, int(duration / 20)) # At most 10 segments, roughly 1 per 20 seconds + + # Ensure reasonable bounds + min_segments = max(2, min(min_segments, 4)) + max_segments = max(min_segments + 1, min(max_segments, 8)) + + # Try different segment counts and evaluate with silhouette score + best_segments = min_segments + best_score = -1 + + from sklearn.metrics import silhouette_score + from sklearn.cluster import AgglomerativeClustering + + # Only do this analysis if we have enough data + if reduced_features.shape[0] > max_segments: + for n_segments in range(min_segments, max_segments + 1): + try: + # Perform agglomerative clustering + clustering = AgglomerativeClustering(n_clusters=n_segments) + labels = clustering.fit_predict(reduced_features) + + # Calculate silhouette score if we have enough samples + if len(np.unique(labels)) > 1 and 
len(labels) > n_segments + 1: + score = silhouette_score(reduced_features, labels) + + if score > best_score: + best_score = score + best_segments = n_segments + except Exception as e: + print(f"Clustering with {n_segments} segments failed: {e}") + continue + + # Use the optimal segment count for final segmentation + n_segments = best_segments + + # Step 5: Final segmentation using the optimal segment count + # ---------------------------------------------------------------------- + + # Method 1: Use agglomerative clustering on the reduced features + try: + clustering = AgglomerativeClustering(n_clusters=n_segments) + labels = clustering.fit_predict(reduced_features) + + # Convert cluster labels to boundaries by finding where labels change + boundaries = [0] # Start with the beginning + + for i in range(1, len(labels)): + if labels[i] != labels[i-1]: + boundaries.append(i) + + boundaries.append(len(labels)) # Add the end + + # Convert to frames + bounds_frames = np.array(boundaries) + + except Exception as e: + print(f"Final clustering failed: {e}") + # Fallback to librosa's agglomerative clustering on original features + bounds_frames = librosa.segment.agglomerative(feature_stack, n_segments) + + # Step 6: Detect harmonic changes for better bridge identification + # ---------------------------------------------------------------------- + + # Calculate tonal centroids to identify key changes + tonnetz = librosa.feature.tonnetz(y=y_harmonic, sr=sr) + + # Look for significant changes in harmonic content + harmonic_changes = [] + + if tonnetz.shape[1] > 1: + tonnetz_diff = np.sum(np.abs(np.diff(tonnetz, axis=1)), axis=0) + # Normalize + if np.max(tonnetz_diff) > 0: + tonnetz_diff = tonnetz_diff / np.max(tonnetz_diff) + + # Identify significant harmonic changes (potential bridges or section changes) + threshold = np.percentile(tonnetz_diff, 90) # Top 10% most significant changes + for i in range(len(tonnetz_diff)): + if tonnetz_diff[i] > threshold: + harmonic_changes.append(i) + + # Step 7: Convert boundaries to time and create sections + # ---------------------------------------------------------------------- + bounds_times = librosa.frames_to_time(bounds_frames, sr=sr, hop_length=hop_length) + + # Create sections from the boundaries + sections = [] + + for i in range(len(bounds_times) - 1): + start = bounds_times[i] + end = bounds_times[i+1] + duration = end - start + + # Skip extremely short sections + if duration < 4 and i > 0 and i < len(bounds_times) - 2: + continue + + # Step 8: Section type classification with improved musical features + # ---------------------------------------------------------------------- + + # Get indices for this section + start_idx = bounds_frames[i] + end_idx = bounds_frames[i+1] + + # Basic section type based on position + if i == 0: + section_type = "intro" + elif i == len(bounds_times) - 2: + section_type = "outro" + else: + # Default to alternating verse/chorus + section_type = "chorus" if i % 2 == 1 else "verse" + + # Only analyze characteristics if we have enough frames + if end_idx > start_idx: + # Calculate musical characteristics for this section + + # 1. Energy profile + energy = np.mean(rms[0, start_idx:end_idx]) + + # 2. Rhythm intensity (percussive content) + rhythm_intensity = np.mean(percussive_rms[0, start_idx:end_idx]) + + # 3. Harmonic complexity + if chroma.shape[1] > 0: + chroma_var = np.var(chroma[:, start_idx:end_idx]) + else: + chroma_var = 0 + + # 4. 
Timbral characteristics + if mfcc.shape[1] > 0: + mfcc_mean = np.mean(mfcc[:, start_idx:end_idx], axis=1) + mfcc_var = np.var(mfcc[:, start_idx:end_idx], axis=1) + else: + mfcc_mean = np.zeros(mfcc.shape[0]) + mfcc_var = np.zeros(mfcc.shape[0]) + + # 5. Check for harmonic changes within this section (for bridge detection) + has_harmonic_change = False + for change_idx in harmonic_changes: + if start_idx <= change_idx < end_idx: + has_harmonic_change = True + break + + # Calculate relative metrics by comparing to the entire song + relative_energy = energy / np.mean(rms) + relative_rhythm = rhythm_intensity / np.mean(percussive_rms) + + # Improved section type classification: + + # Chorus: High energy, strong rhythm, less harmonic variation + if (relative_energy > 1.1 and relative_rhythm > 1.1 and + section_type != "intro" and section_type != "outro"): + section_type = "chorus" + + # Verse: Moderate energy, moderate rhythm, more harmonic variation + elif (0.8 <= relative_energy <= 1.1 and chroma_var > np.mean(np.var(chroma, axis=1)) and + section_type != "intro" and section_type != "outro"): + section_type = "verse" + + # Bridge: Often has harmonic changes, energy drop, or unique timbral characteristics + if (section_type not in ["intro", "outro"] and + (has_harmonic_change or + (0.5 <= relative_energy <= 0.9 and duration < 30) or + np.any(mfcc_var > np.percentile(np.var(mfcc, axis=1), 75)))): + section_type = "bridge" + + # Add section to the list + sections.append({ + "type": section_type, + "start": start, + "end": end, + "duration": duration + }) + + # Post-processing: Ensure reasonable section sequence and durations + for i in range(1, len(sections) - 1): + # Check for unreasonably short sections and merge them + if sections[i]["duration"] < 8 and sections[i]["type"] not in ["intro", "outro", "bridge"]: + # Either merge with previous or next section based on similarity + prev_type = sections[i-1]["type"] + next_type = sections[i+1]["type"] if i+1 < len(sections) else "outro" + + # Default to merging with the previous section + sections[i]["type"] = prev_type + + # Filter out any remaining extremely short sections + sections = [s for s in sections if s["duration"] >= 5 or + s["type"] == "intro" or s["type"] == "outro"] + + return sections + +def create_flexible_syllable_templates(beats_info, genre=None, phrase_mode='default'): + """ + Create enhanced syllable templates based on beat patterns with improved musical intelligence. 
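+# --- Illustrative sketch (hypothetical helper): pretty-printing the list of
+# section dicts returned by detect_sections above. Each entry carries "type",
+# "start", "end" and "duration" in seconds.
+def _print_section_summary(sections):
+    for s in sections:
+        print(f'{s["type"]:>6}: {s["start"]:6.1f}s - {s["end"]:6.1f}s '
+              f'({s["duration"]:.1f}s)')
+# Example: _print_section_summary(detect_sections(y, sr)) might print
+#  intro:    0.0s -  12.3s (12.3s)
+#  verse:   12.3s -  41.7s (29.4s)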
+ + Parameters: + beats_info: Dictionary containing beat analysis data + genre: Optional genre to influence template creation + phrase_mode: 'default' uses provided phrases, 'auto' forces recalculation + + Returns: + String of syllable templates with embedded strength values and flexible timing + """ + import numpy as np + from sklearn.cluster import KMeans + + # Extract basic beat information + beat_times = beats_info.get("beat_times", []) + beat_strengths = beats_info.get("beat_strengths", [1.0] * len(beat_times)) + tempo = beats_info.get("tempo", 120) + time_signature = beats_info.get("time_signature", 4) + + # Early return for insufficient data + if len(beat_times) < 2: + return "S(1.0):1-w(0.5):1|S(1.0):1-w(0.5):1" # Default fallback pattern + + # Step 1: Adaptive thresholding using k-means clustering + # ---------------------------------------------------------------------- + if len(beat_strengths) >= 6: # Need enough data points for clustering + # Reshape for k-means + X = np.array(beat_strengths).reshape(-1, 1) + + # Use k-means with 3 clusters for Strong, Medium, Weak classification + kmeans = KMeans(n_clusters=3, random_state=0, n_init=10).fit(X) + + # Find the centroid values and sort them + centroids = sorted([float(c[0]) for c in kmeans.cluster_centers_]) + + # Map to thresholds (using the midpoints between centroids) + if len(centroids) >= 3: + medium_threshold = (centroids[0] + centroids[1]) / 2 + strong_threshold = (centroids[1] + centroids[2]) / 2 + else: + # Fallback if clustering doesn't work well + medium_threshold = np.percentile(beat_strengths, 33) + strong_threshold = np.percentile(beat_strengths, 66) + else: + # For limited data, use percentile-based approach + medium_threshold = np.percentile(beat_strengths, 33) + strong_threshold = np.percentile(beat_strengths, 66) + + # Step 2: Create or refine phrases based on mode + # ---------------------------------------------------------------------- + phrases = beats_info.get("phrases", []) + + if phrase_mode == 'auto' or not phrases: + # Create phrases based on time signature and beat strengths + phrases = [] + current_phrase = [] + + for i in range(len(beat_times)): + current_phrase.append(i) + + # Check for natural phrase endings + if (i + 1) % time_signature == 0 or i == len(beat_times) - 1: + if len(current_phrase) >= 2: # Ensure minimum phrase length + phrases.append(current_phrase) + current_phrase = [] + + # Add any remaining beats + if current_phrase and len(current_phrase) >= 2: + phrases.append(current_phrase) + + # Step 3: Calculate continuous tempo-to-syllable mapping function + # ---------------------------------------------------------------------- + def tempo_to_syllable_base(tempo): + """Continuous function mapping tempo to syllable base count""" + # Sigmoid-like function that smoothly transitions between syllable counts + if tempo > 180: + return 1.0 + elif tempo > 140: + return 1.0 + (180 - tempo) * 0.02 # Gradual increase 1.0 → 1.8 + elif tempo > 100: + return 1.8 + (140 - tempo) * 0.01 # Gradual increase 1.8 → 2.2 + elif tempo > 70: + return 2.2 + (100 - tempo) * 0.02 # Gradual increase 2.2 → 2.8 + else: + return 2.8 + max(0, (70 - tempo) * 0.04) # Continue increasing for very slow tempos + + # Step 4: Generate enhanced templates with flexible timing + # ---------------------------------------------------------------------- + syllable_templates = [] + + for phrase in phrases: + # Skip empty phrases + if not phrase: + continue + + # Extract beat strengths for this phrase + phrase_strengths = 
[beat_strengths[i] for i in phrase if i < len(beat_strengths)] + if not phrase_strengths: + phrase_strengths = [1.0] * len(phrase) + + # Apply adaptive thresholding for stress pattern detection + stress_pattern = [] + for i, strength in enumerate(phrase_strengths): + # Consider both strength and metrical position + metrical_position = i % time_signature + + # Apply positional boost for strong metrical positions + position_boost = 0.15 if metrical_position == 0 else 0 + # Secondary stress on beat 3 in 4/4 time + if time_signature == 4 and metrical_position == 2: + position_boost = 0.08 + + effective_strength = strength + position_boost + + if effective_strength >= strong_threshold: + stress_pattern.append(("S", effective_strength)) # Strong beat with strength + elif effective_strength >= medium_threshold: + stress_pattern.append(("m", effective_strength)) # Medium beat with strength + else: + stress_pattern.append(("w", effective_strength)) # Weak beat with strength + + # Step 5: Calculate syllable counts using continuous function + # ---------------------------------------------------------------------- + detailed_template = [] + + for i, (stress_type, strength) in enumerate(stress_pattern): + # Get base syllable count from tempo + base_syllables = tempo_to_syllable_base(tempo) + + # Adjust based on stress type + if stress_type == "S": + syllable_factor = 1.2 # More syllables for strong beats + elif stress_type == "m": + syllable_factor = 1.0 # Normal for medium beats + else: + syllable_factor = 0.8 # Fewer for weak beats + + # Apply genre-specific adjustments + genre_factor = 1.0 + if genre: + genre = genre.lower() + if any(term in genre for term in ["rap", "hip hop", "hip-hop"]): + genre_factor = 1.4 # Much higher syllable density for rap + elif any(term in genre for term in ["folk", "country", "ballad"]): + genre_factor = 0.8 # Lower density for folk styles + + # Calculate adjusted syllable count + raw_count = base_syllables * syllable_factor * genre_factor + + # Allow for more flexible syllable counts with non-integer values + # Round to multiples of 0.5 for half-syllable precision + rounded_count = round(raw_count * 2) / 2 + + # Limit to reasonable range (0.5 to 4) + syllable_count = max(0.5, min(4, rounded_count)) + + # Format with embedded strength value for reversibility + # Convert strength to 2-decimal precision percentage + strength_pct = int(strength * 100) / 100 + detailed_template.append(f"{stress_type}({strength_pct}):{syllable_count}") + + # Join beat templates for this phrase + phrase_template = "-".join(detailed_template) + syllable_templates.append(phrase_template) + + # Step 6: Ensure valid output with reasonable defaults + # ---------------------------------------------------------------------- + if not syllable_templates: + # Create a sensible default based on time signature + if time_signature == 3: + syllable_templates = ["S(0.95):2-w(0.4):1-w(0.35):1"] # 3/4 default + else: + syllable_templates = ["S(0.95):2-w(0.4):1-m(0.7):1.5-w(0.35):1"] # 4/4 default + + # Join all phrase templates with the original separator for compatibility + return "|".join(syllable_templates) + +def format_syllable_templates_for_prompt(syllable_templates, arrow="→", line_wrap=10, + structured_output=False, beat_types=None): + """ + Convert technical syllable templates into clear, human-readable instructions with + enhanced flexibility and customization options. 
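+# --- Illustrative sketch (hypothetical template values): the strings built
+# above use TYPE(strength):syllables tokens, with "-" between beats and "|"
+# between phrases. Parsing one back apart looks like this:
+def _demo_parse_template():
+    template = "S(0.95):2-w(0.4):1-m(0.7):1.5|S(0.9):2-w(0.35):1"
+    for phrase_num, phrase in enumerate(template.split("|"), start=1):
+        for beat in phrase.split("-"):
+            beat_type = beat.split("(")[0]
+            strength = float(beat.split("(")[1].split(")")[0])
+            syllables = float(beat.split(":")[1])
+            print(phrase_num, beat_type, strength, syllables)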
+ + Parameters: + syllable_templates: String or list of templates + arrow: Symbol to use between beats (default: "→") + line_wrap: Number of beats before automatic line wrapping (0 = no wrapping) + structured_output: If True, return structured data instead of text + beat_types: Custom mapping for beat types (default: None, uses standard mapping) + + Returns: + Human-readable instructions or structured data depending on parameters + """ + if not syllable_templates: + return {} if structured_output else "" + + # Define standard beat type mapping (extensible) + default_beat_types = { + "S": {"name": "STRONG", "description": "stressed syllable"}, + "m": {"name": "medium", "description": "medium-stressed syllable"}, + "w": {"name": "weak", "description": "unstressed syllable"}, + "X": {"name": "EXTRA", "description": "extra strong syllable"}, + "L": {"name": "legato", "description": "connected/tied syllable"} + } + + # Use custom mapping if provided, otherwise use default + beat_types = beat_types or default_beat_types + + # Initialize structured output if requested + structured_data = {"lines": [], "explanations": []} if structured_output else None + + # Improved format detection - more robust than just checking for "|" + is_enhanced_format = False + + # Check if it's a string with enhanced format patterns + if isinstance(syllable_templates, str): + # Look for enhanced format patterns - check for beat type indicators + if any(bt + "(" in syllable_templates or bt + ":" in syllable_templates or bt + "[" in syllable_templates + for bt in beat_types.keys()): + is_enhanced_format = True + # Secondary check for the "|" delimiter between phrases + elif "|" in syllable_templates: + is_enhanced_format = True + + # Initialize the output with a brief explanatory header + output = [] + + if is_enhanced_format: + # Split into individual phrase templates + phrases = syllable_templates.split("|") if "|" in syllable_templates else [syllable_templates] + + # Process each phrase into human-readable instructions + for i, phrase in enumerate(phrases): + # Check for special annotations + has_swing = "(swing)" in phrase + if has_swing: + phrase = phrase.replace("(swing)", "") # Remove annotation for processing + + beats = phrase.split("-") + beat_instructions = [] + + # Process each beat in the phrase + for j, beat in enumerate(beats): + # Extract beat type and information + beat_info = {"original": beat, "type": None, "count": None, "strength": None} + + # Handle enhanced format with embedded strength values: S(0.95):2 + if "(" in beat and ")" in beat and ":" in beat: + parts = beat.split(":") + beat_type = parts[0].split("(")[0] # Extract beat type + strength = parts[0].split("(")[1].rstrip(")") # Extract strength value + count = parts[1] # Extract syllable count + + beat_info["type"] = beat_type + beat_info["count"] = count + beat_info["strength"] = strength + + # Handle simpler format: S2, m1, w1 + elif any(beat.startswith(bt) for bt in beat_types.keys()) and len(beat) > 1: + beat_type = beat[0] + count = beat[1:] + + beat_info["type"] = beat_type + beat_info["count"] = count + + # Fallback for any other format + else: + beat_instructions.append(beat) + continue + + # Format the beat instruction based on type + if beat_info["type"] in beat_types: + type_name = beat_types[beat_info["type"]]["name"] + if beat_info["strength"]: + beat_instructions.append(f"{type_name}({beat_info['count']}) [{beat_info['strength']}]") + else: + beat_instructions.append(f"{type_name}({beat_info['count']})") + else: + # Unknown beat 
type, use as-is + beat_instructions.append(beat) + + # Handle line wrapping for readability + if line_wrap > 0 and len(beat_instructions) > line_wrap: + wrapped_instructions = [] + for k in range(0, len(beat_instructions), line_wrap): + section = beat_instructions[k:k+line_wrap] + wrapped_instructions.append(f"{arrow} ".join(section)) + line_desc = f"\n {arrow} ".join(wrapped_instructions) + else: + line_desc = f" {arrow} ".join(beat_instructions) + + # Add swing notation if present + if has_swing: + line_desc += " [with swing feel]" + + # Add to output + line_output = f"Line {i+1}: {line_desc}" + output.append(line_output) + + if structured_output: + structured_data["lines"].append({ + "line_number": i+1, + "beats": [{"original": beats[j], + "type": beat_info.get("type"), + "count": beat_info.get("count"), + "strength": beat_info.get("strength")} + for j, beat_info in enumerate([b for b in beats if isinstance(b, dict)])], + "has_swing": has_swing + }) + + # Add explanation of notation after the lines + explanation = [ + "\n📝 UNDERSTANDING THE NOTATION:" + ] + + # Add descriptions for each beat type that was actually used + used_beat_types = set() + for phrase in phrases: + for beat in phrase.split("-"): + for bt in beat_types.keys(): + if beat.startswith(bt): + used_beat_types.add(bt) + + for bt in used_beat_types: + if bt in beat_types: + name = beat_types[bt]["name"] + desc = beat_types[bt]["description"] + explanation.append(f"- {name}(n): Place a {desc} here, plus (n-1) unstressed syllables") + + explanation.extend([ + f"- {arrow}: Indicates flow from one beat to the next", + "- [0.xx]: Beat strength value (higher = more emphasis needed)" + ]) + + output.extend(explanation) + + if structured_output: + structured_data["explanations"] = explanation + + # Add examples for half-syllable values if they appear in the templates + has_half_syllables = any((".5" in beat) for phrase in phrases for beat in phrase.split("-")) + if has_half_syllables: + half_syllable_examples = [ + "\n🎵 HALF-SYLLABLE EXAMPLES:", + "- STRONG(1.5): One stressed syllable followed by an unstressed half-syllable", + " Example: \"LOVE you\" where \"LOVE\" is stressed and \"you\" is quick", + "- medium(2.5): One medium syllable plus one-and-a-half unstressed syllables", + " Example: \"Wait for the\" where \"Wait\" is medium-stressed and \"for the\" is quick" + ] + output.extend(half_syllable_examples) + + if structured_output: + structured_data["half_syllable_examples"] = half_syllable_examples + + # Add swing explanation if needed + if any("swing" in phrase for phrase in phrases): + swing_guide = [ + "\n🎶 SWING RHYTHM GUIDE:", + "- In swing, syllables should be unevenly timed (long-short pattern)", + "- Example: \"SUM-mer TIME\" in swing feels like \"SUM...mer-TIME\" with delay" + ] + output.extend(swing_guide) + + if structured_output: + structured_data["swing_guide"] = swing_guide + + # Handle the original format or segment dictionaries + else: + formatted_lines = [] + + if isinstance(syllable_templates, list): + for i, template in enumerate(syllable_templates): + if isinstance(template, dict) and "syllable_template" in template: + line = f"Line {i+1}: {template['syllable_template']} syllables" + formatted_lines.append(line) + + if structured_output: + structured_data["lines"].append({ + "line_number": i+1, + "syllable_count": template["syllable_template"] + }) + elif isinstance(template, str): + line = f"Line {i+1}: {template} syllables" + formatted_lines.append(line) + + if structured_output: + 
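+# --- Illustrative sketch (hypothetical call): rendering one enhanced template
+# through this function yields the prompt text handed to the LLM, including
+# the beat-by-beat line, the notation legend and the application tips.
+def _demo_format_for_prompt():
+    template = "S(0.95):2-w(0.4):1-m(0.7):1.5-w(0.35):1"
+    print(format_syllable_templates_for_prompt(template, arrow="→", line_wrap=8))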
structured_data["lines"].append({ + "line_number": i+1, + "syllable_count": template + }) + + output = formatted_lines + else: + output = [str(syllable_templates)] + + if structured_output: + structured_data["raw_content"] = str(syllable_templates) + + # Add general application advice + application_tips = [ + "\n💡 APPLICATION TIPS:", + "1. Strong beats need naturally stressed syllables (like the START of \"RE-mem-ber\")", + "2. Place important words on strong beats for natural emphasis", + "3. Vowel sounds work best for sustained or emphasized syllables", + "4. Keep consonant clusters (like \"str\" or \"thr\") on weak beats" + ] + output.extend(application_tips) + + if structured_output: + structured_data["application_tips"] = application_tips + return structured_data + + return "\n".join(output) + +def verify_flexible_syllable_counts(lyrics, templates): + """ + Enhanced verification of syllable counts and stress patterns with precise alignment analysis + and detailed feedback for all phrases in a template. + """ + import re + import pronouncing + import numpy as np + import functools + from itertools import chain + + # Apply caching to improve performance for repeated word lookups + @functools.lru_cache(maxsize=512) + def cached_phones_for_word(word): + return pronouncing.phones_for_word(word) + + @functools.lru_cache(maxsize=512) + def count_syllables_for_word(word): + """Count syllables in a single word with caching for performance.""" + # Try using pronouncing library first + pronunciations = cached_phones_for_word(word.lower()) + if pronunciations: + return pronouncing.syllable_count(pronunciations[0]) + + # Fallback method for words not in the pronouncing dictionary + vowels = "aeiouy" + word = word.lower() + count = 0 + prev_is_vowel = False + + for char in word: + is_vowel = char in vowels + if is_vowel and not prev_is_vowel: + count += 1 + prev_is_vowel = is_vowel + + # Handle special cases + if word.endswith('e') and not word.endswith('le'): + count -= 1 + if word.endswith('le') and len(word) > 2 and word[-3] not in vowels: + count += 1 + if count == 0: + count = 1 + + return count + + @functools.lru_cache(maxsize=512) + def get_word_stress(word): + """Get the stress pattern for a word with improved fallback handling.""" + pronunciations = cached_phones_for_word(word.lower()) + if pronunciations: + return pronouncing.stresses(pronunciations[0]) + + # Enhanced fallback for words not in the dictionary + syllables = count_syllables_for_word(word) + + # Common English stress patterns by word length + if syllables == 1: + return "1" # Single syllable words are stressed + elif syllables == 2: + # Most 2-syllable nouns and adjectives stress first syllable + # Common endings that indicate second-syllable stress + second_syllable_stress = ["ing", "er", "or", "ize", "ise", "ate", "ect", "end", "ure"] + if any(word.endswith(ending) for ending in second_syllable_stress): + return "01" + else: + return "10" # Default for 2-syllable words + elif syllables == 3: + # Common endings for specific stress patterns in 3-syllable words + if any(word.endswith(ending) for ending in ["ity", "ety", "ify", "ogy", "graphy"]): + return "100" # First syllable stress + elif any(word.endswith(ending) for ending in ["ation", "ious", "itis"]): + return "010" # Middle syllable stress + else: + return "100" # Default for 3-syllable words + else: + # For longer words, use common English patterns + return "1" + "0" * (syllables - 1) + + # Split lyrics into lines + lines = [line.strip() for line in lyrics.split("\n") 
if line.strip()] + + # Initialize tracking variables + verification_notes = [] + detailed_analysis = [] + stress_misalignments = [] + total_mismatch_count = 0 + + # Process each lyric line against its template + for i, line in enumerate(lines): + if i >= len(templates): + break + + template = templates[i] + + # Extract the template string from different possible formats + if isinstance(template, dict) and "syllable_template" in template: + template_str = template["syllable_template"] + elif isinstance(template, str): + template_str = template + else: + continue + + # Handle multiple phrases in template - process ALL phrases, not just the first + template_phrases = [template_str] + if "|" in template_str: + template_phrases = template_str.split("|") + + # Check against all phrases and find the best match + best_match_diff = float('inf') + best_match_phrase = None + best_phrase_beats = None + actual_count = count_syllables(line) + + for phrase_idx, phrase in enumerate(template_phrases): + # Extract beat patterns and expected syllable counts from template + beats_info = [] + total_expected = 0 + + # Enhanced template parsing + if "-" in phrase: + beat_templates = phrase.split("-") + + # Parse each beat template + for beat in beat_templates: + beat_info = {"original": beat, "type": None, "count": 1, "strength": None} + + # Handle templates with embedded strength values: S(0.95):2 + if "(" in beat and ")" in beat and ":" in beat: + parts = beat.split(":") + beat_type = parts[0].split("(")[0] + try: + strength = float(parts[0].split("(")[1].rstrip(")")) + except ValueError: + strength = 1.0 + + # Handle potential float syllable counts + try: + count = float(parts[1]) + # Convert to int if it's a whole number + if count == int(count): + count = int(count) + except ValueError: + count = 1 + + beat_info.update({ + "type": beat_type, + "count": count, + "strength": strength + }) + + # Handle simple format: S2, m1, w1 + elif any(beat.startswith(x) for x in ["S", "m", "w", "X", "L"]): + beat_type = beat[0] + + # Extract count, supporting float values + try: + count_str = beat[1:] + count = float(count_str) + if count == int(count): + count = int(count) + except ValueError: + count = 1 + + beat_info.update({ + "type": beat_type, + "count": count + }) + + # Legacy format - just numbers + else: + try: + count = float(beat) + if count == int(count): + count = int(count) + beat_info["count"] = count + except ValueError: + pass + + beats_info.append(beat_info) + total_expected += beat_info["count"] + + # Compare this phrase to actual syllable count + phrase_diff = abs(actual_count - total_expected) + + # Adaptive threshold based on expected syllables + expected_ratio = 0.15 if total_expected > 10 else 0.25 + phrase_threshold = max(1, round(total_expected * expected_ratio)) + + # If this is the best match so far, store it + if phrase_diff < best_match_diff: + best_match_diff = phrase_diff + best_match_phrase = phrase + best_phrase_beats = beats_info + + # For very simple templates without "-" + else: + try: + total_expected = float(phrase) + phrase_diff = abs(actual_count - total_expected) + if phrase_diff < best_match_diff: + best_match_diff = phrase_diff + best_match_phrase = phrase + best_phrase_beats = [{"count": total_expected}] + except ValueError: + pass + + # If we found a reasonable match, proceed with analysis + if best_match_phrase and best_phrase_beats: + total_expected = sum(beat["count"] for beat in best_phrase_beats) + + # Calculate adaptive threshold based on expected syllables + 
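+# --- Worked example of the adaptive threshold below (hypothetical counts):
+# longer templates use the tighter 15% ratio, shorter ones get 25% slack.
+#   total_expected = 12 -> ratio 0.15 -> threshold = max(1, round(1.8))  = 2
+#   total_expected =  6 -> ratio 0.25 -> threshold = max(1, round(1.5))  = 2
+#   total_expected =  3 -> ratio 0.25 -> threshold = max(1, round(0.75)) = 1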
expected_ratio = 0.15 if total_expected > 10 else 0.25 + threshold = max(1, round(total_expected * expected_ratio)) + + # Check if total syllable count is significantly off + if total_expected > 0 and best_match_diff > threshold: + verification_notes.append(f"Line {i+1}: Expected {total_expected} syllables, got {actual_count}") + total_mismatch_count += 1 + + # Extract words and perform detailed alignment analysis + words = re.findall(r'\b[a-zA-Z]+\b', line.lower()) + + # Get syllable count and stress for each word + word_analysis = [] + cumulative_syllables = 0 + + for word in words: + syllable_count = count_syllables_for_word(word) + + # Get stress pattern + stress_pattern = get_word_stress(word) + + word_analysis.append({ + "word": word, + "syllables": syllable_count, + "stress_pattern": stress_pattern, + "position": cumulative_syllables + }) + + cumulative_syllables += syllable_count + + # Analyze alignment with beats - only if there are beat types + if best_phrase_beats and any(b.get("type") == "S" for b in best_phrase_beats if "type" in b): + # Identify positions where strong syllables should fall + strong_positions = [] + current_pos = 0 + + for beat in best_phrase_beats: + if beat.get("type") == "S": + strong_positions.append(current_pos) + current_pos += beat.get("count", 1) + + # Check if strong syllables align with strong beats + alignment_issues = [] + + for pos in strong_positions: + # Find which word contains this position + misaligned_word = None + + for word_info in word_analysis: + word_start = word_info["position"] + word_end = word_start + word_info["syllables"] + + if word_start <= pos < word_end: + # Check if a stressed syllable falls on this position + syllable_in_word = pos - word_start + + # Get stress pattern for this word + stress = word_info["stress_pattern"] + + # If we have stress information and this syllable isn't stressed + if stress and syllable_in_word < len(stress) and stress[syllable_in_word] != '1': + misaligned_word = word_info["word"] + alignment_issues.append(f"'{word_info['word']}' (unstressed syllable on strong beat)") + stress_misalignments.append({ + "line": i+1, + "word": word_info["word"], + "position": pos, + "suggestion": get_stress_aligned_alternatives(word_info["word"], syllable_in_word) + }) + break + + if alignment_issues: + verification_notes.append(f" → Stress misalignments: {', '.join(alignment_issues)}") + + # Generate a visual alignment map for better understanding + alignment_map = generate_alignment_visualization(line, best_phrase_beats, word_analysis) + if alignment_map: + detailed_analysis.append(f"Line {i+1} Alignment Analysis:\n{alignment_map}") + else: + # If no matching template was found + verification_notes.append(f"Line {i+1}: Unable to find matching template pattern") + + # Only add detailed analysis if we have rhythm mismatches + if verification_notes: + lyrics += "\n\n[Note: Potential rhythm mismatches detected in these lines:]\n" + lyrics += "\n".join(verification_notes) + + if detailed_analysis: + lyrics += "\n\n[Detailed Alignment Analysis:]\n" + lyrics += "\n\n".join(detailed_analysis) + + lyrics += "\n\n[How to fix rhythm mismatches:]\n" + lyrics += "1. Make sure stressed syllables (like 'LO' in 'LOV-er') fall on STRONG beats\n" + lyrics += "2. Adjust syllable counts to match the template (add/remove words or use different words)\n" + lyrics += "3. 
Try using words where natural stress aligns with musical rhythm\n" + + # Add specific word substitution suggestions if we found stress misalignments + if stress_misalignments: + lyrics += "\n[Specific word replacement suggestions:]\n" + for issue in stress_misalignments[:5]: # Limit to first 5 issues + if issue["suggestion"]: + lyrics += f"Line {issue['line']}: Consider replacing '{issue['word']}' with: {issue['suggestion']}\n" + + return lyrics + +def generate_alignment_visualization(line, beats_info, word_analysis): + """Generate a visual representation of syllable alignment with beats.""" + if not beats_info or not word_analysis: + return None + + # Create a syllable breakdown with stress information + syllable_breakdown = [] + syllable_stresses = [] + + for word_info in word_analysis: + word = word_info["word"] + syllables = word_info["syllables"] + stress = word_info["stress_pattern"] or "" + + # Extend stress pattern if needed + while len(stress) < syllables: + stress += "0" + + # Get syllable breakdown + parts = naive_syllable_split(word, syllables) + + for i, part in enumerate(parts): + syllable_breakdown.append(part) + if i < len(stress): + syllable_stresses.append(stress[i]) + else: + syllable_stresses.append("0") + + # Create beat pattern + beat_types = [] + current_pos = 0 + + for beat in beats_info: + beat_type = beat.get("type", "-") + count = beat.get("count", 1) + + # Handle whole numbers and half syllables + if isinstance(count, int): + beat_types.extend([beat_type] * count) + else: + # For half syllables, round up and use markers + whole_part = int(count) + frac_part = count - whole_part + + if whole_part > 0: + beat_types.extend([beat_type] * whole_part) + + if frac_part > 0: + beat_types.append(f"{beat_type}½") + + # Ensure we have enough beat types + while len(beat_types) < len(syllable_breakdown): + beat_types.append("-") + + # Trim beat types if too many + beat_types = beat_types[:len(syllable_breakdown)] + + # Generate the visualization with highlighted misalignments + result = [] + + # First line: syllable breakdown with stress indicators + syllable_display = [] + for i, syllable in enumerate(syllable_breakdown): + if i < len(syllable_stresses) and syllable_stresses[i] == "1": + syllable_display.append(syllable.upper()) # Uppercase for stressed syllables + else: + syllable_display.append(syllable.lower()) # Lowercase for unstressed + + result.append(" - ".join(syllable_display)) + + # Second line: beat indicators with highlighting for misalignments + beat_indicators = [] + for i, (syllable, beat_type) in enumerate(zip(syllable_stresses, beat_types)): + if beat_type == "S" or beat_type.startswith("S"): + if syllable == "1": + beat_indicators.append("↑") # Aligned strong beat + else: + beat_indicators.append("❌") # Misaligned strong beat + elif beat_type == "m" or beat_type.startswith("m"): + beat_indicators.append("•") # Medium beat + elif beat_type == "w" or beat_type.startswith("w"): + beat_indicators.append("·") # Weak beat + else: + beat_indicators.append(" ") + + result.append(" ".join(beat_indicators)) + + # Third line: beat types + result.append(" - ".join(beat_types)) + + return "\n".join(result) + +@functools.lru_cache(maxsize=256) +def naive_syllable_split(word, syllable_count): + """Naively split a word into the specified number of syllables, with caching for performance.""" + if syllable_count <= 1: + return [word] + + # Common syllable break patterns + vowels = "aeiouy" + consonants = "bcdfghjklmnpqrstvwxz" + + # Find potential split points + splits = 
[]
+    for i in range(1, len(word) - 1):
+        if word[i] in consonants and word[i-1] in vowels:
+            splits.append(i)
+        elif word[i] in vowels and word[i-1] in consonants and word[i+1] in consonants:
+            splits.append(i+1)
+
+    # Ensure we have enough split points, bailing out once no new position
+    # is available (otherwise a short word could loop forever here)
+    while len(splits) < syllable_count - 1:
+        for i in range(1, len(word)):
+            if i not in splits:
+                splits.append(i)
+                break
+        else:
+            break
+
+    # Sort and limit
+    splits.sort()
+    splits = splits[:syllable_count - 1]
+
+    # Split the word
+    result = []
+    prev = 0
+    for pos in splits:
+        result.append(word[prev:pos])
+        prev = pos
+
+    result.append(word[prev:])
+    return result
+
+def get_stress_aligned_alternatives(word, position_to_stress):
+    """Suggest alternative words with proper stress at the required position."""
+    # This would ideally use a more sophisticated dictionary lookup,
+    # but here's a simple implementation with common word patterns.
+    # Use the module-level count_syllables; the cached count_syllables_for_word
+    # lives inside verify_flexible_syllable_counts and is not visible here.
+    syllable_count = count_syllables(word)
+
+    # Common synonyms/replacements by syllable count with stress position
+    if syllable_count == 2:
+        if position_to_stress == 0:  # Need stress on first syllable
+            first_stress = ["love-ly", "won-der", "beau-ty", "danc-ing", "dream-ing",
+                            "heart-beat", "sun-light", "moon-light", "star-light"]
+            return ", ".join(first_stress[:3])
+        else:  # Need stress on second syllable
+            second_stress = ["be-LIEVE", "a-BOVE", "a-ROUND", "to-DAY", "a-LIVE",
+                            "a-LONE", "be-HOLD", "re-TURN", "de-LIGHT"]
+            return ", ".join(second_stress[:3])
+    elif syllable_count == 3:
+        if position_to_stress == 0:  # First syllable stress
+            return "MEM-o-ry, WON-der-ful, BEAU-ti-ful"
+        elif position_to_stress == 1:  # Second syllable stress
+            return "a-MAZE-ing, to-GE-ther, for-EV-er"
+        else:  # Third syllable stress
+            return "un-der-STAND, o-ver-COME, ne-ver-MORE"
+
+    # For other cases, just provide general guidance
+    return f"a word with stress on syllable {position_to_stress + 1}"
+
+def generate_lyrics(genre, duration, emotion_results, song_structure=None):
+    """
+    Generate lyrics based on the genre, emotion, and structure analysis with enhanced rhythmic alignment.
+
+    This improved version uses advanced template creation, better formatting, and verification with
+    potential refinement for lyrics that perfectly match the musical rhythm patterns. 
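+# --- Illustrative sketch (inputs chosen for the demo): the suggestion helper
+# above keys off syllable count and required stress position, e.g.:
+#   get_stress_aligned_alternatives("believe", 0)
+#     -> 'love-ly, won-der, beau-ty'          (2 syllables, stress on first)
+#   get_stress_aligned_alternatives("memory", 1)
+#     -> 'a-MAZE-ing, to-GE-ther, for-EV-er'  (3 syllables, stress on second)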
+ + Parameters: + genre: Musical genre of the audio + duration: Duration of the audio in seconds + emotion_results: Dictionary containing emotional analysis results + song_structure: Optional dictionary containing song structure analysis + + Returns: + Generated lyrics aligned with the rhythm patterns of the music + """ + # Extract emotion and theme data from analysis results + primary_emotion = emotion_results["emotion_analysis"]["primary_emotion"] + primary_theme = emotion_results["theme_analysis"]["primary_theme"] + + # Extract numeric values safely with fallbacks + try: + tempo = float(emotion_results["rhythm_analysis"]["tempo"]) + except (KeyError, ValueError, TypeError): + tempo = 0.0 + + key = emotion_results["tonal_analysis"]["key"] + mode = emotion_results["tonal_analysis"]["mode"] + + # Format syllable templates for the prompt + syllable_guidance = "" + templates_for_verification = [] + + # Create a structure visualization to help with lyrics-music matching + structure_visualization = "=== MUSIC-LYRICS STRUCTURE MATCHING ===\n\n" + structure_visualization += f"Song Duration: {duration:.1f} seconds\n" + structure_visualization += f"Tempo: {tempo:.1f} BPM\n\n" + + if song_structure: + # Try to use flexible structure if available + if "flexible_structure" in song_structure and song_structure["flexible_structure"]: + flexible = song_structure["flexible_structure"] + if "segments" in flexible and flexible["segments"]: + # Get the segments + segments = flexible["segments"] + + # Add structure visualization + structure_visualization += f"Total segments: {len(segments)}\n" + structure_visualization += "Each segment represents one musical phrase for which you should write ONE line of lyrics.\n\n" + + # Process each segment to create enhanced rhythmic templates + enhanced_templates = [] + + for i, segment in enumerate(segments): + if i < 30: # Extend limit to 30 lines to handle longer songs + # Get the beat information for this segment + segment_start = segment["start"] + segment_end = segment["end"] + + # Add segment info to visualization + structure_visualization += f"Segment {i+1}: {segment_start:.1f}s - {segment_end:.1f}s (duration: {segment_end-segment_start:.1f}s)\n" + + # Find beats within this segment + segment_beats = [] + beat_times = flexible["beats"]["beat_times"] + beat_strengths = flexible["beats"].get("beat_strengths", []) + + for j, beat_time in enumerate(beat_times): + if segment_start <= beat_time < segment_end: + # Add this beat to the segment + segment_beats.append(j) + + # Create segment-specific beat info + segment_beats_info = { + "beat_times": [beat_times[j] for j in segment_beats], + "tempo": flexible["beats"].get("tempo", 120) + } + + if beat_strengths: + segment_beats_info["beat_strengths"] = [ + beat_strengths[j] for j in segment_beats + if j < len(beat_strengths) + ] + + # Create a phrase structure for this segment + segment_beats_info["phrases"] = [segment_beats] + + # Generate enhanced template with genre awareness and auto phrasing + enhanced_template = create_flexible_syllable_templates( + segment_beats_info, + genre=genre, + phrase_mode='auto' if i == 0 else 'default' + ) + enhanced_templates.append(enhanced_template) + templates_for_verification.append(enhanced_template) + + # Add template to visualization + structure_visualization += f" Template: {enhanced_template}\n" + + # Use these templates to determine verse/chorus structure based on similar patterns + # This is a simple version - could be enhanced with more sophisticated pattern detection + 
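+# --- Illustrative sketch (hypothetical templates): the grouping below keys
+# phrases by their simplified pattern, so only identical simplified strings
+# share a group; the most frequent group is later labeled "verse".
+def _demo_pattern_grouping():
+    templates = ["S(0.9):2-w(0.4):1", "S(0.9):2-w(0.4):1", "m(0.6):1-S(0.9):2"]
+    groups = {}
+    for t in templates:
+        simple = t.replace("(", "").replace(")", "").replace(":", "")
+        groups.setdefault(simple, []).append(t)
+    print({k: len(v) for k, v in groups.items()})
+    # -> {'S0.92-w0.41': 2, 'm0.61-S0.92': 1}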
section_types = [] + pattern_groups = {} + + for i, template in enumerate(enhanced_templates): + # Create simplified version for pattern matching + simple_pattern = template.replace("(", "").replace(")", "").replace(":", "") + + # Check if this pattern is similar to any we've seen + found_match = False + for group, patterns in pattern_groups.items(): + if any(simple_pattern == p.replace("(", "").replace(")", "").replace(":", "") for p in patterns): + pattern_groups[group].append(template) + section_types.append(group) + found_match = True + break + + if not found_match: + # New pattern type + group_name = f"Group_{len(pattern_groups) + 1}" + pattern_groups[group_name] = [template] + section_types.append(group_name) + + # Map pattern groups to verse/chorus/bridge based on common structures + section_mapping = {} + if len(pattern_groups) >= 1: + # Assume the most common pattern is the verse + most_common = max(pattern_groups.items(), key=lambda x: len(x[1]))[0] + section_mapping[most_common] = "verse" + + if len(pattern_groups) >= 2: + # Second most common might be chorus + sorted_groups = sorted(pattern_groups.items(), key=lambda x: len(x[1]), reverse=True) + if len(sorted_groups) > 1: + section_mapping[sorted_groups[1][0]] = "chorus" + + if len(pattern_groups) >= 3: + # Third pattern could be bridge + sorted_groups = sorted(pattern_groups.items(), key=lambda x: len(x[1]), reverse=True) + if len(sorted_groups) > 2: + section_mapping[sorted_groups[2][0]] = "bridge" + + # Update section types using the mapping + mapped_section_types = [] + for section_type in section_types: + if section_type in section_mapping: + mapped_section_types.append(section_mapping[section_type]) + else: + mapped_section_types.append("verse") # Default to verse + + # Add structure visualization with section types + structure_visualization += "\nPredicted Song Structure:\n" + for i, section_type in enumerate(mapped_section_types): + if i < len(enhanced_templates): + structure_visualization += f"Line {i+1}: [{section_type.upper()}] {enhanced_templates[i]}\n" + + # Calculate total line count + total_lines = len(enhanced_templates) + verse_lines = mapped_section_types.count("verse") + chorus_lines = mapped_section_types.count("chorus") + bridge_lines = mapped_section_types.count("bridge") + + # Add summary + structure_visualization += f"\nTotal Lines Required: {total_lines}\n" + structure_visualization += f"Verse Lines: {verse_lines}\n" + structure_visualization += f"Chorus Lines: {chorus_lines}\n" + structure_visualization += f"Bridge Lines: {bridge_lines}\n" + + # Format templates with improved formatting for the prompt + syllable_guidance = "CRITICAL RHYTHM INSTRUCTIONS:\n" + syllable_guidance += "Each line of lyrics MUST match exactly with one musical phrase/segment.\n" + syllable_guidance += "Follow these rhythm patterns for each line (STRONG beats need stressed syllables):\n\n" + + # Add section headers to formatted templates + formatted_templates = [] + for i, template in enumerate(enhanced_templates): + if i < len(mapped_section_types): + section_type = mapped_section_types[i].upper() + if i > 0 and mapped_section_types[i] != mapped_section_types[i-1]: + # New section + formatted_templates.append(f"\n[{section_type}]") + elif i == 0: + # First section + formatted_templates.append(f"[{section_type}]") + formatted_templates.append(format_syllable_templates_for_prompt([template], arrow="→", line_wrap=8)) + + syllable_guidance += "\n".join(formatted_templates) + + # Store info for later use in traditional sections 
approach + use_sections = True + + # Use the detected section structure for traditional approach + if verse_lines > 0: + verse_lines = min(verse_lines, total_lines // 2) # Ensure reasonable limits + else: + verse_lines = total_lines // 2 + + if chorus_lines > 0: + chorus_lines = min(chorus_lines, total_lines // 3) + else: + chorus_lines = total_lines // 3 + + if bridge_lines > 0: + bridge_lines = min(bridge_lines, total_lines // 6) + else: + bridge_lines = 0 + + # Fallback to traditional sections if needed + elif "syllables" in song_structure and song_structure["syllables"]: + syllable_guidance = "RHYTHM PATTERN INSTRUCTIONS:\n" + syllable_guidance += "Follow these syllable patterns for each section. Each line should match ONE phrase:\n\n" + + # Count sections for visualization + section_counts = {"verse": 0, "chorus": 0, "bridge": 0, "intro": 0, "outro": 0} + + for section in song_structure["syllables"]: + section_counts[section["type"]] = section_counts.get(section["type"], 0) + 1 + + if "syllable_template" in section: + # Process to create enhanced template + section_beats_info = { + "beat_times": [beat for beat in song_structure["beats"]["beat_times"] + if section["start"] <= beat < section["end"]], + "tempo": song_structure["beats"].get("tempo", 120) + } + + if "beat_strengths" in song_structure["beats"]: + section_beats_info["beat_strengths"] = [ + strength for i, strength in enumerate(song_structure["beats"]["beat_strengths"]) + if i < len(song_structure["beats"]["beat_times"]) and + section["start"] <= song_structure["beats"]["beat_times"][i] < section["end"] + ] + + # Create a phrase structure for this section + section_beats_info["phrases"] = [list(range(len(section_beats_info["beat_times"])))] + + # Generate enhanced template with genre awareness + enhanced_template = create_flexible_syllable_templates( + section_beats_info, + genre=genre, + phrase_mode='auto' if section['type'] == 'verse' else 'default' + ) + + syllable_guidance += f"[{section['type'].capitalize()}]:\n" + syllable_guidance += format_syllable_templates_for_prompt( + enhanced_template, + arrow="→", + line_wrap=6 + ) + "\n\n" + templates_for_verification.append(section) + elif "syllable_count" in section: + syllable_guidance += f"[{section['type'].capitalize()}]: ~{section['syllable_count']} syllables total\n" + + # Create structure visualization + structure_visualization += "Using traditional section-based structure:\n" + for section_type, count in section_counts.items(): + if count > 0: + structure_visualization += f"{section_type.capitalize()}: {count} sections\n" + + # Set traditional section counts + verse_lines = max(2, section_counts.get("verse", 0) * 4) + chorus_lines = max(2, section_counts.get("chorus", 0) * 4) + bridge_lines = max(0, section_counts.get("bridge", 0) * 2) + + # Use sections approach + use_sections = True + + # If we couldn't get specific templates, use general guidance + if not syllable_guidance: + syllable_guidance = "RHYTHM ALIGNMENT INSTRUCTIONS:\n\n" + syllable_guidance += "1. Align stressed syllables with strong beats (usually beats 1 and 3 in 4/4 time)\n" + syllable_guidance += "2. Use unstressed syllables on weak beats (usually beats 2 and 4 in 4/4 time)\n" + syllable_guidance += "3. 
Use appropriate syllable counts based on tempo:\n"
+ syllable_guidance += " - Fast tempo (>120 BPM): 4-6 syllables per line\n"
+ syllable_guidance += " - Medium tempo (90-120 BPM): 6-8 syllables per line\n"
+ syllable_guidance += " - Slow tempo (<90 BPM): 8-10 syllables per line\n"
+ + # Create basic structure visualization
+ structure_visualization += "Using estimated structure (no detailed analysis available):\n"
+ + # Calculate rough section counts based on duration
+ estimated_lines = max(8, int(duration / 10))
+ structure_visualization += f"Estimated total lines: {estimated_lines}\n"
+ + # Set traditional section counts based on duration
+ verse_lines = estimated_lines // 2
+ chorus_lines = estimated_lines // 3
+ bridge_lines = estimated_lines // 6 if estimated_lines > 12 else 0
+ + # Use sections approach
+ use_sections = True
+ + # Add examples of syllable-beat alignment with enhanced format
+ syllable_guidance += "\nEXAMPLES OF PERFECT RHYTHM ALIGNMENT:\n"
+ syllable_guidance += "Pattern: S(0.95):1 → w(0.4):1 → m(0.7):1 → w(0.3):1\n"
+ syllable_guidance += "Lyric: 'HEAR the MU-sic'\n"
+ syllable_guidance += " ↑ ↑ ↑ ↑\n"
+ syllable_guidance += " S w m w <- BEAT TYPE\n\n"
+ + syllable_guidance += "Pattern: S(0.9):2 → w(0.3):1 → S(0.85):1 → w(0.4):2\n"
+ syllable_guidance += "Lyric: 'DANC-ing the WHOLE night long'\n"
+ syllable_guidance += " ↑ ↑ ↑ ↑ ↑ ↑\n"
+ syllable_guidance += " S S w S w w <- BEAT TYPE\n\n"
+ + syllable_guidance += "Pattern: S(0.92):1 → m(0.65):2 → S(0.88):1 → w(0.35):1\n"
+ syllable_guidance += "Lyric: 'TIME and TIDE ROLL on'\n"
+ syllable_guidance += " ↑ ↑ ↑ ↑ ↑\n"
+ syllable_guidance += " S m m S w <- BEAT TYPE\n\n"
+ + # Add genre-specific guidance based on the detected genre
+ genre_guidance = ""
+ if any(term in genre.lower() for term in ["rap", "hip-hop", "hip hop"]):
+ genre_guidance += "\nSPECIFIC GUIDANCE FOR RAP/HIP-HOP RHYTHMS:\n"
+ genre_guidance += "- Use more syllables per beat for rapid-fire sections\n"
+ genre_guidance += "- Create internal rhymes within lines, not just at line endings\n"
+ genre_guidance += "- Emphasize the first beat of each bar with strong consonants\n"
+ elif any(term in genre.lower() for term in ["electronic", "edm", "techno", "house", "dance"]):
+ genre_guidance += "\nSPECIFIC GUIDANCE FOR ELECTRONIC MUSIC RHYTHMS:\n"
+ genre_guidance += "- Use repetitive phrases that build and release tension\n"
+ genre_guidance += "- Match syllables precisely to the beat grid\n"
+ genre_guidance += "- Use short, percussive words on strong beats\n"
+ elif any(term in genre.lower() for term in ["rock", "metal", "punk", "alternative"]):
+ genre_guidance += "\nSPECIFIC GUIDANCE FOR ROCK RHYTHMS:\n"
+ genre_guidance += "- Use powerful, emotive words on downbeats\n"
+ genre_guidance += "- Create contrast between verse and chorus energy levels\n"
+ genre_guidance += "- Emphasize hooks with simple, memorable phrases\n"
+ elif any(term in genre.lower() for term in ["folk", "country", "acoustic", "ballad"]):
+ genre_guidance += "\nSPECIFIC GUIDANCE FOR FOLK/ACOUSTIC RHYTHMS:\n"
+ genre_guidance += "- Focus on storytelling with clear narrative flow\n"
+ genre_guidance += "- Use natural speech patterns that flow conversationally\n"
+ genre_guidance += "- Place important words at the start of phrases\n"
+ + # Add genre guidance to the main guidance
+ syllable_guidance += genre_guidance
+ + # Store the syllable guidance for later use
+ syllable_guidance_text = syllable_guidance
+ + # Determine if we should use traditional sections or 
not based on structure + if song_structure and "flexible_structure" in song_structure and song_structure["flexible_structure"]: + # If we have more than 4 segments, it's likely not a traditional song structure + if "segments" in song_structure["flexible_structure"]: + segments = song_structure["flexible_structure"]["segments"] + if len(segments) > 4: + use_sections = False + + # Create enhanced prompt with better rhythm alignment instructions + if use_sections: + # Traditional approach with sections + content = f""" +You are a talented songwriter who specializes in {genre} music. +Write original {genre} song lyrics for a song that is {duration:.1f} seconds long. + +Music analysis has detected the following qualities in the music: +- Tempo: {tempo:.1f} BPM +- Key: {key} {mode} +- Primary emotion: {primary_emotion} +- Primary theme: {primary_theme} + +{syllable_guidance} + +CRITICAL PRINCIPLES FOR RHYTHMIC ALIGNMENT: +1. STRESSED syllables MUST fall on STRONG beats (marked with STRONG in the pattern) +2. Natural word stress patterns must match the beat strength (strong words on strong beats) +3. Line breaks should occur at phrase endings for natural breathing +4. Consonant clusters should be avoided on fast notes and strong beats +5. Open vowels (a, e, o) work better for sustained notes and syllables +6. Pay attention to strength values in the pattern (higher values like 0.95 need stronger emphasis) +7. For half-syllable positions (like S1.5 or m2.5), use short, quick syllables or words with weak vowels + +Think step by step about how to match words to the rhythm pattern: +1. First, identify the strong beats in each line pattern +2. Choose words where stressed syllables naturally fall on strong beats +3. Count syllables carefully to ensure they match the pattern precisely +4. Test your line against the pattern by mapping each syllable + +IMPORTANT: Each line of lyrics must match exactly to ONE musical phrase/segment. + +The lyrics should: +- Perfectly capture the essence and style of {genre} music +- Express the {primary_emotion} emotion and {primary_theme} theme +- Follow the structure patterns provided above +- Be completely original +- Match the song duration of {duration:.1f} seconds + +IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]" +where you analyze how well the lyrics align with the musical rhythm. This section MUST appear +even if there are no rhythm issues. Include the following in your analysis: +1. Syllable counts for each line and how they match the rhythm pattern +2. Where stressed syllables align with strong beats +3. Any potential misalignments or improvements + +Your lyrics: +""" + else: + # Flexible approach without traditional sections + content = f""" +You are a talented songwriter who specializes in {genre} music. +Write original lyrics that match the rhythm of a {genre} music segment that is {duration:.1f} seconds long. + +Music analysis has detected the following qualities: +- Tempo: {tempo:.1f} BPM +- Key: {key} {mode} +- Primary emotion: {primary_emotion} +- Primary theme: {primary_theme} + +{syllable_guidance} + +CRITICAL PRINCIPLES FOR RHYTHMIC ALIGNMENT: +1. STRESSED syllables MUST fall on STRONG beats (marked with STRONG in the pattern) +2. Natural word stress patterns must match the beat strength (strong words on strong beats) +3. Line breaks should occur at phrase endings for natural breathing +4. Consonant clusters should be avoided on fast notes and strong beats +5. 
Open vowels (a, e, o) work better for sustained notes and syllables +6. Pay attention to strength values in the pattern (higher values like 0.95 need stronger emphasis) +7. For half-syllable positions (like S1.5 or m2.5), use short, quick syllables or words with weak vowels + +Think step by step about how to match words to the rhythm pattern: +1. First, identify the strong beats in each line pattern +2. Choose words where stressed syllables naturally fall on strong beats +3. Count syllables carefully to ensure they match the pattern precisely +4. Test your line against the pattern by mapping each syllable + +CRITICAL: Each line of lyrics must match exactly to ONE musical phrase/segment. + +For perfect alignment examples: +- "FEEL the RHY-thm in your SOUL" – stressed syllables on strong beats +- "to-DAY we DANCE a-LONG" – natural speech stress matches musical stress +- "WAIT-ing FOR the SUN to RISE" – syllable emphasis aligns with beat emphasis + +The lyrics should: +- Perfectly capture the essence and style of {genre} music +- Express the {primary_emotion} emotion and {primary_theme} theme +- Be completely original +- Maintain a consistent theme throughout +- Match the audio segment duration of {duration:.1f} seconds + +Include any section labels like [Verse] or [Chorus] as indicated in the rhythm patterns above. +Each line of lyrics must follow the corresponding segment's rhythm pattern EXACTLY. + +IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]" +where you analyze how well the lyrics align with the musical rhythm. This section MUST appear +even if there are no rhythm issues. Include the following in your analysis: +1. Syllable counts for each line and how they match the rhythm pattern +2. Where stressed syllables align with strong beats +3. 
Any potential misalignments or improvements
+ +Your lyrics:
+"""
+ + # Format as a chat message for the LLM
+ messages = [
+ {"role": "user", "content": content}
+ ]
+ + # Apply standard chat template without thinking enabled
+ # (Qwen3's chat template supports the enable_thinking flag)
+ text = llm_tokenizer.apply_chat_template(
+ messages,
+ tokenize=False,
+ add_generation_prompt=True,
+ enable_thinking=False
+ )
+ + # Generate lyrics using the LLM
+ model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
+ + # Configure generation parameters based on model capability
+ generation_params = {
+ "do_sample": True,
+ "temperature": 0.6, # Lower for more consistent rhythm alignment
+ "top_p": 0.95,
+ "top_k": 50, # Increased from 20 for more diversity
+ "repetition_penalty": 1.2,
+ "max_new_tokens": 2048 # Doubled from 1024 for more comprehensive lyrics
+ }
+ + # Generate output
+ generated_ids = llm_model.generate(
+ **model_inputs,
+ **generation_params
+ )
+ + # Extract output tokens
+ output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+ + # Skip the thinking process completely and just get the raw output
+ lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
+ + # If we find <think> tags anyway, extract only the content after </think>
+ if "<think>" in lyrics and "</think>" in lyrics:
+ lyrics = lyrics.split("</think>")[1].strip()
+ + # Remove any other thinking indicators that might be present
+ thinking_markers = ["<think>", "</think>", "[thinking]", "[/thinking]", "I'll think step by step:"]
+ for marker in thinking_markers:
+ if marker in lyrics:
+ parts = lyrics.split(marker)
+ if len(parts) > 1:
+ lyrics = parts[-1].strip() # Take the last part after any thinking marker
+ + # Verify syllable counts with enhanced verification
+ if templates_for_verification:
+ verified_lyrics = verify_flexible_syllable_counts(lyrics, templates_for_verification)
+ + # Check if significant issues were detected
+ if "[Note: Potential rhythm mismatches" in verified_lyrics and "Detailed Alignment Analysis" in verified_lyrics:
+ # Extract the original lyrics (before the notes section)
+ original_lyrics = lyrics.split("[Note:")[0].strip()
+ + # Extract the analysis
+ analysis = verified_lyrics.split("[Note:")[1]
+ + # If we have serious alignment issues, consider a refinement step
+ if "stress misalignments" in analysis and len(templates_for_verification) > 0:
+ # Add a refinement prompt with the specific analysis
+ refinement_prompt = f"""
+You need to fix rhythm issues in these lyrics. Here's the analysis of the problems:
+ +{analysis}
+ +Revise the lyrics to perfectly match the rhythm pattern while maintaining the theme.
+Focus on fixing the stress misalignments by placing stressed syllables on STRONG beats. 
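+Keep the same number of lines and any section labels (such as [VERSE] or [CHORUS]) exactly as in the original.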
+ +Original lyrics: +{original_lyrics} + +Improved lyrics with fixed rhythm: +""" + # Format as a chat message for refinement + refinement_messages = [ + {"role": "user", "content": refinement_prompt} + ] + + # Use standard template for refinement (no thinking mode needed) + refinement_text = llm_tokenizer.apply_chat_template( + refinement_messages, + tokenize=False, + add_generation_prompt=True + ) + + try: + # Generate refined lyrics with more focus on rhythm alignment + refinement_inputs = llm_tokenizer([refinement_text], return_tensors="pt").to(llm_model.device) + + # Use stricter parameters for refinement + refinement_params = { + "do_sample": True, + "temperature": 0.4, # Lower temperature for more precise refinement + "top_p": 0.9, + "repetition_penalty": 1.3, + "max_new_tokens": 1024 + } + + refined_ids = llm_model.generate( + **refinement_inputs, + **refinement_params + ) + + # Extract refined lyrics + refined_output_ids = refined_ids[0][len(refinement_inputs.input_ids[0]):].tolist() + refined_lyrics = llm_tokenizer.decode(refined_output_ids, skip_special_tokens=True).strip() + + # Verify the refined lyrics + refined_verified_lyrics = verify_flexible_syllable_counts(refined_lyrics, templates_for_verification) + + # Only use refined lyrics if they're better (fewer notes) + if "[Note: Potential rhythm mismatches" not in refined_verified_lyrics: + lyrics = refined_lyrics + elif refined_verified_lyrics.count("misalignments") < verified_lyrics.count("misalignments"): + lyrics = refined_verified_lyrics + else: + lyrics = verified_lyrics + except Exception as e: + print(f"Error in lyrics refinement: {str(e)}") + lyrics = verified_lyrics + else: + # Minor issues, just use the verification notes + lyrics = verified_lyrics + else: + # No significant issues detected + lyrics = verified_lyrics + + # Check if we have the [RHYTHM_ANALYSIS_SECTION] tag + if "[RHYTHM_ANALYSIS_SECTION]" in lyrics: + # Split at our custom marker + parts = lyrics.split("[RHYTHM_ANALYSIS_SECTION]") + clean_lyrics = parts[0].strip() + rhythm_analysis = parts[1].strip() + + # Add our standard marker for compatibility with existing code + lyrics = clean_lyrics + "\n\n[Note: Rhythm Analysis]\n" + rhythm_analysis + + # For backwards compatibility - if we have the old format, still handle it + elif "[Note: Potential rhythm mismatches" in lyrics: + # Keep it as is, the existing parsing code can handle this format + pass + else: + # No analysis found, add a minimal one + lyrics = lyrics + "\n\n[Note: Rhythm Analysis]\nNo rhythm issues detected. All syllables align well with the beat pattern." 
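+ + # Shape of the dict returned below (values are hypothetical examples):
+ # {
+ # "lyrics": "HEAR the MU-sic ...",
+ # "rhythm_analysis": "Line 1: 4 syllables, stresses land on strong beats ...",
+ # "syllable_analysis": "=== SYLLABLE ANALYSIS === ...",
+ # "prompt_template": "=== PROMPT TEMPLATE === ..."
+ # }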
+ + # Before returning, add syllable analysis and prompt template
+ if isinstance(lyrics, str):
+ # Extract clean lyrics and analysis
+ if "[Note: Rhythm Analysis]" in lyrics:
+ clean_lyrics = lyrics.split("[Note: Rhythm Analysis]")[0].strip()
+ rhythm_analysis = lyrics.split("[Note: Rhythm Analysis]")[1]
+ elif "[Note: Potential rhythm mismatches" in lyrics:
+ clean_lyrics = lyrics.split("[Note:")[0].strip()
+ rhythm_analysis = "[Note:" + lyrics.split("[Note:")[1]
+ else:
+ clean_lyrics = lyrics
+ rhythm_analysis = "No rhythm analysis available"
+ + # Create syllable analysis
+ syllable_analysis = "=== SYLLABLE ANALYSIS ===\n\n"
+ if templates_for_verification:
+ syllable_analysis += "Template Analysis:\n"
+ for i, template in enumerate(templates_for_verification):
+ if i < 30: # Limit to 30 lines to avoid overwhelming output
+ syllable_analysis += f"Line {i+1}:\n"
+ if isinstance(template, dict):
+ if "syllable_template" in template:
+ syllable_analysis += f" Template: {template['syllable_template']}\n"
+ if "syllable_count" in template:
+ syllable_analysis += f" Expected syllables: {template['syllable_count']}\n"
+ elif isinstance(template, str):
+ syllable_analysis += f" Template: {template}\n"
+ syllable_analysis += "\n"
+ + if len(templates_for_verification) > 30:
+ syllable_analysis += f"... and {len(templates_for_verification) - 30} more lines\n\n"
+ + # Add structure visualization to syllable analysis
+ syllable_analysis += "\n" + structure_visualization
+ + # Create prompt template
+ prompt_template = "=== PROMPT TEMPLATE ===\n\n"
+ prompt_template += "Genre: " + genre + "\n"
+ prompt_template += f"Duration: {duration:.1f} seconds\n"
+ prompt_template += f"Tempo: {tempo:.1f} BPM\n"
+ prompt_template += f"Key: {key} {mode}\n"
+ prompt_template += f"Primary Emotion: {primary_emotion}\n"
+ prompt_template += f"Primary Theme: {primary_theme}\n\n"
+ prompt_template += "Syllable Guidance:\n" + syllable_guidance_text
+ + # Return all components
+ return {
+ "lyrics": clean_lyrics,
+ "rhythm_analysis": rhythm_analysis,
+ "syllable_analysis": syllable_analysis,
+ "prompt_template": prompt_template
+ }
+ + # Fallback for unexpected non-string results
+ return lyrics
+ +def process_audio(audio_file):
+ """Main function to process audio file, classify genre, and generate lyrics with enhanced rhythm analysis."""
+ if audio_file is None:
+ return "Please upload an audio file.", None, None
+ + try:
+ print("Step 1/5: Extracting audio features...")
+ # Extract audio features
+ audio_data = extract_audio_features(audio_file)
+ + print("Step 2/5: Verifying audio contains music...")
+ # First check if it's music
+ try:
+ is_music, ast_results = detect_music(audio_data)
+ except Exception as e:
+ print(f"Error in music detection: {str(e)}")
+ # ast_results is undefined if detect_music raised, so return an empty list
+ return f"Error in music detection: {str(e)}", None, []
+ + if not is_music:
+ return "The uploaded audio does not appear to be music. 
Please upload a music file.", None, ast_results + + print("Step 3/5: Classifying music genre...") + # Classify genre + try: + top_genres = classify_genre(audio_data) + # Format genre results using utility function + genre_results = format_genre_results(top_genres) + except Exception as e: + print(f"Error in genre classification: {str(e)}") + return f"Error in genre classification: {str(e)}", None, ast_results + + print("Step 4/5: Analyzing music emotions, themes, and structure...") + # Analyze music emotions and themes + try: + emotion_results = music_analyzer.analyze_music(audio_file) + except Exception as e: + print(f"Error in emotion analysis: {str(e)}") + # Continue even if emotion analysis fails + emotion_results = { + "emotion_analysis": {"primary_emotion": "Unknown"}, + "theme_analysis": {"primary_theme": "Unknown"}, + "rhythm_analysis": {"tempo": 0}, + "tonal_analysis": {"key": "Unknown", "mode": ""}, + "summary": {"tempo": 0, "key": "Unknown", "mode": "", "primary_emotion": "Unknown", "primary_theme": "Unknown"} + } + + # Calculate detailed song structure for better lyrics alignment + try: + # Enhanced song structure calculation for precise lyrics matching + y, sr = load_audio(audio_file, SAMPLE_RATE) + + # Analyze beats and phrases for music-aligned lyrics + beats_info = detect_beats(y, sr) + sections_info = detect_sections(y, sr) + + # Create structured segments for precise line-by-line matching + segments = [] + + # Try to break audio into meaningful segments based on sections + # Each segment will correspond to one line of lyrics + if sections_info and len(sections_info) > 1: + min_segment_duration = 1.5 # Minimum 1.5 seconds per segment + + for section in sections_info: + section_start = section["start"] + section_end = section["end"] + section_duration = section["duration"] + + # For very short sections, add as a single segment + if section_duration < min_segment_duration * 1.5: + segments.append({ + "start": section_start, + "end": section_end + }) + else: + # Calculate ideal number of segments for this section + # based on its duration - aiming for 2-4 second segments + ideal_segment_duration = 3.0 # Target 3 seconds per segment + segment_count = max(1, int(section_duration / ideal_segment_duration)) + + # Create evenly-spaced segments within this section + segment_duration = section_duration / segment_count + for i in range(segment_count): + segment_start = section_start + i * segment_duration + segment_end = segment_start + segment_duration + segments.append({ + "start": segment_start, + "end": segment_end + }) + # If no good sections found, create segments based on beats + elif beats_info and len(beats_info["beat_times"]) > 4: + beats = beats_info["beat_times"] + time_signature = beats_info.get("time_signature", 4) + + # Target one segment per musical measure (typically 4 beats) + measure_size = time_signature + for i in range(0, len(beats), measure_size): + if i + 1 < len(beats): # Need at least 2 beats for a meaningful segment + measure_start = beats[i] + # If we have enough beats for the full measure + if i + measure_size < len(beats): + measure_end = beats[i + measure_size] + else: + # Use available beats and extrapolate for the last measure + if i > 0: + beat_interval = beats[i] - beats[i-1] + measure_end = beats[-1] + (beat_interval * (measure_size - (len(beats) - i))) + else: + measure_end = audio_data["duration"] + + segments.append({ + "start": measure_start, + "end": measure_end + }) + # Last resort: simple time-based segments + else: + # Create segments of 
approximately 3 seconds each + segment_duration = 3.0 + total_segments = max(4, int(audio_data["duration"] / segment_duration)) + segment_duration = audio_data["duration"] / total_segments + + for i in range(total_segments): + segment_start = i * segment_duration + segment_end = segment_start + segment_duration + segments.append({ + "start": segment_start, + "end": segment_end + }) + + # Create a flexible structure with the segments + flexible_structure = { + "beats": beats_info, + "segments": segments + } + + # Add to song structure + song_structure = { + "beats": beats_info, + "sections": sections_info, + "flexible_structure": flexible_structure + } + + # Add syllable counts to each section + song_structure["syllables"] = [] + for section in sections_info: + # Create syllable templates for sections + section_beats_info = { + "beat_times": [beat for beat in beats_info["beat_times"] + if section["start"] <= beat < section["end"]], + "tempo": beats_info.get("tempo", 120) + } + if "beat_strengths" in beats_info: + section_beats_info["beat_strengths"] = [ + strength for i, strength in enumerate(beats_info["beat_strengths"]) + if i < len(beats_info["beat_times"]) and + section["start"] <= beats_info["beat_times"][i] < section["end"] + ] + + # Get a syllable count based on section duration and tempo + syllable_count = int(section["duration"] * (beats_info.get("tempo", 120) / 60) * 1.5) + + section_info = { + "type": section["type"], + "start": section["start"], + "end": section["end"], + "duration": section["duration"], + "syllable_count": syllable_count, + "beat_count": len(section_beats_info["beat_times"]) + } + + # Try to create a more detailed syllable template + if len(section_beats_info["beat_times"]) >= 2: + section_info["syllable_template"] = create_flexible_syllable_templates( + section_beats_info, + genre=top_genres[0][0] + ) + + song_structure["syllables"].append(section_info) + + print(f"Successfully analyzed song structure with {len(segments)} segments") + + except Exception as e: + print(f"Error analyzing song structure: {str(e)}") + # Continue with a simpler approach if this fails + song_structure = None + + print("Step 5/5: Generating rhythmically aligned lyrics...") + # Generate lyrics based on top genre, emotion analysis, and song structure + try: + primary_genre, _ = top_genres[0] + lyrics_result = generate_lyrics(primary_genre, audio_data["duration"], emotion_results, song_structure) + + # Handle both old and new return formats + if isinstance(lyrics_result, dict): + lyrics = lyrics_result["lyrics"] + rhythm_analysis = lyrics_result["rhythm_analysis"] + syllable_analysis = lyrics_result["syllable_analysis"] + prompt_template = lyrics_result["prompt_template"] + else: + lyrics = lyrics_result + rhythm_analysis = "No detailed rhythm analysis available" + syllable_analysis = "No syllable analysis available" + prompt_template = "No prompt template available" + + except Exception as e: + print(f"Error generating lyrics: {str(e)}") + lyrics = f"Error generating lyrics: {str(e)}" + rhythm_analysis = "No rhythm analysis available" + syllable_analysis = "No syllable analysis available" + prompt_template = "No prompt template available" + + # Prepare results dictionary with additional rhythm analysis + results = { + "genre_results": genre_results, + "lyrics": lyrics, + "rhythm_analysis": rhythm_analysis, + "syllable_analysis": syllable_analysis, + "prompt_template": prompt_template, + "ast_results": ast_results + } + + return results + + except Exception as e: + error_msg = f"Error 
processing audio: {str(e)}" + print(error_msg) + return error_msg, None, [] + +# Create enhanced Gradio interface with tabs for better organization +with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo: + gr.Markdown("# Music Genre Classifier & Lyrics Generator") + gr.Markdown("Upload a music file to classify its genre, analyze its emotions, and generate perfectly aligned lyrics.") + + with gr.Row(): + with gr.Column(scale=1): + audio_input = gr.Audio(label="Upload Music", type="filepath") + submit_btn = gr.Button("Analyze & Generate", variant="primary") + + # Add genre info box + with gr.Accordion("About Music Genres", open=False): + gr.Markdown(""" + The system recognizes various music genres including: + - Pop, Rock, Hip-Hop, R&B + - Electronic, Dance, Techno, House + - Jazz, Blues, Classical + - Folk, Country, Acoustic + - Metal, Punk, Alternative + - And many others! + + For best results, use high-quality audio files (MP3, WAV, FLAC) with at least 10 seconds of music. + """) + + with gr.Column(scale=2): + # Use tabs for better organization of outputs + with gr.Tabs(): + with gr.TabItem("Analysis Results"): + genre_output = gr.Textbox(label="Detected Genres", lines=4) + + # Create 2 columns for emotion and audio classification + with gr.Row(): + with gr.Column(): + emotion_output = gr.Textbox(label="Emotion & Structure Analysis", lines=8) + with gr.Column(): + ast_output = gr.Textbox(label="Audio Classification", lines=8) + + with gr.TabItem("Generated Lyrics"): + lyrics_output = gr.Textbox(label="Lyrics", lines=18) + + with gr.TabItem("Rhythm Analysis"): + rhythm_analysis_output = gr.Textbox(label="Syllable-Beat Alignment Analysis", lines=16) + + with gr.TabItem("Syllable Analysis"): + syllable_analysis_output = gr.Textbox(label="Detailed Syllable Analysis", lines=16) + prompt_template_output = gr.Textbox(label="Prompt Template", lines=16) + + # Processing function with better handling of results + def display_results(audio_file): + if audio_file is None: + return "Please upload an audio file.", "No emotion analysis available.", "No audio classification available.", "No lyrics generated.", "No rhythm analysis available.", "No syllable analysis available.", "No prompt template available." 
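+ + # process_audio returns a dict on success (keys: genre_results, lyrics,
+ # rhythm_analysis, syllable_analysis, prompt_template, ast_results) and a
+ # (message, None, ast_results) tuple on failure; both shapes are handled below.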
+ + try: + # Process audio and get results + results = process_audio(audio_file) + + # Check if we got an error message instead of results + if isinstance(results, str) and "Error" in results: + return results, "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", "No syllable analysis available", "No prompt template available" + elif isinstance(results, tuple) and isinstance(results[0], str) and "Error" in results[0]: + return results[0], "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", "No syllable analysis available", "No prompt template available" + + # For backwards compatibility, handle both dictionary and tuple returns + if isinstance(results, dict): + genre_results = results.get("genre_results", "Genre classification failed") + lyrics = results.get("lyrics", "Lyrics generation failed") + ast_results = results.get("ast_results", []) + + # Use clean lyrics if available + clean_lyrics = results.get("clean_lyrics", lyrics) + rhythm_analysis = results.get("rhythm_analysis", "No detailed rhythm analysis available") + + # Extract syllable analysis and prompt template + syllable_analysis = results.get("syllable_analysis", "No syllable analysis available") + prompt_template = results.get("prompt_template", "No prompt template available") + else: + # Handle the old tuple return format + genre_results, lyrics, ast_results = results + clean_lyrics = lyrics + + # Extract rhythm analysis if present + rhythm_analysis = "No detailed rhythm analysis available" + if isinstance(lyrics, str): + # First check for new format + if "[Note: Rhythm Analysis]" in lyrics: + clean_lyrics = lyrics.split("[Note: Rhythm Analysis]")[0].strip() + rhythm_analysis = lyrics.split("[Note: Rhythm Analysis]")[1] + # Check for old format + elif "[Note: Potential rhythm mismatches" in lyrics: + clean_lyrics = lyrics.split("[Note:")[0].strip() + rhythm_analysis = "[Note:" + lyrics.split("[Note:")[1] + + # Default values for new fields + syllable_analysis = "No syllable analysis available" + prompt_template = "No prompt template available" + + # Format emotion analysis results + try: + emotion_results = music_analyzer.analyze_music(audio_file) + emotion_text = f"Tempo: {emotion_results['summary']['tempo']:.1f} BPM\n" + emotion_text += f"Key: {emotion_results['summary']['key']} {emotion_results['summary']['mode']}\n" + emotion_text += f"Primary Emotion: {emotion_results['summary']['primary_emotion']}\n" + emotion_text += f"Primary Theme: {emotion_results['summary']['primary_theme']}" + + # Add detailed song structure information if available + try: + audio_data = extract_audio_features(audio_file) + song_structure = calculate_detailed_song_structure(audio_data) + + emotion_text += "\n\nSong Structure:\n" + for section in song_structure["syllables"]: + emotion_text += f"- {section['type'].capitalize()}: {section['start']:.1f}s to {section['end']:.1f}s " + emotion_text += f"({section['duration']:.1f}s, {section['beat_count']} beats, " + + if "syllable_template" in section: + emotion_text += f"template: {section['syllable_template']})\n" + else: + emotion_text += f"~{section['syllable_count']} syllables)\n" + + # Add flexible structure info if available + if "flexible_structure" in song_structure and song_structure["flexible_structure"]: + flexible = song_structure["flexible_structure"] + if "segments" in flexible and flexible["segments"]: + emotion_text += "\nDetailed Rhythm Analysis:\n" + for i, segment in 
enumerate(flexible["segments"][:5]): # Show first 5 segments + emotion_text += f"- Segment {i+1}: {segment['start']:.1f}s to {segment['end']:.1f}s, " + emotion_text += f"pattern: {segment.get('syllable_template', 'N/A')}\n" + + if len(flexible["segments"]) > 5: + emotion_text += f" (+ {len(flexible['segments']) - 5} more segments)\n" + + except Exception as e: + print(f"Error displaying song structure: {str(e)}") + # Continue without showing structure details + + except Exception as e: + print(f"Error in emotion analysis: {str(e)}") + emotion_text = f"Error in emotion analysis: {str(e)}" + + # Format AST classification results + if ast_results and isinstance(ast_results, list): + ast_text = "Audio Classification Results:\n" + for result in ast_results[:5]: # Show top 5 results + ast_text += f"{result['label']}: {result['score']*100:.2f}%\n" + else: + ast_text = "No valid audio classification results available." + + # Return all results including new fields + return genre_results, emotion_text, ast_text, clean_lyrics, rhythm_analysis, syllable_analysis, prompt_template + + except Exception as e: + error_msg = f"Error: {str(e)}" + print(error_msg) + return error_msg, "Error in emotion analysis", "Error in audio classification", "No lyrics generated", "No rhythm analysis available", "No syllable analysis available", "No prompt template available" + + # Connect the button to the display function with updated outputs + submit_btn.click( + fn=display_results, + inputs=[audio_input], + outputs=[genre_output, emotion_output, ast_output, lyrics_output, rhythm_analysis_output, syllable_analysis_output, prompt_template_output] + ) + + # Enhanced explanation of how the system works + with gr.Accordion("How it works", open=False): + gr.Markdown(""" + ## Advanced Lyrics Generation Process + + 1. **Audio Analysis**: The system analyzes your uploaded music file using multiple machine learning models. + + 2. **Genre Classification**: A specialized neural network identifies the musical genre, detecting subtle patterns in the audio. + + 3. **Emotional Analysis**: The system examines harmonic, rhythmic, and timbral features to determine the emotional qualities of the music. + + 4. **Rhythm Mapping**: Advanced beat detection algorithms create a detailed rhythmic map of the music, identifying: + - Strong and weak beats + - Natural phrase boundaries + - Time signature and tempo variations + + 5. **Syllable Template Creation**: For each musical phrase, the system generates precise syllable templates that reflect: + - Beat stress patterns (strong, medium, weak) + - Appropriate syllable counts based on tempo + - Genre-specific rhythmic qualities + + 6. **Lyrics Generation**: Using the detected genre, emotion, and rhythm patterns, a large language model generates lyrics that: + - Match the emotional quality of the music + - Follow the precise syllable templates + - Align stressed syllables with strong beats + - Maintain genre-appropriate style and themes + + 7. **Rhythm Verification**: The system verifies the generated lyrics, analyzing: + - Syllable count accuracy + - Stress alignment with strong beats + - Word stress patterns + + 8. **Refinement**: If significant rhythm mismatches are detected, the system can automatically refine the lyrics for better alignment. + + This multi-step process creates lyrics that feel naturally connected to the music, as if they were written specifically for it. + """) + +# Launch the app +demo.launch() \ No newline at end of file