root committed on
Commit
bea11aa
·
1 Parent(s): a1321b3
Files changed (2) hide show
  1. app.py +76 -26
  2. emotionanalysis.py +35 -24
app.py CHANGED
@@ -89,7 +89,7 @@ music_analyzer = MusicAnalyzer()
89
  # Process uploaded audio file
90
  def process_audio(audio_file):
91
  if audio_file is None:
92
- return "No audio file provided", None, None, None, None, None, None, None
93
 
94
  try:
95
  # Load and analyze audio
@@ -107,8 +107,18 @@ def process_audio(audio_file):
107
 
108
  # Extract key information
109
  tempo = music_analysis["rhythm_analysis"]["tempo"]
110
- emotion = music_analysis["emotion_analysis"]["primary_emotion"]
111
- theme = music_analysis["theme_analysis"]["primary_theme"]
 
 
 
 
 
 
 
 
 
 
112
 
113
  # Use genre classification directly instead of pipeline
114
  if genre_model is not None and genre_feature_extractor is not None:
@@ -159,8 +169,15 @@ def process_audio(audio_file):
159
  **Tempo:** {tempo:.1f} BPM
160
  **Time Signature:** {time_signature} (Confidence: {time_sig_result["confidence"]:.1%})
161
  **Key:** {music_analysis["tonal_analysis"]["key"]} {music_analysis["tonal_analysis"]["mode"]}
162
- **Primary Emotion:** {emotion}
163
- **Primary Theme:** {theme}
 
 
 
 
 
 
 
164
  **Top Genre:** {primary_genre}
165
 
166
  {genre_results_text}
@@ -179,7 +196,6 @@ def process_audio(audio_file):
179
  """
180
 
181
  # Check if genre is supported for lyrics generation
182
- # Use the supported_genres list from BeatAnalyzer
183
  genre_supported = any(genre.lower() in primary_genre.lower() for genre in beat_analyzer.supported_genres)
184
 
185
  # Generate lyrics only for supported genres
@@ -191,12 +207,12 @@ def process_audio(audio_file):
191
  lyrics = f"Lyrics generation is only supported for the following genres: {supported_genres_str}.\n\nDetected genre '{primary_genre}' doesn't have strong syllable-to-beat patterns required for our lyric generation algorithm."
192
  beat_match_analysis = "Lyrics generation not available for this genre."
193
 
194
- return analysis_summary, lyrics, tempo, time_signature, emotion, theme, primary_genre, beat_match_analysis
195
 
196
  except Exception as e:
197
  error_msg = f"Error processing audio: {str(e)}"
198
  print(error_msg)
199
- return error_msg, None, None, None, None, None, None, None
200
 
201
  def generate_lyrics(music_analysis, genre, duration):
202
  try:
@@ -204,8 +220,17 @@ def generate_lyrics(music_analysis, genre, duration):
204
  tempo = music_analysis["rhythm_analysis"]["tempo"]
205
  key = music_analysis["tonal_analysis"]["key"]
206
  mode = music_analysis["tonal_analysis"]["mode"]
207
- emotion = music_analysis["emotion_analysis"]["primary_emotion"]
208
- theme = music_analysis["theme_analysis"]["primary_theme"]
 
 
 
 
 
 
 
 
 
209
 
210
  # Get beat analysis and templates
211
  lyric_templates = music_analysis.get("lyric_templates", [])
@@ -219,8 +244,16 @@ def generate_lyrics(music_analysis, genre, duration):
219
 
220
  # If no templates, fall back to original method
221
  if not lyric_templates:
222
- # Simplified prompt
223
- prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM. The emotion is {emotion} and theme is {theme}.
 
 
 
 
 
 
 
 
224
 
225
  ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT.
226
  """
@@ -236,7 +269,7 @@ ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT.
236
  max_syllables = 7
237
  avg_syllables = 4
238
 
239
- # Create random examples based on the song's theme and emotion
240
  # to avoid the LLM copying our examples directly
241
  example_themes = [
242
  {"emotion": "love", "fragments": ["I see your face", "across the room", "my heart beats fast", "can't look away"]},
@@ -246,8 +279,8 @@ ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT.
246
  {"emotion": "longing", "fragments": ["miles apart now", "under same stars", "thinking of you", "across the distance"]}
247
  ]
248
 
249
- # Select a theme that doesn't match the song's emotion to avoid copying
250
- selected_themes = [t for t in example_themes if t["emotion"].lower() != emotion.lower()]
251
  if not selected_themes:
252
  selected_themes = example_themes
253
 
@@ -274,8 +307,13 @@ ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT.
274
  # Create a more direct prompt with examples and specific syllable count guidance
275
  prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM.
276
 
277
- PRIMARY THEME: {theme}
278
- EMOTION: {emotion}
 
 
 
 
 
279
 
280
  I need EXACTLY {num_phrases} lines of lyrics with these STRICT requirements:
281
 
@@ -288,6 +326,8 @@ CRITICAL INSTRUCTIONS:
288
  6. CONCRETE IMAGERY: Use specific, tangible details rather than abstract concepts
289
  7. NO CLICHÉS: Avoid common phrases like "time slips away" or "memories fade"
290
  8. ONE THOUGHT PER LINE: Express just one simple idea in each line
 
 
291
 
292
  FORMAT:
293
  - Write exactly {num_phrases} short text lines
@@ -310,7 +350,7 @@ by the front door (3 syllables)
310
  where shoes pile up (3 syllables)
311
  since you moved in (3 syllables)
312
 
313
- DO NOT copy my examples. Create ENTIRELY NEW lyrics about {theme} with {emotion} feeling.
314
 
315
  REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most important rule!
316
  """
@@ -590,11 +630,11 @@ REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most imp
590
 
591
  # Make theme and emotion specific placeholders to add to the list
592
  theme_specific = []
593
- if theme.lower() in ["love", "relationship", "romance"]:
594
  theme_specific = ["Lipstick on glass", "Text left on read", "Scent on your coat"]
595
- elif theme.lower() in ["loss", "grief", "sadness"]:
596
  theme_specific = ["Chair sits empty", "Photos face down", "Clothes in closet"]
597
- elif theme.lower() in ["hope", "inspiration", "triumph"]:
598
  theme_specific = ["Seeds start to grow", "Finish line waits", "New day breaks through"]
599
 
600
  # Get the closest matching syllable group
@@ -928,8 +968,14 @@ def create_interface():
928
  with gr.Row():
929
  tempo_output = gr.Number(label="Tempo (BPM)")
930
  time_sig_output = gr.Textbox(label="Time Signature")
931
- emotion_output = gr.Textbox(label="Primary Emotion")
932
- theme_output = gr.Textbox(label="Primary Theme")
 
 
 
 
 
 
933
  genre_output = gr.Textbox(label="Primary Genre")
934
 
935
  with gr.Tab("Generated Lyrics"):
@@ -942,8 +988,12 @@ def create_interface():
942
  analyze_btn.click(
943
  fn=process_audio,
944
  inputs=[audio_input],
945
- outputs=[analysis_output, lyrics_output, tempo_output, time_sig_output,
946
- emotion_output, theme_output, genre_output, beat_match_output]
 
 
 
 
947
  )
948
 
949
  # Format supported genres for display
@@ -953,7 +1003,7 @@ def create_interface():
953
  ## How it works
954
  1. Upload or record a music file
955
  2. The system analyzes tempo, beats, time signature and other musical features
956
- 3. It detects emotion, theme, and music genre
957
  4. Using beat patterns and syllable stress analysis, it generates perfectly aligned lyrics
958
  5. Each line of the lyrics is matched to the beat pattern of the corresponding musical phrase
959
 
 
89
  # Process uploaded audio file
90
  def process_audio(audio_file):
91
  if audio_file is None:
92
+ return "No audio file provided", None, None, None, None, None, None, None, None, None
93
 
94
  try:
95
  # Load and analyze audio
 
107
 
108
  # Extract key information
109
  tempo = music_analysis["rhythm_analysis"]["tempo"]
110
+
111
+ # Get top two emotions
112
+ emotion_scores = music_analysis["emotion_analysis"]["emotion_scores"]
113
+ sorted_emotions = sorted(emotion_scores.items(), key=lambda x: x[1], reverse=True)
114
+ primary_emotion = sorted_emotions[0][0]
115
+ secondary_emotion = sorted_emotions[1][0] if len(sorted_emotions) > 1 else None
116
+
117
+ # Get top two themes
118
+ theme_scores = music_analysis["theme_analysis"]["theme_scores"]
119
+ sorted_themes = sorted(theme_scores.items(), key=lambda x: x[1], reverse=True)
120
+ primary_theme = sorted_themes[0][0]
121
+ secondary_theme = sorted_themes[1][0] if len(sorted_themes) > 1 else None
122
 
123
  # Use genre classification directly instead of pipeline
124
  if genre_model is not None and genre_feature_extractor is not None:
 
169
  **Tempo:** {tempo:.1f} BPM
170
  **Time Signature:** {time_signature} (Confidence: {time_sig_result["confidence"]:.1%})
171
  **Key:** {music_analysis["tonal_analysis"]["key"]} {music_analysis["tonal_analysis"]["mode"]}
172
+
173
+ **Emotions:**
174
+ - Primary: {primary_emotion} (Confidence: {emotion_scores[primary_emotion]:.1%})
175
+ - Secondary: {secondary_emotion} (Confidence: {emotion_scores[secondary_emotion]:.1%})
176
+
177
+ **Themes:**
178
+ - Primary: {primary_theme} (Confidence: {theme_scores[primary_theme]:.1%})
179
+ - Secondary: {secondary_theme} (Confidence: {theme_scores[secondary_theme]:.1%})
180
+
181
  **Top Genre:** {primary_genre}
182
 
183
  {genre_results_text}
 
196
  """
197
 
198
  # Check if genre is supported for lyrics generation
 
199
  genre_supported = any(genre.lower() in primary_genre.lower() for genre in beat_analyzer.supported_genres)
200
 
201
  # Generate lyrics only for supported genres
 
207
  lyrics = f"Lyrics generation is only supported for the following genres: {supported_genres_str}.\n\nDetected genre '{primary_genre}' doesn't have strong syllable-to-beat patterns required for our lyric generation algorithm."
208
  beat_match_analysis = "Lyrics generation not available for this genre."
209
 
210
+ return analysis_summary, lyrics, tempo, time_signature, primary_emotion, secondary_emotion, primary_theme, secondary_theme, primary_genre, beat_match_analysis
211
 
212
  except Exception as e:
213
  error_msg = f"Error processing audio: {str(e)}"
214
  print(error_msg)
215
+ return error_msg, None, None, None, None, None, None, None, None, None
216
 
217
  def generate_lyrics(music_analysis, genre, duration):
218
  try:
 
220
  tempo = music_analysis["rhythm_analysis"]["tempo"]
221
  key = music_analysis["tonal_analysis"]["key"]
222
  mode = music_analysis["tonal_analysis"]["mode"]
223
+
224
+ # Get both primary and secondary emotions and themes
225
+ emotion_scores = music_analysis["emotion_analysis"]["emotion_scores"]
226
+ sorted_emotions = sorted(emotion_scores.items(), key=lambda x: x[1], reverse=True)
227
+ primary_emotion = sorted_emotions[0][0]
228
+ secondary_emotion = sorted_emotions[1][0] if len(sorted_emotions) > 1 else None
229
+
230
+ theme_scores = music_analysis["theme_analysis"]["theme_scores"]
231
+ sorted_themes = sorted(theme_scores.items(), key=lambda x: x[1], reverse=True)
232
+ primary_theme = sorted_themes[0][0]
233
+ secondary_theme = sorted_themes[1][0] if len(sorted_themes) > 1 else None
234
 
235
  # Get beat analysis and templates
236
  lyric_templates = music_analysis.get("lyric_templates", [])
 
244
 
245
  # If no templates, fall back to original method
246
  if not lyric_templates:
247
+ # Enhanced prompt with both emotions and themes
248
+ prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM.
249
+
250
+ EMOTIONS:
251
+ - Primary: {primary_emotion}
252
+ - Secondary: {secondary_emotion}
253
+
254
+ THEMES:
255
+ - Primary: {primary_theme}
256
+ - Secondary: {secondary_theme}
257
 
258
  ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT.
259
  """
 
269
  max_syllables = 7
270
  avg_syllables = 4
271
 
272
+ # Create random examples based on the song's themes and emotions
273
  # to avoid the LLM copying our examples directly
274
  example_themes = [
275
  {"emotion": "love", "fragments": ["I see your face", "across the room", "my heart beats fast", "can't look away"]},
 
279
  {"emotion": "longing", "fragments": ["miles apart now", "under same stars", "thinking of you", "across the distance"]}
280
  ]
281
 
282
+ # Select a theme that doesn't match the song's emotions to avoid copying
283
+ selected_themes = [t for t in example_themes if t["emotion"].lower() not in [primary_emotion.lower(), secondary_emotion.lower()]]
284
  if not selected_themes:
285
  selected_themes = example_themes
286
 
 
307
  # Create a more direct prompt with examples and specific syllable count guidance
308
  prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM.
309
 
310
+ EMOTIONS:
311
+ - Primary: {primary_emotion}
312
+ - Secondary: {secondary_emotion}
313
+
314
+ THEMES:
315
+ - Primary: {primary_theme}
316
+ - Secondary: {secondary_theme}
317
 
318
  I need EXACTLY {num_phrases} lines of lyrics with these STRICT requirements:
319
 
 
326
  6. CONCRETE IMAGERY: Use specific, tangible details rather than abstract concepts
327
  7. NO CLICHÉS: Avoid common phrases like "time slips away" or "memories fade"
328
  8. ONE THOUGHT PER LINE: Express just one simple idea in each line
329
+ 9. EMOTION BLEND: Blend both {primary_emotion} and {secondary_emotion} emotions naturally
330
+ 10. THEME WEAVING: Weave both {primary_theme} and {secondary_theme} themes together
331
 
332
  FORMAT:
333
  - Write exactly {num_phrases} short text lines
 
350
  where shoes pile up (3 syllables)
351
  since you moved in (3 syllables)
352
 
353
+ DO NOT copy my examples. Create ENTIRELY NEW lyrics that blend {primary_emotion} and {secondary_emotion} emotions while exploring {primary_theme} and {secondary_theme} themes.
354
 
355
  REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most important rule!
356
  """
 
630
 
631
  # Make theme and emotion specific placeholders to add to the list
632
  theme_specific = []
633
+ if primary_theme.lower() in ["love", "relationship", "romance"]:
634
  theme_specific = ["Lipstick on glass", "Text left on read", "Scent on your coat"]
635
+ elif primary_theme.lower() in ["loss", "grief", "sadness"]:
636
  theme_specific = ["Chair sits empty", "Photos face down", "Clothes in closet"]
637
+ elif primary_theme.lower() in ["hope", "inspiration", "triumph"]:
638
  theme_specific = ["Seeds start to grow", "Finish line waits", "New day breaks through"]
639
 
640
  # Get the closest matching syllable group
 
968
  with gr.Row():
969
  tempo_output = gr.Number(label="Tempo (BPM)")
970
  time_sig_output = gr.Textbox(label="Time Signature")
971
+
972
+ with gr.Row():
973
+ primary_emotion_output = gr.Textbox(label="Primary Emotion")
974
+ secondary_emotion_output = gr.Textbox(label="Secondary Emotion")
975
+
976
+ with gr.Row():
977
+ primary_theme_output = gr.Textbox(label="Primary Theme")
978
+ secondary_theme_output = gr.Textbox(label="Secondary Theme")
979
  genre_output = gr.Textbox(label="Primary Genre")
980
 
981
  with gr.Tab("Generated Lyrics"):
 
988
  analyze_btn.click(
989
  fn=process_audio,
990
  inputs=[audio_input],
991
+ outputs=[
992
+ analysis_output, lyrics_output, tempo_output, time_sig_output,
993
+ primary_emotion_output, secondary_emotion_output,
994
+ primary_theme_output, secondary_theme_output,
995
+ genre_output, beat_match_output
996
+ ]
997
  )
998
 
999
  # Format supported genres for display
 
1003
  ## How it works
1004
  1. Upload or record a music file
1005
  2. The system analyzes tempo, beats, time signature and other musical features
1006
+ 3. It detects emotions, themes, and music genre
1007
  4. Using beat patterns and syllable stress analysis, it generates perfectly aligned lyrics
1008
  5. Each line of the lyrics is matched to the beat pattern of the corresponding musical phrase
1009
 
emotionanalysis.py CHANGED
@@ -11,33 +11,33 @@ except ImportError:
11
 
12
  class MusicAnalyzer:
13
  def __init__(self):
14
- # Scientifically grounded emotion classes (valence, arousal space)
15
- # See: Eerola & Vuoskoski, 2011; Russell, 1980
16
  self.emotion_classes = {
17
- 'happy': {'valence': 0.9, 'arousal': 0.7},
18
- 'excited': {'valence': 0.8, 'arousal': 0.95},
19
- 'tender': {'valence': 0.7, 'arousal': 0.3},
20
- 'calm': {'valence': 0.65, 'arousal': 0.15},
21
- 'sad': {'valence': 0.2, 'arousal': 0.25},
22
- 'depressed': {'valence': 0.05, 'arousal': 0.05},
23
- 'angry': {'valence': 0.1, 'arousal': 0.8},
24
- 'fearful': {'valence': 0.05, 'arousal': 0.95}
25
  }
26
- # Theme classes based on emotion clusters (from Allan, 2014, with mapping)
27
  self.theme_classes = {
28
- 'love': ['tender', 'calm', 'happy'],
29
  'triumph': ['excited', 'happy', 'angry'],
30
  'loss': ['sad', 'depressed'],
31
  'adventure': ['excited', 'fearful'],
32
- 'reflection': ['calm', 'sad'],
33
  'conflict': ['angry', 'fearful']
34
  }
 
35
  self.feature_weights = {
36
- 'mode': 0.25,
37
- 'tempo': 0.2,
38
- 'energy': 0.2,
39
- 'brightness': 0.2,
40
- 'rhythm_complexity': 0.15
41
  }
42
  self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
43
 
@@ -133,11 +133,13 @@ class MusicAnalyzer:
133
  }
134
 
135
  def feature_to_valence_arousal(self, features):
136
- # Normalize features to [0, 1]
137
- tempo_norm = np.clip((features['tempo'] - 40) / (200 - 40), 0, 1)
138
- energy_norm = np.clip(features['energy'] / 1.0, 0, 1)
139
- brightness_norm = np.clip(features['brightness'] / 1.0, 0, 1)
140
- rhythm_complexity_norm = np.clip(features['rhythm_complexity'] / 2.0, 0, 1)
 
 
141
  valence = (
142
  self.feature_weights['mode'] * (1.0 if features['is_major'] else 0.0) +
143
  self.feature_weights['tempo'] * tempo_norm +
@@ -150,6 +152,12 @@ class MusicAnalyzer:
150
  self.feature_weights['brightness'] * brightness_norm +
151
  self.feature_weights['rhythm_complexity'] * rhythm_complexity_norm
152
  )
 
 
 
 
 
 
153
  return float(np.clip(valence, 0, 1)), float(np.clip(arousal, 0, 1))
154
 
155
  def analyze_emotion(self, rhythm_data, tonal_data, energy_data):
@@ -263,4 +271,7 @@ if __name__ == "__main__":
263
  # Show detailed results (optional)
264
  import json
265
  print("\n=== DETAILED ANALYSIS ===")
266
- print(json.dumps(results, indent=2))
 
 
 
 
11
 
12
  class MusicAnalyzer:
13
  def __init__(self):
14
+ # Emotion coordinates (pop-optimized, more separation)
 
15
  self.emotion_classes = {
16
+ 'happy': {'valence': 0.96, 'arousal': 0.72},
17
+ 'excited': {'valence': 0.88, 'arousal': 0.96},
18
+ 'tender': {'valence': 0.70, 'arousal': 0.39},
19
+ 'calm': {'valence': 0.58, 'arousal': 0.18},
20
+ 'sad': {'valence': 0.18, 'arousal': 0.19},
21
+ 'depressed': {'valence': 0.09, 'arousal': 0.06},
22
+ 'angry': {'valence': 0.11, 'arousal': 0.80},
23
+ 'fearful': {'valence': 0.13, 'arousal': 0.99}
24
  }
25
+ # More realistic pop theme mapping
26
  self.theme_classes = {
27
+ 'love': ['happy', 'excited', 'tender'],
28
  'triumph': ['excited', 'happy', 'angry'],
29
  'loss': ['sad', 'depressed'],
30
  'adventure': ['excited', 'fearful'],
31
+ 'reflection': ['calm', 'tender', 'sad'],
32
  'conflict': ['angry', 'fearful']
33
  }
34
+ # Pop-tuned feature weights
35
  self.feature_weights = {
36
+ 'mode': 0.34,
37
+ 'tempo': 0.32,
38
+ 'energy': 0.16,
39
+ 'brightness': 0.14,
40
+ 'rhythm_complexity': 0.04
41
  }
42
  self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
43
 
 
133
  }
134
 
135
  def feature_to_valence_arousal(self, features):
136
+ # Normalization for typical pop values
137
+ # tempo: 40-180 BPM, energy: 0.08-0.5 (librosa RMS), brightness: 0.25-0.7
138
+ tempo_norm = np.clip((features['tempo'] - 70) / (170 - 70), 0, 1)
139
+ energy_norm = np.clip((features['energy'] - 0.08) / (0.5 - 0.08), 0, 1)
140
+ brightness_norm = np.clip((features['brightness'] - 0.25) / (0.7 - 0.25), 0, 1)
141
+ rhythm_complexity_norm = np.clip((features['rhythm_complexity'] - 0.1) / (0.8 - 0.1), 0, 1)
142
+
143
  valence = (
144
  self.feature_weights['mode'] * (1.0 if features['is_major'] else 0.0) +
145
  self.feature_weights['tempo'] * tempo_norm +
 
152
  self.feature_weights['brightness'] * brightness_norm +
153
  self.feature_weights['rhythm_complexity'] * rhythm_complexity_norm
154
  )
155
+
156
+ # Explicit bias: if major mode + tempo > 100 + brightness > 0.5, boost valence/arousal toward happy/excited
157
+ if features['is_major'] and features['tempo'] > 100 and features['brightness'] > 0.5:
158
+ valence = max(valence, 0.85)
159
+ arousal = max(arousal, 0.7)
160
+
161
  return float(np.clip(valence, 0, 1)), float(np.clip(arousal, 0, 1))
162
 
163
  def analyze_emotion(self, rhythm_data, tonal_data, energy_data):
 
271
  # Show detailed results (optional)
272
  import json
273
  print("\n=== DETAILED ANALYSIS ===")
274
+ print(json.dumps(results, indent=2))
275
+
276
+ # Visualize the analysis
277
+ # analyzer.visualize_analysis(demo_file)