Commit 547c4d0 · committed by root · 1 parent: 93aef48

adding emotion analysis

Files changed (3)
  1. app.py +51 -8
  2. emotionanalysis.py +457 -0
  3. example.py +12 -1
app.py CHANGED
@@ -21,6 +21,7 @@ from utils import (
     ensure_cuda_availability,
     preprocess_audio_for_model
 )
+from emotionanalysis import MusicAnalyzer

 # Login to Hugging Face Hub if token is provided
 if "HF_TOKEN" in os.environ:
@@ -98,6 +99,9 @@ llm_pipeline = pipeline(
     max_new_tokens=512,
 )

+# Initialize music emotion analyzer
+music_analyzer = MusicAnalyzer()
+
 def extract_audio_features(audio_file):
     """Extract audio features from an audio file."""
     # Load the audio file using utility function
@@ -162,7 +166,7 @@ def classify_genre(audio_data):
     # Fallback: return a default genre if everything fails
     return [("rock", 1.0)]

-def generate_lyrics(genre, duration):
+def generate_lyrics(genre, duration, emotion_results):
     """Generate lyrics based on the genre and with appropriate length."""
     # Calculate appropriate lyrics length based on audio duration
     lines_count = calculate_lyrics_length(duration)
@@ -181,12 +185,27 @@ def generate_lyrics(genre, duration):
     verse_lines = 3
     chorus_lines = 2

+    # Extract emotion and theme data from analysis results
+    primary_emotion = emotion_results["emotion_analysis"]["primary_emotion"]
+    primary_theme = emotion_results["theme_analysis"]["primary_theme"]
+    tempo = emotion_results["rhythm_analysis"]["tempo"]
+    key = emotion_results["tonal_analysis"]["key"]
+    mode = emotion_results["tonal_analysis"]["mode"]
+
     # Create prompt for the LLM
     prompt = f"""
 You are a talented songwriter who specializes in {genre} music.
 Write original {genre} song lyrics for a song that is {duration:.1f} seconds long.
+
+Music analysis has detected the following qualities in the music:
+- Tempo: {tempo:.1f} BPM
+- Key: {key} {mode}
+- Primary emotion: {primary_emotion}
+- Primary theme: {primary_theme}
+
 The lyrics should:
 - Perfectly capture the essence and style of {genre} music
+- Express the {primary_emotion} emotion and {primary_theme} theme
 - Be approximately {lines_count} lines long
 - Have a coherent theme and flow
 - Follow this structure:
@@ -299,9 +318,12 @@ def process_audio(audio_file):
     # Format genre results using utility function
     genre_results = format_genre_results(top_genres)

-    # Generate lyrics based on top genre
+    # Analyze music emotions and themes
+    emotion_results = music_analyzer.analyze_music(audio_file)
+
+    # Generate lyrics based on top genre and emotion analysis
     primary_genre, _ = top_genres[0]
-    lyrics = generate_lyrics(primary_genre, audio_data["duration"])
+    lyrics = generate_lyrics(primary_genre, audio_data["duration"], emotion_results)

     return genre_results, lyrics

@@ -311,7 +333,7 @@ def process_audio(audio_file):
 # Create Gradio interface
 with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
     gr.Markdown("# Music Genre Classifier & Lyrics Generator")
-    gr.Markdown("Upload a music file to classify its genre and generate matching lyrics.")
+    gr.Markdown("Upload a music file to classify its genre, analyze its emotions, and generate matching lyrics.")

     with gr.Row():
         with gr.Column():
@@ -320,20 +342,41 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:

         with gr.Column():
             genre_output = gr.Textbox(label="Detected Genres", lines=5)
+            emotion_output = gr.Textbox(label="Emotion Analysis", lines=5)
             lyrics_output = gr.Textbox(label="Generated Lyrics", lines=15)

+    def display_results(audio_file):
+        if audio_file is None:
+            return "Please upload an audio file.", "No emotion analysis available.", None
+
+        try:
+            # Process audio and get genre and lyrics
+            genre_results, lyrics = process_audio(audio_file)
+
+            # Format emotion analysis results
+            emotion_results = music_analyzer.analyze_music(audio_file)
+            emotion_text = f"Tempo: {emotion_results['summary']['tempo']:.1f} BPM\n"
+            emotion_text += f"Key: {emotion_results['summary']['key']} {emotion_results['summary']['mode']}\n"
+            emotion_text += f"Primary Emotion: {emotion_results['summary']['primary_emotion']}\n"
+            emotion_text += f"Primary Theme: {emotion_results['summary']['primary_theme']}"
+
+            return genre_results, emotion_text, lyrics
+        except Exception as e:
+            return f"Error: {str(e)}", "Error in emotion analysis", None
+
     submit_btn.click(
-        fn=process_audio,
+        fn=display_results,
         inputs=[audio_input],
-        outputs=[genre_output, lyrics_output]
+        outputs=[genre_output, emotion_output, lyrics_output]
     )

     gr.Markdown("### How it works")
     gr.Markdown("""
     1. Upload an audio file of your choice
     2. The system will classify the genre using the dima806/music_genres_classification model
-    3. Based on the detected genre, it will generate appropriate lyrics using Llama-3.1-8B-Instruct
-    4. The lyrics length is automatically adjusted based on your audio duration
+    3. The system will analyze the musical emotion and theme using advanced audio processing
+    4. Based on the detected genre and emotion, it will generate appropriate lyrics using Llama-3.1-8B-Instruct
+    5. The lyrics length is automatically adjusted based on your audio duration
     """)

 # Launch the app
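
For reference, the new code in app.py only relies on a small slice of the dictionary returned by MusicAnalyzer.analyze_music(): generate_lyrics() reads the per-category blocks (rhythm_analysis, tonal_analysis, emotion_analysis, theme_analysis), and display_results() reads the flattened summary. A minimal sketch of that contract, using made-up placeholder values rather than output from a real file:

emotion_results = {  # placeholder values; only the keys app.py reads are shown
    "rhythm_analysis": {"tempo": 120.0},
    "tonal_analysis": {"key": "C", "mode": "major"},
    "emotion_analysis": {"primary_emotion": "happy"},
    "theme_analysis": {"primary_theme": "love"},
    "summary": {"tempo": 120.0, "key": "C", "mode": "major",
                "primary_emotion": "happy", "primary_theme": "love"},
}

# Mirrors how display_results() builds the "Emotion Analysis" textbox
summary = emotion_results["summary"]
emotion_text = (
    f"Tempo: {summary['tempo']:.1f} BPM\n"
    f"Key: {summary['key']} {summary['mode']}\n"
    f"Primary Emotion: {summary['primary_emotion']}\n"
    f"Primary Theme: {summary['primary_theme']}"
)
print(emotion_text)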
emotionanalysis.py ADDED
@@ -0,0 +1,457 @@
+import librosa
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.stats import mode
+import warnings
+warnings.filterwarnings('ignore') # Suppress librosa warnings
+class MusicAnalyzer:
+    def __init__(self):
+        # Emotion feature mappings - these define characteristics of different emotions
+        self.emotion_profiles = {
+            'happy': {'tempo': (100, 180), 'energy': (0.6, 1.0), 'major_mode': True, 'brightness': (0.6, 1.0)},
+            'sad': {'tempo': (40, 90), 'energy': (0, 0.5), 'major_mode': False, 'brightness': (0, 0.5)},
+            'calm': {'tempo': (50, 90), 'energy': (0, 0.4), 'major_mode': True, 'brightness': (0.3, 0.6)},
+            'energetic': {'tempo': (110, 200), 'energy': (0.7, 1.0), 'major_mode': True, 'brightness': (0.5, 0.9)},
+            'tense': {'tempo': (70, 140), 'energy': (0.5, 0.9), 'major_mode': False, 'brightness': (0.3, 0.7)},
+            'nostalgic': {'tempo': (60, 100), 'energy': (0.3, 0.7), 'major_mode': None, 'brightness': (0.4, 0.7)}
+        }
+
+        # Theme mappings based on musical features
+        self.theme_profiles = {
+            'love': {'emotion': ['happy', 'nostalgic', 'sad'], 'harmony_complexity': (0.3, 0.7)},
+            'triumph': {'emotion': ['energetic', 'happy'], 'harmony_complexity': (0.4, 0.8)},
+            'loss': {'emotion': ['sad', 'nostalgic'], 'harmony_complexity': (0.3, 0.7)},
+            'adventure': {'emotion': ['energetic', 'tense'], 'harmony_complexity': (0.5, 0.9)},
+            'reflection': {'emotion': ['calm', 'nostalgic'], 'harmony_complexity': (0.4, 0.8)},
+            'conflict': {'emotion': ['tense', 'energetic'], 'harmony_complexity': (0.6, 1.0)}
+        }
+
+        # Musical key mapping
+        self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
+
+    def load_audio(self, file_path, sr=22050, duration=None):
+        """Load audio file and return time series and sample rate"""
+        try:
+            y, sr = librosa.load(file_path, sr=sr, duration=duration)
+            return y, sr
+        except Exception as e:
+            print(f"Error loading audio file: {e}")
+            return None, None
+
+    def analyze_rhythm(self, y, sr):
+        """Analyze rhythm-related features: tempo, beats, time signature"""
+        # Tempo and beat detection
+        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
+        tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
+        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
+
+        # Beat intervals and regularity
+        beat_intervals = np.diff(beat_times) if len(beat_times) > 1 else np.array([0])
+        beat_regularity = 1.0 / np.std(beat_intervals) if len(beat_intervals) > 0 and np.std(beat_intervals) > 0 else 0
+
+        # Rhythm pattern analysis through autocorrelation
+        ac = librosa.autocorrelate(onset_env, max_size=sr // 2)
+        ac = librosa.util.normalize(ac, norm=np.inf)
+
+        # Time signature estimation - a challenging task with many limitations
+        estimated_signature = self._estimate_time_signature(y, sr, beat_times, onset_env)
+
+        # Compute onset strength to get a measure of rhythm intensity
+        rhythm_intensity = np.mean(onset_env) / np.max(onset_env) if np.max(onset_env) > 0 else 0
+
+        # Rhythm complexity based on variation in onset strength
+        rhythm_complexity = np.std(onset_env) / np.mean(onset_env) if np.mean(onset_env) > 0 else 0
+
+        return {
+            "tempo": float(tempo),
+            "beat_times": beat_times.tolist(),
+            "beat_intervals": beat_intervals.tolist(),
+            "beat_regularity": float(beat_regularity),
+            "rhythm_intensity": float(rhythm_intensity),
+            "rhythm_complexity": float(rhythm_complexity),
+            "estimated_time_signature": estimated_signature
+        }
+
+    def _estimate_time_signature(self, y, sr, beat_times, onset_env):
+        """Estimate the time signature based on beat patterns"""
+        # This is a simplified approach - accurate time signature detection is complex
+        if len(beat_times) < 4:
+            return "Unknown"
+
+        # Analyze beat emphasis patterns to detect meter
+        beat_intervals = np.diff(beat_times)
+
+        # Look for periodicity in the onset envelope
+        ac = librosa.autocorrelate(onset_env, max_size=sr)
+
+        # Find peaks in autocorrelation after the first one (which is at lag 0)
+        peaks = librosa.util.peak_pick(ac, pre_max=20, post_max=20, pre_avg=20, post_avg=20, delta=0.1, wait=1)
+        peaks = peaks[peaks > 0] # Remove the first peak which is at lag 0
+
+        if len(peaks) == 0:
+            return "4/4" # Default to most common
+
+        # Convert first significant peak to beats
+        first_peak_time = peaks[0] / sr
+        beats_per_bar = round(first_peak_time / np.median(beat_intervals))
+
+        # Map to common time signatures
+        if beats_per_bar == 4 or beats_per_bar == 8:
+            return "4/4"
+        elif beats_per_bar == 3 or beats_per_bar == 6:
+            return "3/4"
+        elif beats_per_bar == 2:
+            return "2/4"
+        else:
+            return f"{beats_per_bar}/4" # Default assumption
+
+    def analyze_tonality(self, y, sr):
+        """Analyze tonal features: key, mode, harmonic features"""
+        # Compute chromagram
+        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
+
+        # Krumhansl-Schmuckler key-finding algorithm (simplified)
+        # Major and minor profiles from music theory research
+        major_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
+        minor_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
+
+        # Calculate the correlation of the chroma with each key profile
+        chroma_avg = np.mean(chroma, axis=1)
+        major_corr = np.zeros(12)
+        minor_corr = np.zeros(12)
+
+        for i in range(12):
+            major_corr[i] = np.corrcoef(np.roll(chroma_avg, i), major_profile)[0, 1]
+            minor_corr[i] = np.corrcoef(np.roll(chroma_avg, i), minor_profile)[0, 1]
+
+        # Find the key with the highest correlation
+        max_major_idx = np.argmax(major_corr)
+        max_minor_idx = np.argmax(minor_corr)
+
+        # Determine if the piece is in a major or minor key
+        if major_corr[max_major_idx] > minor_corr[max_minor_idx]:
+            mode = "major"
+            key = self.key_names[max_major_idx]
+        else:
+            mode = "minor"
+            key = self.key_names[max_minor_idx]
+
+        # Calculate harmony complexity (variability in harmonic content)
+        harmony_complexity = np.std(chroma) / np.mean(chroma) if np.mean(chroma) > 0 else 0
+
+        # Calculate tonal stability (consistency of tonal center)
+        tonal_stability = 1.0 / (np.std(chroma_avg) + 0.001) # Add small value to avoid division by zero
+
+        # Calculate spectral brightness (center of mass of the spectrum)
+        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
+        brightness = np.mean(spectral_centroid) / (sr/2) # Normalize by Nyquist frequency
+
+        # Calculate dissonance using spectral contrast
+        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
+        dissonance = np.mean(spectral_contrast[0]) # Higher values may indicate more dissonance
+
+        return {
+            "key": key,
+            "mode": mode,
+            "is_major": mode == "major",
+            "harmony_complexity": float(harmony_complexity),
+            "tonal_stability": float(tonal_stability),
+            "brightness": float(brightness),
+            "dissonance": float(dissonance)
+        }
+
+    def analyze_energy(self, y, sr):
+        """Analyze energy characteristics of the audio"""
+        # RMS Energy (overall loudness)
+        rms = librosa.feature.rms(y=y)[0]
+
+        # Energy metrics
+        mean_energy = np.mean(rms)
+        energy_std = np.std(rms)
+        energy_dynamic_range = np.max(rms) - np.min(rms) if len(rms) > 0 else 0
+
+        # Energy distribution across frequency ranges
+        spec = np.abs(librosa.stft(y))
+
+        # Divide the spectrum into low, mid, and high ranges
+        freq_bins = spec.shape[0]
+        low_freq_energy = np.mean(spec[:int(freq_bins*0.2), :])
+        mid_freq_energy = np.mean(spec[int(freq_bins*0.2):int(freq_bins*0.8), :])
+        high_freq_energy = np.mean(spec[int(freq_bins*0.8):, :])
+
+        # Normalize to create a distribution
+        total_energy = low_freq_energy + mid_freq_energy + high_freq_energy
+        if total_energy > 0:
+            low_freq_ratio = low_freq_energy / total_energy
+            mid_freq_ratio = mid_freq_energy / total_energy
+            high_freq_ratio = high_freq_energy / total_energy
+        else:
+            low_freq_ratio = mid_freq_ratio = high_freq_ratio = 1/3
+
+        return {
+            "mean_energy": float(mean_energy),
+            "energy_std": float(energy_std),
+            "energy_dynamic_range": float(energy_dynamic_range),
+            "frequency_distribution": {
+                "low_freq": float(low_freq_ratio),
+                "mid_freq": float(mid_freq_ratio),
+                "high_freq": float(high_freq_ratio)
+            }
+        }
+
+    def analyze_emotion(self, rhythm_data, tonal_data, energy_data):
+        """Classify the emotion based on musical features"""
+        # Extract key features for emotion detection
+        tempo = rhythm_data["tempo"]
+        is_major = tonal_data["is_major"]
+        energy = energy_data["mean_energy"]
+        brightness = tonal_data["brightness"]
+
+        # Calculate scores for each emotion
+        emotion_scores = {}
+        for emotion, profile in self.emotion_profiles.items():
+            score = 0.0
+
+            # Tempo contribution (0-1 score)
+            tempo_range = profile["tempo"]
+            if tempo_range[0] <= tempo <= tempo_range[1]:
+                score += 1.0
+            else:
+                # Partial score based on distance
+                distance = min(abs(tempo - tempo_range[0]), abs(tempo - tempo_range[1]))
+                max_distance = 40 # Maximum distance to consider
+                score += max(0, 1 - (distance / max_distance))
+
+            # Energy contribution (0-1 score)
+            energy_range = profile["energy"]
+            if energy_range[0] <= energy <= energy_range[1]:
+                score += 1.0
+            else:
+                # Partial score based on distance
+                distance = min(abs(energy - energy_range[0]), abs(energy - energy_range[1]))
+                max_distance = 0.5 # Maximum distance to consider
+                score += max(0, 1 - (distance / max_distance))
+
+            # Mode contribution (0-1 score)
+            if profile["major_mode"] is not None: # Some emotions don't have strong mode preference
+                score += 1.0 if profile["major_mode"] == is_major else 0.0
+            else:
+                score += 0.5 # Neutral contribution
+
+            # Brightness contribution (0-1 score)
+            brightness_range = profile["brightness"]
+            if brightness_range[0] <= brightness <= brightness_range[1]:
+                score += 1.0
+            else:
+                # Partial score based on distance
+                distance = min(abs(brightness - brightness_range[0]), abs(brightness - brightness_range[1]))
+                max_distance = 0.5 # Maximum distance to consider
+                score += max(0, 1 - (distance / max_distance))
+
+            # Normalize score (0-1 range)
+            emotion_scores[emotion] = score / 4.0
+
+        # Find primary emotion
+        primary_emotion = max(emotion_scores.items(), key=lambda x: x[1])
+
+        # Calculate valence and arousal (dimensional emotion model)
+        # Mapping different emotions to valence-arousal space
+        valence_map = {
+            'happy': 0.8, 'sad': 0.2, 'calm': 0.6,
+            'energetic': 0.7, 'tense': 0.3, 'nostalgic': 0.5
+        }
+
+        arousal_map = {
+            'happy': 0.7, 'sad': 0.3, 'calm': 0.2,
+            'energetic': 0.9, 'tense': 0.8, 'nostalgic': 0.4
+        }
+
+        # Calculate weighted valence and arousal
+        total_weight = sum(emotion_scores.values())
+        if total_weight > 0:
+            valence = sum(score * valence_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
+            arousal = sum(score * arousal_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
+        else:
+            valence = 0.5
+            arousal = 0.5
+
+        return {
+            "primary_emotion": primary_emotion[0],
+            "confidence": primary_emotion[1],
+            "emotion_scores": emotion_scores,
+            "valence": float(valence), # Pleasure dimension (0-1)
+            "arousal": float(arousal) # Activity dimension (0-1)
+        }
+
+    def analyze_theme(self, rhythm_data, tonal_data, emotion_data):
+        """Infer potential themes based on musical features and emotion"""
+        # Extract relevant features
+        primary_emotion = emotion_data["primary_emotion"]
+        harmony_complexity = tonal_data["harmony_complexity"]
+
+        # Calculate theme scores
+        theme_scores = {}
+        for theme, profile in self.theme_profiles.items():
+            score = 0.0
+
+            # Emotion contribution
+            if primary_emotion in profile["emotion"]:
+                # Emotions listed earlier have stronger connection to the theme
+                position_weight = 1.0 / (profile["emotion"].index(primary_emotion) + 1)
+                score += position_weight
+
+            # Secondary emotions contribution
+            secondary_emotions = [e for e, s in emotion_data["emotion_scores"].items()
+                                  if s > 0.5 and e != primary_emotion]
+            for emotion in secondary_emotions:
+                if emotion in profile["emotion"]:
+                    score += 0.3 # Less weight than primary emotion
+
+            # Harmony complexity contribution
+            complexity_range = profile["harmony_complexity"]
+            if complexity_range[0] <= harmony_complexity <= complexity_range[1]:
+                score += 1.0
+            else:
+                # Partial score based on distance
+                distance = min(abs(harmony_complexity - complexity_range[0]),
+                               abs(harmony_complexity - complexity_range[1]))
+                max_distance = 0.5 # Maximum distance to consider
+                score += max(0, 1 - (distance / max_distance))
+
+            # Normalize score
+            theme_scores[theme] = min(1.0, score / 2.5)
+
+        # Find primary theme
+        primary_theme = max(theme_scores.items(), key=lambda x: x[1])
+
+        # Find secondary themes (scores > 0.5)
+        secondary_themes = [(theme, score) for theme, score in theme_scores.items()
+                            if score > 0.5 and theme != primary_theme[0]]
+        secondary_themes.sort(key=lambda x: x[1], reverse=True)
+
+        return {
+            "primary_theme": primary_theme[0],
+            "confidence": primary_theme[1],
+            "secondary_themes": [t[0] for t in secondary_themes[:2]], # Top 2 secondary themes
+            "theme_scores": theme_scores
+        }
+
+    def analyze_music(self, file_path):
+        """Main function to perform comprehensive music analysis"""
+        # Load the audio file
+        y, sr = self.load_audio(file_path)
+        if y is None:
+            return {"error": "Failed to load audio file"}
+
+        # Run all analyses
+        rhythm_data = self.analyze_rhythm(y, sr)
+        tonal_data = self.analyze_tonality(y, sr)
+        energy_data = self.analyze_energy(y, sr)
+
+        # Higher-level analyses that depend on the basic features
+        emotion_data = self.analyze_emotion(rhythm_data, tonal_data, energy_data)
+        theme_data = self.analyze_theme(rhythm_data, tonal_data, emotion_data)
+
+        # Combine all results
+        return {
+            "file": file_path,
+            "rhythm_analysis": rhythm_data,
+            "tonal_analysis": tonal_data,
+            "energy_analysis": energy_data,
+            "emotion_analysis": emotion_data,
+            "theme_analysis": theme_data,
+            "summary": {
+                "tempo": rhythm_data["tempo"],
+                "time_signature": rhythm_data["estimated_time_signature"],
+                "key": tonal_data["key"],
+                "mode": tonal_data["mode"],
+                "primary_emotion": emotion_data["primary_emotion"],
+                "primary_theme": theme_data["primary_theme"]
+            }
+        }
+
+    # def visualize_analysis(self, file_path):
+    #     """Create visualizations for the music analysis results"""
+    #     # Load audio and run analysis
+    #     y, sr = self.load_audio(file_path)
+    #     if y is None:
+    #         print("Error: Failed to load audio file")
+    #         return
+
+    #     results = self.analyze_music(file_path)
+
+    #     # Create visualization
+    #     plt.figure(figsize=(15, 12))
+
+    #     # Waveform
+    #     plt.subplot(3, 2, 1)
+    #     librosa.display.waveshow(y, sr=sr, alpha=0.6)
+    #     plt.title(f'Waveform (Tempo: {results["rhythm_analysis"]["tempo"]:.1f} BPM)')
+
+    #     # Spectrogram
+    #     plt.subplot(3, 2, 2)
+    #     D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
+    #     librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
+    #     plt.colorbar(format='%+2.0f dB')
+    #     plt.title(f'Spectrogram (Key: {results["tonal_analysis"]["key"]} {results["tonal_analysis"]["mode"]})')
+
+    #     # Chromagram
+    #     plt.subplot(3, 2, 3)
+    #     chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
+    #     librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
+    #     plt.colorbar()
+    #     plt.title('Chromagram')
+
+    #     # Onset strength and beats
+    #     plt.subplot(3, 2, 4)
+    #     onset_env = librosa.onset.onset_strength(y=y, sr=sr)
+    #     times = librosa.times_like(onset_env, sr=sr)
+    #     plt.plot(times, librosa.util.normalize(onset_env), label='Onset strength')
+    #     plt.vlines(results["rhythm_analysis"]["beat_times"], 0, 1, alpha=0.5, color='r',
+    #                linestyle='--', label='Beats')
+    #     plt.legend()
+    #     plt.title('Rhythm Analysis')
+
+    #     # Emotion scores
+    #     plt.subplot(3, 2, 5)
+    #     emotions = list(results["emotion_analysis"]["emotion_scores"].keys())
+    #     scores = list(results["emotion_analysis"]["emotion_scores"].values())
+    #     plt.bar(emotions, scores, color='skyblue')
+    #     plt.ylim(0, 1)
+    #     plt.title(f'Emotion Analysis (Primary: {results["emotion_analysis"]["primary_emotion"]})')
+    #     plt.xticks(rotation=45)
+
+    #     # Theme scores
+    #     plt.subplot(3, 2, 6)
+    #     themes = list(results["theme_analysis"]["theme_scores"].keys())
+    #     scores = list(results["theme_analysis"]["theme_scores"].values())
+    #     plt.bar(themes, scores, color='lightgreen')
+    #     plt.ylim(0, 1)
+    #     plt.title(f'Theme Analysis (Primary: {results["theme_analysis"]["primary_theme"]})')
+    #     plt.xticks(rotation=45)
+
+    #     plt.tight_layout()
+    #     plt.show()
+
+
+# Example usage: runs only when this module is executed directly, so that
+# "from emotionanalysis import MusicAnalyzer" (as in app.py) does not trigger an analysis.
+if __name__ == "__main__":
+    # Create an instance of the analyzer
+    analyzer = MusicAnalyzer()
+
+    # Analyze an audio file (placeholder path - replace with a real file)
+    audio_file = "path/to/your/audio_file.mp3"
+    results = analyzer.analyze_music(audio_file)
+
+    # Print analysis summary
+    print("\n=== MUSIC ANALYSIS SUMMARY ===")
+    print(f"Tempo: {results['summary']['tempo']:.1f} BPM")
+    print(f"Time Signature: {results['summary']['time_signature']}")
+    print(f"Key: {results['summary']['key']} {results['summary']['mode']}")
+    print(f"Primary Emotion: {results['summary']['primary_emotion']}")
+    print(f"Primary Theme: {results['summary']['primary_theme']}")
+
+    # Show detailed results (optional)
+    import json
+    print("\n=== DETAILED ANALYSIS ===")
+    print(json.dumps(results, indent=2))
+
+    # Visualize the analysis
+    # analyzer.visualize_analysis(audio_file)
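
A minimal standalone usage sketch for the new module, assuming librosa, numpy, scipy and matplotlib are installed and that the placeholder path is replaced with a real audio file; analyze_music() and its "error" return are exactly as defined above:

from emotionanalysis import MusicAnalyzer

analyzer = MusicAnalyzer()
results = analyzer.analyze_music("my_song.mp3")  # placeholder path

if "error" in results:
    # analyze_music() returns {"error": ...} when the file cannot be loaded
    print(results["error"])
else:
    summary = results["summary"]
    print(f"{summary['key']} {summary['mode']}, {summary['tempo']:.1f} BPM, "
          f"emotion: {summary['primary_emotion']}, theme: {summary['primary_theme']}")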
example.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import sys
-from app import process_audio
+from app import process_audio, music_analyzer

 def main():
     """
@@ -23,12 +23,23 @@ def main():
     # Call the main processing function
     genre_results, lyrics = process_audio(audio_file)

+    # Get emotion analysis results
+    emotion_results = music_analyzer.analyze_music(audio_file)
+
     # Print results
     print("\n" + "="*50)
     print("GENRE CLASSIFICATION RESULTS:")
     print("="*50)
     print(genre_results)

+    print("\n" + "="*50)
+    print("EMOTION ANALYSIS RESULTS:")
+    print("="*50)
+    print(f"Tempo: {emotion_results['summary']['tempo']:.1f} BPM")
+    print(f"Key: {emotion_results['summary']['key']} {emotion_results['summary']['mode']}")
+    print(f"Primary Emotion: {emotion_results['summary']['primary_emotion']}")
+    print(f"Primary Theme: {emotion_results['summary']['primary_theme']}")
+
     print("\n" + "="*50)
     print("GENERATED LYRICS:")
     print("="*50)