root
committed on
Commit
·
547c4d0
1
Parent(s):
93aef48
adding emotion analysis
Browse files- app.py +51 -8
- emotionanalysis.py +457 -0
- example.py +12 -1
app.py
CHANGED
@@ -21,6 +21,7 @@ from utils import (
|
|
21 |
ensure_cuda_availability,
|
22 |
preprocess_audio_for_model
|
23 |
)
|
|
|
24 |
|
25 |
# Login to Hugging Face Hub if token is provided
|
26 |
if "HF_TOKEN" in os.environ:
|
@@ -98,6 +99,9 @@ llm_pipeline = pipeline(
|
|
98 |
max_new_tokens=512,
|
99 |
)
|
100 |
|
|
|
|
|
|
|
101 |
def extract_audio_features(audio_file):
|
102 |
"""Extract audio features from an audio file."""
|
103 |
# Load the audio file using utility function
|
@@ -162,7 +166,7 @@ def classify_genre(audio_data):
|
|
162 |
# Fallback: return a default genre if everything fails
|
163 |
return [("rock", 1.0)]
|
164 |
|
165 |
-
def generate_lyrics(genre, duration):
|
166 |
"""Generate lyrics based on the genre and with appropriate length."""
|
167 |
# Calculate appropriate lyrics length based on audio duration
|
168 |
lines_count = calculate_lyrics_length(duration)
|
@@ -181,12 +185,27 @@ def generate_lyrics(genre, duration):
|
|
181 |
verse_lines = 3
|
182 |
chorus_lines = 2
|
183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
# Create prompt for the LLM
|
185 |
prompt = f"""
|
186 |
You are a talented songwriter who specializes in {genre} music.
|
187 |
Write original {genre} song lyrics for a song that is {duration:.1f} seconds long.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
The lyrics should:
|
189 |
- Perfectly capture the essence and style of {genre} music
|
|
|
190 |
- Be approximately {lines_count} lines long
|
191 |
- Have a coherent theme and flow
|
192 |
- Follow this structure:
|
@@ -299,9 +318,12 @@ def process_audio(audio_file):
|
|
299 |
# Format genre results using utility function
|
300 |
genre_results = format_genre_results(top_genres)
|
301 |
|
302 |
-
#
|
|
|
|
|
|
|
303 |
primary_genre, _ = top_genres[0]
|
304 |
-
lyrics = generate_lyrics(primary_genre, audio_data["duration"])
|
305 |
|
306 |
return genre_results, lyrics
|
307 |
|
@@ -311,7 +333,7 @@ def process_audio(audio_file):
|
|
311 |
# Create Gradio interface
|
312 |
with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
313 |
gr.Markdown("# Music Genre Classifier & Lyrics Generator")
|
314 |
-
gr.Markdown("Upload a music file to classify its genre and generate matching lyrics.")
|
315 |
|
316 |
with gr.Row():
|
317 |
with gr.Column():
|
@@ -320,20 +342,41 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
320 |
|
321 |
with gr.Column():
|
322 |
genre_output = gr.Textbox(label="Detected Genres", lines=5)
|
|
|
323 |
lyrics_output = gr.Textbox(label="Generated Lyrics", lines=15)
|
324 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
submit_btn.click(
|
326 |
-
fn=
|
327 |
inputs=[audio_input],
|
328 |
-
outputs=[genre_output, lyrics_output]
|
329 |
)
|
330 |
|
331 |
gr.Markdown("### How it works")
|
332 |
gr.Markdown("""
|
333 |
1. Upload an audio file of your choice
|
334 |
2. The system will classify the genre using the dima806/music_genres_classification model
|
335 |
-
3.
|
336 |
-
4.
|
|
|
337 |
""")
|
338 |
|
339 |
# Launch the app
|
|
|
21 |
ensure_cuda_availability,
|
22 |
preprocess_audio_for_model
|
23 |
)
|
24 |
+
from emotionanalysis import MusicAnalyzer
|
25 |
|
26 |
# Login to Hugging Face Hub if token is provided
|
27 |
if "HF_TOKEN" in os.environ:
|
|
|
99 |
max_new_tokens=512,
|
100 |
)
|
101 |
|
102 |
+
# Initialize music emotion analyzer
|
103 |
+
music_analyzer = MusicAnalyzer()
|
104 |
+
|
105 |
def extract_audio_features(audio_file):
|
106 |
"""Extract audio features from an audio file."""
|
107 |
# Load the audio file using utility function
|
|
|
166 |
# Fallback: return a default genre if everything fails
|
167 |
return [("rock", 1.0)]
|
168 |
|
169 |
+
def generate_lyrics(genre, duration, emotion_results):
|
170 |
"""Generate lyrics based on the genre and with appropriate length."""
|
171 |
# Calculate appropriate lyrics length based on audio duration
|
172 |
lines_count = calculate_lyrics_length(duration)
|
|
|
185 |
verse_lines = 3
|
186 |
chorus_lines = 2
|
187 |
|
188 |
+
# Extract emotion and theme data from analysis results
|
189 |
+
primary_emotion = emotion_results["emotion_analysis"]["primary_emotion"]
|
190 |
+
primary_theme = emotion_results["theme_analysis"]["primary_theme"]
|
191 |
+
tempo = emotion_results["rhythm_analysis"]["tempo"]
|
192 |
+
key = emotion_results["tonal_analysis"]["key"]
|
193 |
+
mode = emotion_results["tonal_analysis"]["mode"]
|
194 |
+
|
195 |
# Create prompt for the LLM
|
196 |
prompt = f"""
|
197 |
You are a talented songwriter who specializes in {genre} music.
|
198 |
Write original {genre} song lyrics for a song that is {duration:.1f} seconds long.
|
199 |
+
|
200 |
+
Music analysis has detected the following qualities in the music:
|
201 |
+
- Tempo: {tempo:.1f} BPM
|
202 |
+
- Key: {key} {mode}
|
203 |
+
- Primary emotion: {primary_emotion}
|
204 |
+
- Primary theme: {primary_theme}
|
205 |
+
|
206 |
The lyrics should:
|
207 |
- Perfectly capture the essence and style of {genre} music
|
208 |
+
- Express the {primary_emotion} emotion and {primary_theme} theme
|
209 |
- Be approximately {lines_count} lines long
|
210 |
- Have a coherent theme and flow
|
211 |
- Follow this structure:
|
|
|
318 |
# Format genre results using utility function
|
319 |
genre_results = format_genre_results(top_genres)
|
320 |
|
321 |
+
# Analyze music emotions and themes
|
322 |
+
emotion_results = music_analyzer.analyze_music(audio_file)
|
323 |
+
|
324 |
+
# Generate lyrics based on top genre and emotion analysis
|
325 |
primary_genre, _ = top_genres[0]
|
326 |
+
lyrics = generate_lyrics(primary_genre, audio_data["duration"], emotion_results)
|
327 |
|
328 |
return genre_results, lyrics
|
329 |
|
|
|
333 |
# Create Gradio interface
|
334 |
with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
335 |
gr.Markdown("# Music Genre Classifier & Lyrics Generator")
|
336 |
+
gr.Markdown("Upload a music file to classify its genre, analyze its emotions, and generate matching lyrics.")
|
337 |
|
338 |
with gr.Row():
|
339 |
with gr.Column():
|
|
|
342 |
|
343 |
with gr.Column():
|
344 |
genre_output = gr.Textbox(label="Detected Genres", lines=5)
|
345 |
+
emotion_output = gr.Textbox(label="Emotion Analysis", lines=5)
|
346 |
lyrics_output = gr.Textbox(label="Generated Lyrics", lines=15)
|
347 |
|
348 |
+
def display_results(audio_file):
    """Run the full pipeline for the Gradio UI and format its three outputs.

    Args:
        audio_file: Path of the uploaded audio file, or None when nothing
            has been uploaded yet.

    Returns:
        A 3-tuple ``(genre_text, emotion_text, lyrics)`` matching the
        ``genre_output``, ``emotion_output`` and ``lyrics_output`` textboxes.
        On failure the first two elements carry error messages and the
        lyrics element is None.
    """
    if audio_file is None:
        return "Please upload an audio file.", "No emotion analysis available.", None

    try:
        # Process audio and get genre and lyrics
        genre_results, lyrics = process_audio(audio_file)

        # NOTE(review): process_audio already calls analyze_music internally
        # but does not return the result, so the analysis is repeated here.
        emotion_results = music_analyzer.analyze_music(audio_file)

        # analyze_music signals a load failure with {"error": ...} and no
        # "summary" key; report that instead of failing with a KeyError.
        summary = emotion_results.get("summary")
        if summary is None:
            reason = emotion_results.get("error", "no summary returned")
            return genre_results, f"Error in emotion analysis: {reason}", lyrics

        emotion_text = "\n".join([
            f"Tempo: {summary['tempo']:.1f} BPM",
            f"Key: {summary['key']} {summary['mode']}",
            f"Primary Emotion: {summary['primary_emotion']}",
            f"Primary Theme: {summary['primary_theme']}",
        ])

        return genre_results, emotion_text, lyrics
    except Exception as e:
        # UI boundary: surface any failure as text rather than crashing the app.
        return f"Error: {str(e)}", "Error in emotion analysis", None
|
366 |
+
|
367 |
submit_btn.click(
|
368 |
+
fn=display_results,
|
369 |
inputs=[audio_input],
|
370 |
+
outputs=[genre_output, emotion_output, lyrics_output]
|
371 |
)
|
372 |
|
373 |
gr.Markdown("### How it works")
|
374 |
gr.Markdown("""
|
375 |
1. Upload an audio file of your choice
|
376 |
2. The system will classify the genre using the dima806/music_genres_classification model
|
377 |
+
3. The system will analyze the musical emotion and theme using advanced audio processing
|
378 |
+
4. Based on the detected genre and emotion, it will generate appropriate lyrics using Llama-3.1-8B-Instruct
|
379 |
+
5. The lyrics length is automatically adjusted based on your audio duration
|
380 |
""")
|
381 |
|
382 |
# Launch the app
|
emotionanalysis.py
ADDED
@@ -0,0 +1,457 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import librosa
|
2 |
+
import numpy as np
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
from scipy.stats import mode
|
5 |
+
import warnings
|
6 |
+
warnings.filterwarnings('ignore') # Suppress librosa warnings
|
7 |
+
class MusicAnalyzer:
    """Heuristic rhythm, tonality, energy, emotion and theme analysis.

    Low-level features are extracted with librosa; emotion and theme are then
    scored against the hand-written range profiles defined in ``__init__``.
    """

    # Krumhansl-Schmuckler key profiles, indexed by scale degree (0 = tonic).
    _MAJOR_PROFILE = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
    _MINOR_PROFILE = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])

    def __init__(self):
        # Emotion feature mappings - these define characteristics of different emotions
        self.emotion_profiles = {
            'happy': {'tempo': (100, 180), 'energy': (0.6, 1.0), 'major_mode': True, 'brightness': (0.6, 1.0)},
            'sad': {'tempo': (40, 90), 'energy': (0, 0.5), 'major_mode': False, 'brightness': (0, 0.5)},
            'calm': {'tempo': (50, 90), 'energy': (0, 0.4), 'major_mode': True, 'brightness': (0.3, 0.6)},
            'energetic': {'tempo': (110, 200), 'energy': (0.7, 1.0), 'major_mode': True, 'brightness': (0.5, 0.9)},
            'tense': {'tempo': (70, 140), 'energy': (0.5, 0.9), 'major_mode': False, 'brightness': (0.3, 0.7)},
            'nostalgic': {'tempo': (60, 100), 'energy': (0.3, 0.7), 'major_mode': None, 'brightness': (0.4, 0.7)}
        }

        # Theme mappings based on musical features
        self.theme_profiles = {
            'love': {'emotion': ['happy', 'nostalgic', 'sad'], 'harmony_complexity': (0.3, 0.7)},
            'triumph': {'emotion': ['energetic', 'happy'], 'harmony_complexity': (0.4, 0.8)},
            'loss': {'emotion': ['sad', 'nostalgic'], 'harmony_complexity': (0.3, 0.7)},
            'adventure': {'emotion': ['energetic', 'tense'], 'harmony_complexity': (0.5, 0.9)},
            'reflection': {'emotion': ['calm', 'nostalgic'], 'harmony_complexity': (0.4, 0.8)},
            'conflict': {'emotion': ['tense', 'energetic'], 'harmony_complexity': (0.6, 1.0)}
        }

        # Musical key mapping (index = pitch class, C = 0)
        self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    def load_audio(self, file_path, sr=22050, duration=None):
        """Load an audio file with librosa.

        Returns:
            ``(y, sr)`` on success, or ``(None, None)`` if loading raised;
            the error is printed rather than propagated.
        """
        try:
            y, sr = librosa.load(file_path, sr=sr, duration=duration)
            return y, sr
        except Exception as e:
            print(f"Error loading audio file: {e}")
            return None, None

    def analyze_rhythm(self, y, sr):
        """Analyze rhythm-related features: tempo, beats, time signature.

        Returns:
            dict with keys ``tempo``, ``beat_times``, ``beat_intervals``,
            ``beat_regularity``, ``rhythm_intensity``, ``rhythm_complexity``
            and ``estimated_time_signature``.
        """
        # Tempo and beat detection
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
        # beat_track may return the tempo as a 1-element array; take the
        # scalar explicitly instead of relying on float(ndarray).
        tempo = float(np.atleast_1d(tempo)[0])
        beat_times = librosa.frames_to_time(beat_frames, sr=sr)

        # Beat intervals and regularity (inverse of interval spread)
        beat_intervals = np.diff(beat_times) if len(beat_times) > 1 else np.array([0])
        interval_std = np.std(beat_intervals)
        beat_regularity = 1.0 / interval_std if interval_std > 0 else 0

        # Time signature estimation - a challenging task with many limitations
        estimated_signature = self._estimate_time_signature(y, sr, beat_times, onset_env)

        if onset_env.size > 0:
            onset_max = np.max(onset_env)
            onset_mean = np.mean(onset_env)
            # Mean onset strength relative to its peak
            rhythm_intensity = onset_mean / onset_max if onset_max > 0 else 0
            # Coefficient of variation of the onset strength
            rhythm_complexity = np.std(onset_env) / onset_mean if onset_mean > 0 else 0
        else:
            rhythm_intensity = 0
            rhythm_complexity = 0

        return {
            "tempo": tempo,
            "beat_times": beat_times.tolist(),
            "beat_intervals": beat_intervals.tolist(),
            "beat_regularity": float(beat_regularity),
            "rhythm_intensity": float(rhythm_intensity),
            "rhythm_complexity": float(rhythm_complexity),
            "estimated_time_signature": estimated_signature
        }

    def _estimate_time_signature(self, y, sr, beat_times, onset_env):
        """Estimate the time signature based on beat patterns.

        This is a simplified approach: the first autocorrelation peak of the
        onset envelope is compared with the median beat interval.

        Returns:
            ``"Unknown"`` with fewer than four beats, otherwise a string such
            as ``"4/4"``, ``"3/4"``, ``"2/4"`` or ``"<n>/4"``.
        """
        if len(beat_times) < 4:
            return "Unknown"

        median_interval = float(np.median(np.diff(beat_times)))
        if median_interval <= 0:
            return "4/4"  # Degenerate beat grid; default to most common

        # Look for periodicity in the onset envelope
        ac = librosa.autocorrelate(onset_env, max_size=sr)

        # Find peaks in autocorrelation after the first one (which is at lag 0)
        peaks = librosa.util.peak_pick(ac, pre_max=20, post_max=20, pre_avg=20, post_avg=20, delta=0.1, wait=1)
        peaks = peaks[peaks > 0]  # Remove the first peak which is at lag 0

        if len(peaks) == 0:
            return "4/4"  # Default to most common

        # The lag is counted in onset-envelope frames, not samples, so it has
        # to be converted with frames_to_time rather than divided by sr.
        first_peak_time = float(librosa.frames_to_time(peaks[0], sr=sr))
        beats_per_bar = int(round(first_peak_time / median_interval))

        if beats_per_bar < 1:
            return "4/4"  # Periodicity shorter than one beat; default

        # Map to common time signatures
        if beats_per_bar in (4, 8):
            return "4/4"
        if beats_per_bar in (3, 6):
            return "3/4"
        if beats_per_bar == 2:
            return "2/4"
        return f"{beats_per_bar}/4"  # Default assumption

    def _estimate_key(self, chroma_avg):
        """Return ``(key_name, mode_name)`` for a 12-bin mean chroma vector.

        For each candidate tonic the chroma vector is rotated so that the
        tonic's pitch class lands on index 0, then correlated with the major
        and minor profiles; the best-correlated (tonic, mode) pair wins.
        """
        chroma_avg = np.asarray(chroma_avg, dtype=float)
        major_corr = np.zeros(12)
        minor_corr = np.zeros(12)

        for tonic in range(12):
            # Negative shift: element `tonic` moves to index 0 (scale degree 0).
            aligned = np.roll(chroma_avg, -tonic)
            major_corr[tonic] = np.corrcoef(aligned, self._MAJOR_PROFILE)[0, 1]
            minor_corr[tonic] = np.corrcoef(aligned, self._MINOR_PROFILE)[0, 1]

        # A constant chroma vector yields NaN correlations; treat as worst fit.
        major_corr = np.nan_to_num(major_corr, nan=-1.0)
        minor_corr = np.nan_to_num(minor_corr, nan=-1.0)

        best_major = int(np.argmax(major_corr))
        best_minor = int(np.argmax(minor_corr))

        if major_corr[best_major] > minor_corr[best_minor]:
            return self.key_names[best_major], "major"
        return self.key_names[best_minor], "minor"

    def analyze_tonality(self, y, sr):
        """Analyze tonal features: key, mode, harmonic features.

        Returns:
            dict with keys ``key``, ``mode``, ``is_major``,
            ``harmony_complexity``, ``tonal_stability``, ``brightness`` and
            ``dissonance``.
        """
        # Compute chromagram and its per-pitch-class mean
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_avg = np.mean(chroma, axis=1)

        # Krumhansl-Schmuckler key-finding algorithm (simplified)
        key, mode_name = self._estimate_key(chroma_avg)

        # Harmony complexity: coefficient of variation of the chromagram
        chroma_mean = np.mean(chroma)
        harmony_complexity = np.std(chroma) / chroma_mean if chroma_mean > 0 else 0

        # Tonal stability: inverse spread of the mean chroma
        tonal_stability = 1.0 / (np.std(chroma_avg) + 0.001)  # Add small value to avoid division by zero

        # Spectral brightness: mean spectral centroid normalized by Nyquist
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        brightness = np.mean(spectral_centroid) / (sr/2)

        # Dissonance proxy: mean of the first spectral-contrast band
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        dissonance = np.mean(spectral_contrast[0])  # Higher values may indicate more dissonance

        return {
            "key": key,
            "mode": mode_name,
            "is_major": mode_name == "major",
            "harmony_complexity": float(harmony_complexity),
            "tonal_stability": float(tonal_stability),
            "brightness": float(brightness),
            "dissonance": float(dissonance)
        }

    def analyze_energy(self, y, sr):
        """Analyze energy characteristics of the audio.

        Returns:
            dict with ``mean_energy``, ``energy_std``,
            ``energy_dynamic_range`` and a ``frequency_distribution`` dict
            holding ``low_freq``/``mid_freq``/``high_freq`` ratios.
        """
        # RMS Energy (overall loudness)
        rms = librosa.feature.rms(y=y)[0]

        mean_energy = np.mean(rms)
        energy_std = np.std(rms)
        energy_dynamic_range = np.max(rms) - np.min(rms) if len(rms) > 0 else 0

        # Magnitude spectrogram, split by bin index into low (bottom 20%),
        # mid (20-80%) and high (top 20%) bands
        spec = np.abs(librosa.stft(y))
        freq_bins = spec.shape[0]
        low_edge = int(freq_bins*0.2)
        high_edge = int(freq_bins*0.8)
        low_freq_energy = np.mean(spec[:low_edge, :])
        mid_freq_energy = np.mean(spec[low_edge:high_edge, :])
        high_freq_energy = np.mean(spec[high_edge:, :])

        # Normalize to create a distribution
        total_energy = low_freq_energy + mid_freq_energy + high_freq_energy
        if total_energy > 0:
            low_freq_ratio = low_freq_energy / total_energy
            mid_freq_ratio = mid_freq_energy / total_energy
            high_freq_ratio = high_freq_energy / total_energy
        else:
            low_freq_ratio = mid_freq_ratio = high_freq_ratio = 1/3

        return {
            "mean_energy": float(mean_energy),
            "energy_std": float(energy_std),
            "energy_dynamic_range": float(energy_dynamic_range),
            "frequency_distribution": {
                "low_freq": float(low_freq_ratio),
                "mid_freq": float(mid_freq_ratio),
                "high_freq": float(high_freq_ratio)
            }
        }

    @staticmethod
    def _range_score(value, bounds, max_distance):
        """Score 1.0 inside ``bounds``, falling linearly to 0 at ``max_distance`` outside."""
        low, high = bounds
        if low <= value <= high:
            return 1.0
        distance = min(abs(value - low), abs(value - high))
        return max(0, 1 - (distance / max_distance))

    def analyze_emotion(self, rhythm_data, tonal_data, energy_data):
        """Classify the emotion based on musical features.

        Each emotion profile is scored on tempo, energy, mode and brightness
        (one point each) and normalized to 0-1.

        Returns:
            dict with ``primary_emotion``, ``confidence``, ``emotion_scores``,
            ``valence`` and ``arousal``.
        """
        tempo = rhythm_data["tempo"]
        is_major = tonal_data["is_major"]
        # NOTE(review): this is the mean RMS from analyze_energy, compared
        # directly against the 0-1 profile ranges - confirm the scale matches.
        energy = energy_data["mean_energy"]
        brightness = tonal_data["brightness"]

        emotion_scores = {}
        for emotion, profile in self.emotion_profiles.items():
            score = 0.0
            score += self._range_score(tempo, profile["tempo"], 40)
            score += self._range_score(energy, profile["energy"], 0.5)

            # Mode contribution; profiles with no mode preference get a neutral 0.5
            if profile["major_mode"] is not None:
                score += 1.0 if profile["major_mode"] == is_major else 0.0
            else:
                score += 0.5

            score += self._range_score(brightness, profile["brightness"], 0.5)

            # Normalize score (0-1 range)
            emotion_scores[emotion] = score / 4.0

        # Find primary emotion (first profile wins ties)
        primary_emotion = max(emotion_scores.items(), key=lambda x: x[1])

        # Mapping different emotions to valence-arousal space
        valence_map = {
            'happy': 0.8, 'sad': 0.2, 'calm': 0.6,
            'energetic': 0.7, 'tense': 0.3, 'nostalgic': 0.5
        }
        arousal_map = {
            'happy': 0.7, 'sad': 0.3, 'calm': 0.2,
            'energetic': 0.9, 'tense': 0.8, 'nostalgic': 0.4
        }

        # Score-weighted average of the per-emotion valence and arousal
        total_weight = sum(emotion_scores.values())
        if total_weight > 0:
            valence = sum(score * valence_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
            arousal = sum(score * arousal_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
        else:
            valence = 0.5
            arousal = 0.5

        return {
            "primary_emotion": primary_emotion[0],
            "confidence": primary_emotion[1],
            "emotion_scores": emotion_scores,
            "valence": float(valence),  # Pleasure dimension (0-1)
            "arousal": float(arousal)  # Activity dimension (0-1)
        }

    def analyze_theme(self, rhythm_data, tonal_data, emotion_data):
        """Infer potential themes based on musical features and emotion.

        Returns:
            dict with ``primary_theme``, ``confidence``, ``secondary_themes``
            (at most two, scores above 0.5) and ``theme_scores``.
        """
        primary_emotion = emotion_data["primary_emotion"]
        harmony_complexity = tonal_data["harmony_complexity"]

        # Emotions other than the primary one that scored above 0.5
        secondary_emotions = [e for e, s in emotion_data["emotion_scores"].items()
                              if s > 0.5 and e != primary_emotion]

        theme_scores = {}
        for theme, profile in self.theme_profiles.items():
            score = 0.0
            theme_emotions = profile["emotion"]

            # Emotions listed earlier have stronger connection to the theme
            if primary_emotion in theme_emotions:
                score += 1.0 / (theme_emotions.index(primary_emotion) + 1)

            # Less weight than primary emotion
            score += 0.3 * sum(1 for e in secondary_emotions if e in theme_emotions)

            # Harmony complexity contribution
            score += self._range_score(harmony_complexity, profile["harmony_complexity"], 0.5)

            # Normalize score
            theme_scores[theme] = min(1.0, score / 2.5)

        # Find primary theme (first profile wins ties)
        primary_theme = max(theme_scores.items(), key=lambda x: x[1])

        # Find secondary themes (scores > 0.5)
        secondary_themes = [(theme, score) for theme, score in theme_scores.items()
                            if score > 0.5 and theme != primary_theme[0]]
        secondary_themes.sort(key=lambda x: x[1], reverse=True)

        return {
            "primary_theme": primary_theme[0],
            "confidence": primary_theme[1],
            "secondary_themes": [t[0] for t in secondary_themes[:2]],  # Top 2 secondary themes
            "theme_scores": theme_scores
        }

    def analyze_music(self, file_path):
        """Main function to perform comprehensive music analysis.

        Returns:
            On success, a dict with ``file``, the five ``*_analysis`` dicts
            and a ``summary``. If the file cannot be loaded or contains no
            samples, ``{"error": "Failed to load audio file"}`` is returned
            instead, so callers must check for ``"error"`` before indexing.
        """
        y, sr = self.load_audio(file_path)
        if y is None or len(y) == 0:
            return {"error": "Failed to load audio file"}

        # Run all analyses
        rhythm_data = self.analyze_rhythm(y, sr)
        tonal_data = self.analyze_tonality(y, sr)
        energy_data = self.analyze_energy(y, sr)

        # Higher-level analyses that depend on the basic features
        emotion_data = self.analyze_emotion(rhythm_data, tonal_data, energy_data)
        theme_data = self.analyze_theme(rhythm_data, tonal_data, emotion_data)

        # Combine all results
        return {
            "file": file_path,
            "rhythm_analysis": rhythm_data,
            "tonal_analysis": tonal_data,
            "energy_analysis": energy_data,
            "emotion_analysis": emotion_data,
            "theme_analysis": theme_data,
            "summary": {
                "tempo": rhythm_data["tempo"],
                "time_signature": rhythm_data["estimated_time_signature"],
                "key": tonal_data["key"],
                "mode": tonal_data["mode"],
                "primary_emotion": emotion_data["primary_emotion"],
                "primary_theme": theme_data["primary_theme"]
            }
        }
|
372 |
+
|
373 |
+
# def visualize_analysis(self, file_path):
|
374 |
+
# """Create visualizations for the music analysis results"""
|
375 |
+
# # Load audio and run analysis
|
376 |
+
# y, sr = self.load_audio(file_path)
|
377 |
+
# if y is None:
|
378 |
+
# print("Error: Failed to load audio file")
|
379 |
+
# return
|
380 |
+
|
381 |
+
# results = self.analyze_music(file_path)
|
382 |
+
|
383 |
+
# # Create visualization
|
384 |
+
# plt.figure(figsize=(15, 12))
|
385 |
+
|
386 |
+
# # Waveform
|
387 |
+
# plt.subplot(3, 2, 1)
|
388 |
+
# librosa.display.waveshow(y, sr=sr, alpha=0.6)
|
389 |
+
# plt.title(f'Waveform (Tempo: {results["rhythm_analysis"]["tempo"]:.1f} BPM)')
|
390 |
+
|
391 |
+
# # Spectrogram
|
392 |
+
# plt.subplot(3, 2, 2)
|
393 |
+
# D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
|
394 |
+
# librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
|
395 |
+
# plt.colorbar(format='%+2.0f dB')
|
396 |
+
# plt.title(f'Spectrogram (Key: {results["tonal_analysis"]["key"]} {results["tonal_analysis"]["mode"]})')
|
397 |
+
|
398 |
+
# # Chromagram
|
399 |
+
# plt.subplot(3, 2, 3)
|
400 |
+
# chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
|
401 |
+
# librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
|
402 |
+
# plt.colorbar()
|
403 |
+
# plt.title('Chromagram')
|
404 |
+
|
405 |
+
# # Onset strength and beats
|
406 |
+
# plt.subplot(3, 2, 4)
|
407 |
+
# onset_env = librosa.onset.onset_strength(y=y, sr=sr)
|
408 |
+
# times = librosa.times_like(onset_env, sr=sr)
|
409 |
+
# plt.plot(times, librosa.util.normalize(onset_env), label='Onset strength')
|
410 |
+
# plt.vlines(results["rhythm_analysis"]["beat_times"], 0, 1, alpha=0.5, color='r',
|
411 |
+
# linestyle='--', label='Beats')
|
412 |
+
# plt.legend()
|
413 |
+
# plt.title('Rhythm Analysis')
|
414 |
+
|
415 |
+
# # Emotion scores
|
416 |
+
# plt.subplot(3, 2, 5)
|
417 |
+
# emotions = list(results["emotion_analysis"]["emotion_scores"].keys())
|
418 |
+
# scores = list(results["emotion_analysis"]["emotion_scores"].values())
|
419 |
+
# plt.bar(emotions, scores, color='skyblue')
|
420 |
+
# plt.ylim(0, 1)
|
421 |
+
# plt.title(f'Emotion Analysis (Primary: {results["emotion_analysis"]["primary_emotion"]})')
|
422 |
+
# plt.xticks(rotation=45)
|
423 |
+
|
424 |
+
# # Theme scores
|
425 |
+
# plt.subplot(3, 2, 6)
|
426 |
+
# themes = list(results["theme_analysis"]["theme_scores"].keys())
|
427 |
+
# scores = list(results["theme_analysis"]["theme_scores"].values())
|
428 |
+
# plt.bar(themes, scores, color='lightgreen')
|
429 |
+
# plt.ylim(0, 1)
|
430 |
+
# plt.title(f'Theme Analysis (Primary: {results["theme_analysis"]["primary_theme"]})')
|
431 |
+
# plt.xticks(rotation=45)
|
432 |
+
|
433 |
+
# plt.tight_layout()
|
434 |
+
# plt.show()
|
435 |
+
|
436 |
+
|
437 |
+
def main(argv=None):
    """Analyze one audio file from the command line and print a report.

    The original script body ran at import time and referenced an undefined
    ``audio_file`` name, so importing this module raised ``NameError``. The
    same report is now produced only when the module is run as a script.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. Exactly one argument (the audio path) is
            expected.

    Returns:
        Process exit code: 0 on success, 1 if the analysis reported an
        error, 2 on incorrect usage.
    """
    import json
    import sys

    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 1:
        print("Usage: python emotionanalysis.py <audio_file>", file=sys.stderr)
        return 2
    audio_file = args[0]

    # Create an instance of the analyzer and analyze the given audio file
    analyzer = MusicAnalyzer()
    results = analyzer.analyze_music(audio_file)

    # analyze_music returns {"error": ...} without a summary on load failure
    if "error" in results:
        print(f"Error: {results['error']}", file=sys.stderr)
        return 1

    # Print analysis summary
    summary = results["summary"]
    print("\n=== MUSIC ANALYSIS SUMMARY ===")
    print(f"Tempo: {summary['tempo']:.1f} BPM")
    print(f"Time Signature: {summary['time_signature']}")
    print(f"Key: {summary['key']} {summary['mode']}")
    print(f"Primary Emotion: {summary['primary_emotion']}")
    print(f"Primary Theme: {summary['primary_theme']}")

    # Show detailed results (optional)
    print("\n=== DETAILED ANALYSIS ===")
    print(json.dumps(results, indent=2))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
example.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
import sys
|
3 |
-
from app import process_audio
|
4 |
|
5 |
def main():
|
6 |
"""
|
@@ -23,12 +23,23 @@ def main():
|
|
23 |
# Call the main processing function
|
24 |
genre_results, lyrics = process_audio(audio_file)
|
25 |
|
|
|
|
|
|
|
26 |
# Print results
|
27 |
print("\n" + "="*50)
|
28 |
print("GENRE CLASSIFICATION RESULTS:")
|
29 |
print("="*50)
|
30 |
print(genre_results)
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
print("\n" + "="*50)
|
33 |
print("GENERATED LYRICS:")
|
34 |
print("="*50)
|
|
|
1 |
import os
|
2 |
import sys
|
3 |
+
from app import process_audio, music_analyzer
|
4 |
|
5 |
def main():
|
6 |
"""
|
|
|
23 |
# Call the main processing function
|
24 |
genre_results, lyrics = process_audio(audio_file)
|
25 |
|
26 |
+
# Get emotion analysis results
|
27 |
+
emotion_results = music_analyzer.analyze_music(audio_file)
|
28 |
+
|
29 |
# Print results
|
30 |
print("\n" + "="*50)
|
31 |
print("GENRE CLASSIFICATION RESULTS:")
|
32 |
print("="*50)
|
33 |
print(genre_results)
|
34 |
|
35 |
+
print("\n" + "="*50)
|
36 |
+
print("EMOTION ANALYSIS RESULTS:")
|
37 |
+
print("="*50)
|
38 |
+
print(f"Tempo: {emotion_results['summary']['tempo']:.1f} BPM")
|
39 |
+
print(f"Key: {emotion_results['summary']['key']} {emotion_results['summary']['mode']}")
|
40 |
+
print(f"Primary Emotion: {emotion_results['summary']['primary_emotion']}")
|
41 |
+
print(f"Primary Theme: {emotion_results['summary']['primary_theme']}")
|
42 |
+
|
43 |
print("\n" + "="*50)
|
44 |
print("GENERATED LYRICS:")
|
45 |
print("="*50)
|