root
committed on
Commit
·
a1321b3
1
Parent(s):
1ff1aab
ss
Browse files- app.py +4 -13
- emotionanalysis.py +91 -308
app.py
CHANGED
@@ -98,16 +98,13 @@ def process_audio(audio_file):
|
|
98 |
# Basic audio information
|
99 |
duration = extract_audio_duration(y, sr)
|
100 |
|
101 |
-
# Detect time signature using
|
102 |
time_sig_result = beat_analyzer.detect_time_signature(audio_file)
|
103 |
time_signature = time_sig_result["time_signature"]
|
104 |
|
105 |
# Analyze music with MusicAnalyzer for emotion and theme analysis
|
106 |
music_analysis = music_analyzer.analyze_music(audio_file)
|
107 |
|
108 |
-
# Override MusicAnalyzer's time signature with the one detected by BeatAnalyzer
|
109 |
-
music_analysis["rhythm_analysis"]["estimated_time_signature"] = time_signature
|
110 |
-
|
111 |
# Extract key information
|
112 |
tempo = music_analysis["rhythm_analysis"]["tempo"]
|
113 |
emotion = music_analysis["emotion_analysis"]["primary_emotion"]
|
@@ -142,15 +139,9 @@ def process_audio(audio_file):
|
|
142 |
genre_results_text = format_genre_results(top_genres)
|
143 |
primary_genre = top_genres[0][0]
|
144 |
|
145 |
-
#
|
146 |
-
if
|
147 |
-
|
148 |
-
time_signature = "4/4"
|
149 |
-
else:
|
150 |
-
# Ensure time signature is one of the supported ones (4/4, 3/4, 6/8)
|
151 |
-
if time_signature not in ["4/4", "3/4", "6/8"]:
|
152 |
-
time_signature = "4/4" # Default to 4/4 if unsupported
|
153 |
-
music_analysis["rhythm_analysis"]["estimated_time_signature"] = time_signature
|
154 |
|
155 |
# Analyze beat patterns and create lyrics template using the time signature
|
156 |
beat_analysis = beat_analyzer.analyze_beat_pattern(audio_file, time_signature=time_signature, auto_detect=False)
|
|
|
98 |
# Basic audio information
|
99 |
duration = extract_audio_duration(y, sr)
|
100 |
|
101 |
+
# Detect time signature using BeatAnalyzer
|
102 |
time_sig_result = beat_analyzer.detect_time_signature(audio_file)
|
103 |
time_signature = time_sig_result["time_signature"]
|
104 |
|
105 |
# Analyze music with MusicAnalyzer for emotion and theme analysis
|
106 |
music_analysis = music_analyzer.analyze_music(audio_file)
|
107 |
|
|
|
|
|
|
|
108 |
# Extract key information
|
109 |
tempo = music_analysis["rhythm_analysis"]["tempo"]
|
110 |
emotion = music_analysis["emotion_analysis"]["primary_emotion"]
|
|
|
139 |
genre_results_text = format_genre_results(top_genres)
|
140 |
primary_genre = top_genres[0][0]
|
141 |
|
142 |
+
# Ensure time signature is one of the supported ones (4/4, 3/4, 6/8)
|
143 |
+
if time_signature not in ["4/4", "3/4", "6/8"]:
|
144 |
+
time_signature = "4/4" # Default to 4/4 if unsupported
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
# Analyze beat patterns and create lyrics template using the time signature
|
147 |
beat_analysis = beat_analyzer.analyze_beat_pattern(audio_file, time_signature=time_signature, auto_detect=False)
|
emotionanalysis.py
CHANGED
@@ -2,45 +2,46 @@ import librosa
|
|
2 |
import numpy as np
|
3 |
from scipy import signal
|
4 |
from collections import Counter
|
|
|
|
|
5 |
try:
|
6 |
import matplotlib.pyplot as plt
|
7 |
except ImportError:
|
8 |
plt = None
|
9 |
-
from scipy.stats import mode
|
10 |
-
import warnings
|
11 |
-
warnings.filterwarnings('ignore') # Suppress librosa warnings
|
12 |
-
from beat_analysis import BeatAnalyzer # Import BeatAnalyzer for rhythm analysis
|
13 |
|
14 |
class MusicAnalyzer:
|
15 |
def __init__(self):
|
16 |
-
#
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
'
|
22 |
-
'
|
23 |
-
'
|
24 |
-
'
|
25 |
-
'
|
26 |
-
'
|
27 |
}
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
'
|
32 |
-
'
|
33 |
-
'
|
34 |
-
'
|
35 |
-
'
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
}
|
38 |
-
|
39 |
-
# Musical key mapping
|
40 |
self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
|
41 |
|
42 |
def load_audio(self, file_path, sr=22050, duration=None):
|
43 |
-
"""Load audio file and return time series and sample rate"""
|
44 |
try:
|
45 |
y, sr = librosa.load(file_path, sr=sr, duration=duration)
|
46 |
return y, sr
|
@@ -49,102 +50,50 @@ class MusicAnalyzer:
|
|
49 |
return None, None
|
50 |
|
51 |
def analyze_rhythm(self, y, sr):
|
52 |
-
"""Analyze rhythm-related features: tempo, beats, time signature"""
|
53 |
-
# Tempo and beat detection
|
54 |
onset_env = librosa.onset.onset_strength(y=y, sr=sr)
|
55 |
tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
|
56 |
beat_times = librosa.frames_to_time(beat_frames, sr=sr)
|
57 |
-
|
58 |
-
# Beat intervals and regularity
|
59 |
beat_intervals = np.diff(beat_times) if len(beat_times) > 1 else np.array([0])
|
60 |
beat_regularity = 1.0 / np.std(beat_intervals) if len(beat_intervals) > 0 and np.std(beat_intervals) > 0 else 0
|
61 |
-
|
62 |
-
# Rhythm pattern analysis through autocorrelation
|
63 |
ac = librosa.autocorrelate(onset_env, max_size=sr // 2)
|
64 |
ac = librosa.util.normalize(ac, norm=np.inf)
|
65 |
-
|
66 |
-
# Use BeatAnalyzer for advanced time signature detection
|
67 |
-
# We need to save the audio temporarily to use the BeatAnalyzer method
|
68 |
-
import tempfile
|
69 |
-
import soundfile as sf
|
70 |
-
|
71 |
-
# Create a temporary file
|
72 |
-
with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as temp_file:
|
73 |
-
sf.write(temp_file.name, y, sr)
|
74 |
-
# Use BeatAnalyzer's advanced time signature detection
|
75 |
-
time_sig_result = self.beat_analyzer.detect_time_signature(temp_file.name)
|
76 |
-
|
77 |
-
# Extract results from the time signature detection
|
78 |
-
estimated_signature = time_sig_result["time_signature"]
|
79 |
-
time_sig_confidence = time_sig_result["confidence"]
|
80 |
-
|
81 |
-
# Compute onset strength to get a measure of rhythm intensity
|
82 |
rhythm_intensity = np.mean(onset_env) / np.max(onset_env) if np.max(onset_env) > 0 else 0
|
83 |
-
|
84 |
-
# Rhythm complexity based on variation in onset strength
|
85 |
rhythm_complexity = np.std(onset_env) / np.mean(onset_env) if np.mean(onset_env) > 0 else 0
|
86 |
-
|
87 |
-
# Convert numpy arrays to regular Python types for JSON serialization
|
88 |
beat_times_list = [float(t) for t in beat_times.tolist()]
|
89 |
beat_intervals_list = [float(i) for i in beat_intervals.tolist()]
|
90 |
-
|
91 |
return {
|
92 |
"tempo": float(tempo),
|
93 |
"beat_times": beat_times_list,
|
94 |
"beat_intervals": beat_intervals_list,
|
95 |
"beat_regularity": float(beat_regularity),
|
96 |
"rhythm_intensity": float(rhythm_intensity),
|
97 |
-
"rhythm_complexity": float(rhythm_complexity)
|
98 |
-
"estimated_time_signature": estimated_signature,
|
99 |
-
"time_signature_confidence": float(time_sig_confidence),
|
100 |
-
"time_signature_candidates": time_sig_result.get("all_candidates", {})
|
101 |
}
|
102 |
|
103 |
def analyze_tonality(self, y, sr):
|
104 |
-
"""Analyze tonal features: key, mode, harmonic features"""
|
105 |
-
# Compute chromagram
|
106 |
chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
|
107 |
-
|
108 |
-
# Krumhansl-Schmuckler key-finding algorithm (simplified)
|
109 |
-
# Major and minor profiles from music theory research
|
110 |
major_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
|
111 |
minor_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
|
112 |
-
|
113 |
-
# Calculate the correlation of the chroma with each key profile
|
114 |
chroma_avg = np.mean(chroma, axis=1)
|
115 |
major_corr = np.zeros(12)
|
116 |
minor_corr = np.zeros(12)
|
117 |
-
|
118 |
for i in range(12):
|
119 |
major_corr[i] = np.corrcoef(np.roll(chroma_avg, i), major_profile)[0, 1]
|
120 |
minor_corr[i] = np.corrcoef(np.roll(chroma_avg, i), minor_profile)[0, 1]
|
121 |
-
|
122 |
-
# Find the key with the highest correlation
|
123 |
max_major_idx = np.argmax(major_corr)
|
124 |
max_minor_idx = np.argmax(minor_corr)
|
125 |
-
|
126 |
-
# Determine if the piece is in a major or minor key
|
127 |
if major_corr[max_major_idx] > minor_corr[max_minor_idx]:
|
128 |
mode = "major"
|
129 |
key = self.key_names[max_major_idx]
|
130 |
else:
|
131 |
mode = "minor"
|
132 |
key = self.key_names[max_minor_idx]
|
133 |
-
|
134 |
-
# Calculate harmony complexity (variability in harmonic content)
|
135 |
harmony_complexity = np.std(chroma) / np.mean(chroma) if np.mean(chroma) > 0 else 0
|
136 |
-
|
137 |
-
# Calculate tonal stability (consistency of tonal center)
|
138 |
-
tonal_stability = 1.0 / (np.std(chroma_avg) + 0.001) # Add small value to avoid division by zero
|
139 |
-
|
140 |
-
# Calculate spectral brightness (center of mass of the spectrum)
|
141 |
spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
|
142 |
-
brightness = np.mean(spectral_centroid) / (sr/2)
|
143 |
-
|
144 |
-
# Calculate dissonance using spectral contrast
|
145 |
spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
|
146 |
-
dissonance = np.mean(spectral_contrast[0])
|
147 |
-
|
148 |
return {
|
149 |
"key": key,
|
150 |
"mode": mode,
|
@@ -156,33 +105,22 @@ class MusicAnalyzer:
|
|
156 |
}
|
157 |
|
158 |
def analyze_energy(self, y, sr):
|
159 |
-
"""Analyze energy characteristics of the audio"""
|
160 |
-
# RMS Energy (overall loudness)
|
161 |
rms = librosa.feature.rms(y=y)[0]
|
162 |
-
|
163 |
-
# Energy metrics
|
164 |
mean_energy = np.mean(rms)
|
165 |
energy_std = np.std(rms)
|
166 |
energy_dynamic_range = np.max(rms) - np.min(rms) if len(rms) > 0 else 0
|
167 |
-
|
168 |
-
# Energy distribution across frequency ranges
|
169 |
spec = np.abs(librosa.stft(y))
|
170 |
-
|
171 |
-
# Divide the spectrum into low, mid, and high ranges
|
172 |
freq_bins = spec.shape[0]
|
173 |
-
low_freq_energy = np.mean(spec[:int(freq_bins*0.2), :])
|
174 |
-
mid_freq_energy = np.mean(spec[int(freq_bins*0.2):int(freq_bins*0.8), :])
|
175 |
-
high_freq_energy = np.mean(spec[int(freq_bins*0.8):, :])
|
176 |
-
|
177 |
-
# Normalize to create a distribution
|
178 |
total_energy = low_freq_energy + mid_freq_energy + high_freq_energy
|
179 |
if total_energy > 0:
|
180 |
low_freq_ratio = low_freq_energy / total_energy
|
181 |
mid_freq_ratio = mid_freq_energy / total_energy
|
182 |
high_freq_ratio = high_freq_energy / total_energy
|
183 |
else:
|
184 |
-
low_freq_ratio = mid_freq_ratio = high_freq_ratio = 1/3
|
185 |
-
|
186 |
return {
|
187 |
"mean_energy": float(mean_energy),
|
188 |
"energy_std": float(energy_std),
|
@@ -194,160 +132,86 @@ class MusicAnalyzer:
|
|
194 |
}
|
195 |
}
|
196 |
|
197 |
-
def
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
|
205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
emotion_scores = {}
|
207 |
-
for emotion,
|
208 |
-
|
209 |
-
|
210 |
-
# Tempo contribution (0-1 score)
|
211 |
-
tempo_range = profile["tempo"]
|
212 |
-
if tempo_range[0] <= tempo <= tempo_range[1]:
|
213 |
-
score += 1.0
|
214 |
-
else:
|
215 |
-
# Partial score based on distance
|
216 |
-
distance = min(abs(tempo - tempo_range[0]), abs(tempo - tempo_range[1]))
|
217 |
-
max_distance = 40 # Maximum distance to consider
|
218 |
-
score += max(0, 1 - (distance / max_distance))
|
219 |
-
|
220 |
-
# Energy contribution (0-1 score)
|
221 |
-
energy_range = profile["energy"]
|
222 |
-
if energy_range[0] <= energy <= energy_range[1]:
|
223 |
-
score += 1.0
|
224 |
-
else:
|
225 |
-
# Partial score based on distance
|
226 |
-
distance = min(abs(energy - energy_range[0]), abs(energy - energy_range[1]))
|
227 |
-
max_distance = 0.5 # Maximum distance to consider
|
228 |
-
score += max(0, 1 - (distance / max_distance))
|
229 |
-
|
230 |
-
# Mode contribution (0-1 score)
|
231 |
-
if profile["major_mode"] is not None: # Some emotions don't have strong mode preference
|
232 |
-
score += 1.0 if profile["major_mode"] == is_major else 0.0
|
233 |
-
else:
|
234 |
-
score += 0.5 # Neutral contribution
|
235 |
-
|
236 |
-
# Brightness contribution (0-1 score)
|
237 |
-
brightness_range = profile["brightness"]
|
238 |
-
if brightness_range[0] <= brightness <= brightness_range[1]:
|
239 |
-
score += 1.0
|
240 |
-
else:
|
241 |
-
# Partial score based on distance
|
242 |
-
distance = min(abs(brightness - brightness_range[0]), abs(brightness - brightness_range[1]))
|
243 |
-
max_distance = 0.5 # Maximum distance to consider
|
244 |
-
score += max(0, 1 - (distance / max_distance))
|
245 |
-
|
246 |
-
# Normalize score (0-1 range)
|
247 |
-
emotion_scores[emotion] = score / 4.0
|
248 |
-
|
249 |
-
# Find primary emotion
|
250 |
primary_emotion = max(emotion_scores.items(), key=lambda x: x[1])
|
251 |
-
|
252 |
-
|
253 |
-
# Mapping different emotions to valence-arousal space
|
254 |
-
valence_map = {
|
255 |
-
'happy': 0.8, 'sad': 0.2, 'calm': 0.6,
|
256 |
-
'energetic': 0.7, 'tense': 0.3, 'nostalgic': 0.5
|
257 |
-
}
|
258 |
-
|
259 |
-
arousal_map = {
|
260 |
-
'happy': 0.7, 'sad': 0.3, 'calm': 0.2,
|
261 |
-
'energetic': 0.9, 'tense': 0.8, 'nostalgic': 0.4
|
262 |
-
}
|
263 |
-
|
264 |
-
# Calculate weighted valence and arousal
|
265 |
-
total_weight = sum(emotion_scores.values())
|
266 |
-
if total_weight > 0:
|
267 |
-
valence = sum(score * valence_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
|
268 |
-
arousal = sum(score * arousal_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
|
269 |
-
else:
|
270 |
-
valence = 0.5
|
271 |
-
arousal = 0.5
|
272 |
-
|
273 |
return {
|
274 |
"primary_emotion": primary_emotion[0],
|
275 |
-
"confidence": primary_emotion[1],
|
276 |
-
"emotion_scores": emotion_scores,
|
277 |
-
"valence":
|
278 |
-
"arousal":
|
|
|
279 |
}
|
280 |
|
281 |
def analyze_theme(self, rhythm_data, tonal_data, emotion_data):
|
282 |
-
|
283 |
-
|
284 |
-
primary_emotion = emotion_data["primary_emotion"]
|
285 |
-
harmony_complexity = tonal_data["harmony_complexity"]
|
286 |
-
|
287 |
-
# Calculate theme scores
|
288 |
theme_scores = {}
|
289 |
-
for theme,
|
290 |
score = 0.0
|
291 |
-
|
292 |
-
|
293 |
-
if
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
for emotion in secondary_emotions:
|
302 |
-
if emotion in profile["emotion"]:
|
303 |
-
score += 0.3 # Less weight than primary emotion
|
304 |
-
|
305 |
-
# Harmony complexity contribution
|
306 |
-
complexity_range = profile["harmony_complexity"]
|
307 |
-
if complexity_range[0] <= harmony_complexity <= complexity_range[1]:
|
308 |
-
score += 1.0
|
309 |
-
else:
|
310 |
-
# Partial score based on distance
|
311 |
-
distance = min(abs(harmony_complexity - complexity_range[0]),
|
312 |
-
abs(harmony_complexity - complexity_range[1]))
|
313 |
-
max_distance = 0.5 # Maximum distance to consider
|
314 |
-
score += max(0, 1 - (distance / max_distance))
|
315 |
-
|
316 |
-
# Normalize score
|
317 |
-
theme_scores[theme] = min(1.0, score / 2.5)
|
318 |
-
|
319 |
-
# Find primary theme
|
320 |
primary_theme = max(theme_scores.items(), key=lambda x: x[1])
|
321 |
-
|
322 |
-
|
323 |
-
secondary_themes = [(theme, score) for theme, score in theme_scores.items()
|
324 |
-
if score > 0.5 and theme != primary_theme[0]]
|
325 |
-
secondary_themes.sort(key=lambda x: x[1], reverse=True)
|
326 |
-
|
327 |
return {
|
328 |
"primary_theme": primary_theme[0],
|
329 |
"confidence": primary_theme[1],
|
330 |
-
"secondary_themes":
|
331 |
"theme_scores": theme_scores
|
332 |
}
|
333 |
|
334 |
def analyze_music(self, file_path):
|
335 |
-
"""Main function to perform comprehensive music analysis"""
|
336 |
-
# Load the audio file
|
337 |
y, sr = self.load_audio(file_path)
|
338 |
if y is None:
|
339 |
return {"error": "Failed to load audio file"}
|
340 |
-
|
341 |
-
# Run all analyses
|
342 |
rhythm_data = self.analyze_rhythm(y, sr)
|
343 |
tonal_data = self.analyze_tonality(y, sr)
|
344 |
energy_data = self.analyze_energy(y, sr)
|
345 |
-
|
346 |
-
# Higher-level analyses that depend on the basic features
|
347 |
emotion_data = self.analyze_emotion(rhythm_data, tonal_data, energy_data)
|
348 |
theme_data = self.analyze_theme(rhythm_data, tonal_data, emotion_data)
|
349 |
-
|
350 |
-
# Convert any remaining numpy values to native Python types
|
351 |
def convert_numpy_to_python(obj):
|
352 |
if isinstance(obj, dict):
|
353 |
return {k: convert_numpy_to_python(v) for k, v in obj.items()}
|
@@ -359,15 +223,11 @@ class MusicAnalyzer:
|
|
359 |
return float(obj)
|
360 |
else:
|
361 |
return obj
|
362 |
-
|
363 |
-
# Ensure all numpy values are converted
|
364 |
rhythm_data = convert_numpy_to_python(rhythm_data)
|
365 |
tonal_data = convert_numpy_to_python(tonal_data)
|
366 |
energy_data = convert_numpy_to_python(energy_data)
|
367 |
emotion_data = convert_numpy_to_python(emotion_data)
|
368 |
theme_data = convert_numpy_to_python(theme_data)
|
369 |
-
|
370 |
-
# Combine all results
|
371 |
return {
|
372 |
"file": file_path,
|
373 |
"rhythm_analysis": rhythm_data,
|
@@ -377,83 +237,11 @@ class MusicAnalyzer:
|
|
377 |
"theme_analysis": theme_data,
|
378 |
"summary": {
|
379 |
"tempo": float(rhythm_data["tempo"]),
|
380 |
-
"time_signature": rhythm_data["estimated_time_signature"],
|
381 |
-
"key": tonal_data["key"],
|
382 |
-
"mode": tonal_data["mode"],
|
383 |
"primary_emotion": emotion_data["primary_emotion"],
|
384 |
"primary_theme": theme_data["primary_theme"]
|
385 |
}
|
386 |
}
|
387 |
|
388 |
-
# def visualize_analysis(self, file_path):
|
389 |
-
# """Create visualizations for the music analysis results"""
|
390 |
-
# # Check if matplotlib is available
|
391 |
-
# if plt is None:
|
392 |
-
# print("Error: matplotlib is not installed. Visualization is not available.")
|
393 |
-
# return
|
394 |
-
#
|
395 |
-
# # Load audio and run analysis
|
396 |
-
# y, sr = self.load_audio(file_path)
|
397 |
-
# if y is None:
|
398 |
-
# print("Error: Failed to load audio file")
|
399 |
-
# return
|
400 |
-
#
|
401 |
-
# results = self.analyze_music(file_path)
|
402 |
-
#
|
403 |
-
# # Create visualization
|
404 |
-
# plt.figure(figsize=(15, 12))
|
405 |
-
|
406 |
-
# # Waveform
|
407 |
-
# plt.subplot(3, 2, 1)
|
408 |
-
# librosa.display.waveshow(y, sr=sr, alpha=0.6)
|
409 |
-
# plt.title(f'Waveform (Tempo: {results["rhythm_analysis"]["tempo"]:.1f} BPM)')
|
410 |
-
|
411 |
-
# # Spectrogram
|
412 |
-
# plt.subplot(3, 2, 2)
|
413 |
-
# D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
|
414 |
-
# librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
|
415 |
-
# plt.colorbar(format='%+2.0f dB')
|
416 |
-
# plt.title(f'Spectrogram (Key: {results["tonal_analysis"]["key"]} {results["tonal_analysis"]["mode"]})')
|
417 |
-
|
418 |
-
# # Chromagram
|
419 |
-
# plt.subplot(3, 2, 3)
|
420 |
-
# chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
|
421 |
-
# librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
|
422 |
-
# plt.colorbar()
|
423 |
-
# plt.title('Chromagram')
|
424 |
-
|
425 |
-
# # Onset strength and beats
|
426 |
-
# plt.subplot(3, 2, 4)
|
427 |
-
# onset_env = librosa.onset.onset_strength(y=y, sr=sr)
|
428 |
-
# times = librosa.times_like(onset_env, sr=sr)
|
429 |
-
# plt.plot(times, librosa.util.normalize(onset_env), label='Onset strength')
|
430 |
-
# plt.vlines(results["rhythm_analysis"]["beat_times"], 0, 1, alpha=0.5, color='r',
|
431 |
-
# linestyle='--', label='Beats')
|
432 |
-
# plt.legend()
|
433 |
-
# plt.title('Rhythm Analysis')
|
434 |
-
|
435 |
-
# # Emotion scores
|
436 |
-
# plt.subplot(3, 2, 5)
|
437 |
-
# emotions = list(results["emotion_analysis"]["emotion_scores"].keys())
|
438 |
-
# scores = list(results["emotion_analysis"]["emotion_scores"].values())
|
439 |
-
# plt.bar(emotions, scores, color='skyblue')
|
440 |
-
# plt.ylim(0, 1)
|
441 |
-
# plt.title(f'Emotion Analysis (Primary: {results["emotion_analysis"]["primary_emotion"]})')
|
442 |
-
# plt.xticks(rotation=45)
|
443 |
-
|
444 |
-
# # Theme scores
|
445 |
-
# plt.subplot(3, 2, 6)
|
446 |
-
# themes = list(results["theme_analysis"]["theme_scores"].keys())
|
447 |
-
# scores = list(results["theme_analysis"]["theme_scores"].values())
|
448 |
-
# plt.bar(themes, scores, color='lightgreen')
|
449 |
-
# plt.ylim(0, 1)
|
450 |
-
# plt.title(f'Theme Analysis (Primary: {results["theme_analysis"]["primary_theme"]})')
|
451 |
-
# plt.xticks(rotation=45)
|
452 |
-
|
453 |
-
# plt.tight_layout()
|
454 |
-
# plt.show()
|
455 |
-
|
456 |
-
|
457 |
# Create an instance of the analyzer
|
458 |
analyzer = MusicAnalyzer()
|
459 |
|
@@ -469,15 +257,10 @@ if __name__ == "__main__":
|
|
469 |
# Print analysis summary
|
470 |
print("\n=== MUSIC ANALYSIS SUMMARY ===")
|
471 |
print(f"Tempo: {results['summary']['tempo']:.1f} BPM")
|
472 |
-
print(f"Time Signature: {results['summary']['time_signature']}")
|
473 |
-
print(f"Key: {results['summary']['key']} {results['summary']['mode']}")
|
474 |
print(f"Primary Emotion: {results['summary']['primary_emotion']}")
|
475 |
print(f"Primary Theme: {results['summary']['primary_theme']}")
|
476 |
|
477 |
# Show detailed results (optional)
|
478 |
import json
|
479 |
print("\n=== DETAILED ANALYSIS ===")
|
480 |
-
print(json.dumps(results, indent=2))
|
481 |
-
|
482 |
-
# Visualize the analysis
|
483 |
-
# analyzer.visualize_analysis(demo_file)
|
|
|
2 |
import numpy as np
|
3 |
from scipy import signal
|
4 |
from collections import Counter
|
5 |
+
import warnings
|
6 |
+
warnings.filterwarnings('ignore') # Suppress librosa warnings
|
7 |
try:
|
8 |
import matplotlib.pyplot as plt
|
9 |
except ImportError:
|
10 |
plt = None
|
|
|
|
|
|
|
|
|
11 |
|
12 |
class MusicAnalyzer:
|
13 |
def __init__(self):
|
14 |
+
# Scientifically grounded emotion classes (valence, arousal space)
|
15 |
+
# See: Eerola & Vuoskoski, 2011; Russell, 1980
|
16 |
+
self.emotion_classes = {
|
17 |
+
'happy': {'valence': 0.9, 'arousal': 0.7},
|
18 |
+
'excited': {'valence': 0.8, 'arousal': 0.95},
|
19 |
+
'tender': {'valence': 0.7, 'arousal': 0.3},
|
20 |
+
'calm': {'valence': 0.65, 'arousal': 0.15},
|
21 |
+
'sad': {'valence': 0.2, 'arousal': 0.25},
|
22 |
+
'depressed': {'valence': 0.05, 'arousal': 0.05},
|
23 |
+
'angry': {'valence': 0.1, 'arousal': 0.8},
|
24 |
+
'fearful': {'valence': 0.05, 'arousal': 0.95}
|
25 |
}
|
26 |
+
# Theme classes based on emotion clusters (from Allan, 2014, with mapping)
|
27 |
+
self.theme_classes = {
|
28 |
+
'love': ['tender', 'calm', 'happy'],
|
29 |
+
'triumph': ['excited', 'happy', 'angry'],
|
30 |
+
'loss': ['sad', 'depressed'],
|
31 |
+
'adventure': ['excited', 'fearful'],
|
32 |
+
'reflection': ['calm', 'sad'],
|
33 |
+
'conflict': ['angry', 'fearful']
|
34 |
+
}
|
35 |
+
self.feature_weights = {
|
36 |
+
'mode': 0.25,
|
37 |
+
'tempo': 0.2,
|
38 |
+
'energy': 0.2,
|
39 |
+
'brightness': 0.2,
|
40 |
+
'rhythm_complexity': 0.15
|
41 |
}
|
|
|
|
|
42 |
self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
|
43 |
|
44 |
def load_audio(self, file_path, sr=22050, duration=None):
|
|
|
45 |
try:
|
46 |
y, sr = librosa.load(file_path, sr=sr, duration=duration)
|
47 |
return y, sr
|
|
|
50 |
return None, None
|
51 |
|
52 |
def analyze_rhythm(self, y, sr):
    """Extract tempo, beat timing and onset-based rhythm statistics.

    Args:
        y: Audio time series; handed to librosa unchanged.
        sr: Sampling rate of ``y``.

    Returns:
        dict with the keys ``tempo``, ``beat_times``, ``beat_intervals``,
        ``beat_regularity``, ``rhythm_intensity`` and ``rhythm_complexity``.
        Every value is a plain Python float or a list of floats.
    """
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)

    # NOTE(review): depending on the installed librosa release the tempo may
    # come back as a one-element array instead of a scalar; take the first
    # element so float() is never applied to a 1-d array. Confirm against
    # the pinned librosa version.
    tempo = float(np.atleast_1d(tempo)[0])

    # With fewer than two beats there is no interval to measure; a single
    # zero keeps the downstream list non-empty.
    beat_intervals = np.diff(beat_times) if len(beat_times) > 1 else np.array([0])

    # Regularity is the inverse spread of the beat intervals; a zero spread
    # maps to 0 rather than dividing by zero.
    interval_spread = np.std(beat_intervals) if len(beat_intervals) > 0 else 0
    beat_regularity = 1.0 / interval_spread if interval_spread > 0 else 0

    onset_mean = np.mean(onset_env)
    onset_peak = np.max(onset_env)
    # Mean onset strength relative to its peak.
    rhythm_intensity = onset_mean / onset_peak if onset_peak > 0 else 0
    # Coefficient of variation of the onset strength.
    rhythm_complexity = np.std(onset_env) / onset_mean if onset_mean > 0 else 0

    return {
        "tempo": tempo,
        "beat_times": [float(t) for t in beat_times],
        "beat_intervals": [float(i) for i in beat_intervals],
        "beat_regularity": float(beat_regularity),
        "rhythm_intensity": float(rhythm_intensity),
        "rhythm_complexity": float(rhythm_complexity)
    }
|
72 |
|
73 |
def analyze_tonality(self, y, sr):
|
|
|
|
|
74 |
chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
|
|
|
|
|
|
|
75 |
major_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
|
76 |
minor_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
|
|
|
|
|
77 |
chroma_avg = np.mean(chroma, axis=1)
|
78 |
major_corr = np.zeros(12)
|
79 |
minor_corr = np.zeros(12)
|
|
|
80 |
for i in range(12):
|
81 |
major_corr[i] = np.corrcoef(np.roll(chroma_avg, i), major_profile)[0, 1]
|
82 |
minor_corr[i] = np.corrcoef(np.roll(chroma_avg, i), minor_profile)[0, 1]
|
|
|
|
|
83 |
max_major_idx = np.argmax(major_corr)
|
84 |
max_minor_idx = np.argmax(minor_corr)
|
|
|
|
|
85 |
if major_corr[max_major_idx] > minor_corr[max_minor_idx]:
|
86 |
mode = "major"
|
87 |
key = self.key_names[max_major_idx]
|
88 |
else:
|
89 |
mode = "minor"
|
90 |
key = self.key_names[max_minor_idx]
|
|
|
|
|
91 |
harmony_complexity = np.std(chroma) / np.mean(chroma) if np.mean(chroma) > 0 else 0
|
92 |
+
tonal_stability = 1.0 / (np.std(chroma_avg) + 0.001)
|
|
|
|
|
|
|
|
|
93 |
spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
|
94 |
+
brightness = np.mean(spectral_centroid) / (sr / 2)
|
|
|
|
|
95 |
spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
|
96 |
+
dissonance = np.mean(spectral_contrast[0])
|
|
|
97 |
return {
|
98 |
"key": key,
|
99 |
"mode": mode,
|
|
|
105 |
}
|
106 |
|
107 |
def analyze_energy(self, y, sr):
|
|
|
|
|
108 |
rms = librosa.feature.rms(y=y)[0]
|
|
|
|
|
109 |
mean_energy = np.mean(rms)
|
110 |
energy_std = np.std(rms)
|
111 |
energy_dynamic_range = np.max(rms) - np.min(rms) if len(rms) > 0 else 0
|
|
|
|
|
112 |
spec = np.abs(librosa.stft(y))
|
|
|
|
|
113 |
freq_bins = spec.shape[0]
|
114 |
+
low_freq_energy = np.mean(spec[:int(freq_bins * 0.2), :])
|
115 |
+
mid_freq_energy = np.mean(spec[int(freq_bins * 0.2):int(freq_bins * 0.8), :])
|
116 |
+
high_freq_energy = np.mean(spec[int(freq_bins * 0.8):, :])
|
|
|
|
|
117 |
total_energy = low_freq_energy + mid_freq_energy + high_freq_energy
|
118 |
if total_energy > 0:
|
119 |
low_freq_ratio = low_freq_energy / total_energy
|
120 |
mid_freq_ratio = mid_freq_energy / total_energy
|
121 |
high_freq_ratio = high_freq_energy / total_energy
|
122 |
else:
|
123 |
+
low_freq_ratio = mid_freq_ratio = high_freq_ratio = 1 / 3
|
|
|
124 |
return {
|
125 |
"mean_energy": float(mean_energy),
|
126 |
"energy_std": float(energy_std),
|
|
|
132 |
}
|
133 |
}
|
134 |
|
135 |
+
def feature_to_valence_arousal(self, features):
    """Map raw audio features onto a (valence, arousal) point in [0, 1]^2.

    Args:
        features: dict with the keys ``tempo``, ``energy``, ``brightness``,
            ``rhythm_complexity`` (numbers) and ``is_major`` (truthy for a
            major mode).

    Returns:
        ``(valence, arousal)`` as two Python floats, each clipped to [0, 1].

    Raises:
        KeyError: if a required feature or weight is missing.
    """
    weights = self.feature_weights

    # Normalize features to [0, 1]; tempo is scaled over the 40-200 range.
    tempo_norm = float(np.clip((features['tempo'] - 40) / (200 - 40), 0, 1))
    energy_norm = float(np.clip(features['energy'] / 1.0, 0, 1))
    brightness_norm = float(np.clip(features['brightness'] / 1.0, 0, 1))
    rhythm_complexity_norm = float(np.clip(features['rhythm_complexity'] / 2.0, 0, 1))
    mode_score = 1.0 if features['is_major'] else 0.0

    valence_terms = (
        (weights['mode'], mode_score),
        (weights['tempo'], tempo_norm),
        (weights['energy'], energy_norm),
        (weights['brightness'], brightness_norm),
    )
    arousal_terms = (
        (weights['tempo'], tempo_norm),
        (weights['energy'], energy_norm),
        (weights['brightness'], brightness_norm),
        (weights['rhythm_complexity'], rhythm_complexity_norm),
    )

    def weighted_mean(terms):
        # Each axis uses only a subset of the weights, so divide by the
        # subset's own total; otherwise the axis can never reach 1.0.
        total = sum(weight for weight, _ in terms)
        blended = sum(weight * value for weight, value in terms)
        return blended / total if total > 0 else blended

    valence = weighted_mean(valence_terms)
    arousal = weighted_mean(arousal_terms)
    return float(np.clip(valence, 0, 1)), float(np.clip(arousal, 0, 1))
|
154 |
|
155 |
+
def analyze_emotion(self, rhythm_data, tonal_data, energy_data):
    """Classify the track's emotion from rhythm, tonal and energy features.

    The features are projected to a (valence, arousal) point and every
    emotion class is scored by how close its anchor lies to that point.

    Args:
        rhythm_data: dict providing ``tempo`` and ``rhythm_complexity``.
        tonal_data: dict providing ``brightness`` and either ``is_major``
            or ``mode`` (``"major"`` counts as major).
        energy_data: dict providing ``mean_energy``.

    Returns:
        dict with ``primary_emotion``, ``confidence`` (score of the primary
        emotion, in [0, 1]), ``emotion_scores``, ``valence``, ``arousal``
        and ``secondary_emotion`` (``None`` when only one class exists).
    """
    # Prefer an explicit flag, but fall back to the textual mode so a tonal
    # result that only carries "mode" does not raise KeyError.
    if 'is_major' in tonal_data:
        is_major = bool(tonal_data['is_major'])
    else:
        is_major = tonal_data.get('mode') == 'major'

    features = {
        'tempo': rhythm_data['tempo'],
        'energy': energy_data['mean_energy'],
        'is_major': is_major,
        'brightness': tonal_data['brightness'],
        'rhythm_complexity': rhythm_data['rhythm_complexity']
    }
    valence, arousal = self.feature_to_valence_arousal(features)

    distances = {}
    for emotion, va in self.emotion_classes.items():
        distances[emotion] = float(
            np.sqrt((valence - va['valence']) ** 2 + (arousal - va['arousal']) ** 2)
        )

    # Distances in the unit square can reach sqrt(2), so floor the score at
    # zero to keep scores and confidence inside [0, 1].
    emotion_scores = {
        emotion: max(0.0, 1.0 - dist) for emotion, dist in distances.items()
    }

    # Rank on the raw distance (closest first) so that flooring the scores
    # cannot introduce ties; the sort is stable, so equal distances keep
    # the declaration order of the emotion classes.
    ranked = sorted(distances.items(), key=lambda item: item[1])
    primary_name = ranked[0][0]
    secondary_emotion = ranked[1][0] if len(ranked) > 1 else None

    return {
        "primary_emotion": primary_name,
        "confidence": float(emotion_scores[primary_name]),
        "emotion_scores": {k: float(v) for k, v in emotion_scores.items()},
        "valence": valence,
        "arousal": arousal,
        "secondary_emotion": secondary_emotion
    }
|
179 |
|
180 |
def analyze_theme(self, rhythm_data, tonal_data, emotion_data):
|
181 |
+
primary_emotion = emotion_data['primary_emotion']
|
182 |
+
secondary_emotion = emotion_data.get('secondary_emotion')
|
|
|
|
|
|
|
|
|
183 |
theme_scores = {}
|
184 |
+
for theme, emolist in self.theme_classes.items():
|
185 |
score = 0.0
|
186 |
+
if primary_emotion in emolist:
|
187 |
+
score += 0.7
|
188 |
+
if secondary_emotion in emolist:
|
189 |
+
score += 0.3
|
190 |
+
harmony_complexity = tonal_data.get('harmony_complexity', 0.5)
|
191 |
+
if theme in ['adventure', 'conflict']:
|
192 |
+
score += 0.3 * np.clip((harmony_complexity - 0.4) / 0.6, 0, 1)
|
193 |
+
elif theme in ['love', 'reflection']:
|
194 |
+
score += 0.3 * np.clip((0.6 - harmony_complexity) / 0.6, 0, 1)
|
195 |
+
theme_scores[theme] = float(np.clip(score, 0, 1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
primary_theme = max(theme_scores.items(), key=lambda x: x[1])
|
197 |
+
secondary_themes = [k for k, v in sorted(theme_scores.items(), key=lambda x: x[1], reverse=True)
|
198 |
+
if k != primary_theme[0] and v > 0.5]
|
|
|
|
|
|
|
|
|
199 |
return {
|
200 |
"primary_theme": primary_theme[0],
|
201 |
"confidence": primary_theme[1],
|
202 |
+
"secondary_themes": secondary_themes[:2],
|
203 |
"theme_scores": theme_scores
|
204 |
}
|
205 |
|
206 |
def analyze_music(self, file_path):
|
|
|
|
|
207 |
y, sr = self.load_audio(file_path)
|
208 |
if y is None:
|
209 |
return {"error": "Failed to load audio file"}
|
|
|
|
|
210 |
rhythm_data = self.analyze_rhythm(y, sr)
|
211 |
tonal_data = self.analyze_tonality(y, sr)
|
212 |
energy_data = self.analyze_energy(y, sr)
|
|
|
|
|
213 |
emotion_data = self.analyze_emotion(rhythm_data, tonal_data, energy_data)
|
214 |
theme_data = self.analyze_theme(rhythm_data, tonal_data, emotion_data)
|
|
|
|
|
215 |
def convert_numpy_to_python(obj):
|
216 |
if isinstance(obj, dict):
|
217 |
return {k: convert_numpy_to_python(v) for k, v in obj.items()}
|
|
|
223 |
return float(obj)
|
224 |
else:
|
225 |
return obj
|
|
|
|
|
226 |
rhythm_data = convert_numpy_to_python(rhythm_data)
|
227 |
tonal_data = convert_numpy_to_python(tonal_data)
|
228 |
energy_data = convert_numpy_to_python(energy_data)
|
229 |
emotion_data = convert_numpy_to_python(emotion_data)
|
230 |
theme_data = convert_numpy_to_python(theme_data)
|
|
|
|
|
231 |
return {
|
232 |
"file": file_path,
|
233 |
"rhythm_analysis": rhythm_data,
|
|
|
237 |
"theme_analysis": theme_data,
|
238 |
"summary": {
|
239 |
"tempo": float(rhythm_data["tempo"]),
|
|
|
|
|
|
|
240 |
"primary_emotion": emotion_data["primary_emotion"],
|
241 |
"primary_theme": theme_data["primary_theme"]
|
242 |
}
|
243 |
}
|
244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
# Create an instance of the analyzer
|
246 |
analyzer = MusicAnalyzer()
|
247 |
|
|
|
257 |
# Print analysis summary
|
258 |
print("\n=== MUSIC ANALYSIS SUMMARY ===")
|
259 |
print(f"Tempo: {results['summary']['tempo']:.1f} BPM")
|
|
|
|
|
260 |
print(f"Primary Emotion: {results['summary']['primary_emotion']}")
|
261 |
print(f"Primary Theme: {results['summary']['primary_theme']}")
|
262 |
|
263 |
# Show detailed results (optional)
|
264 |
import json
|
265 |
print("\n=== DETAILED ANALYSIS ===")
|
266 |
+
print(json.dumps(results, indent=2))
|
|
|
|
|
|