jacob-c committed
Commit 2ef1fb1 · 1 Parent(s): 1bfb4bb
Files changed (2):
  1. beat_analysis.py +411 -1
  2. emotionanalysis.py +14 -503
beat_analysis.py CHANGED
@@ -6,6 +6,7 @@ from functools import lru_cache
 import string
 from nltk.corpus import cmudict
 import nltk
+from scipy import signal
 
 try:
     nltk.data.find('corpora/cmudict')
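The new scipy import exists for peak picking: the added detection helpers call signal.find_peaks on autocorrelation curves and on the onset-envelope spectrum. A toy sketch of the two filters those calls rely on (the array values are invented for illustration):

    import numpy as np
    from scipy import signal

    # Invented autocorrelation-like curve with an accent roughly every 4 lags
    ac = np.array([0.1, 0.2, 0.4, 0.2, 0.9, 0.1, 0.35, 0.15, 0.85, 0.2, 0.3, 0.1])

    # height drops weak local maxima; distance enforces a minimum spacing
    # between the peaks that survive
    peaks, props = signal.find_peaks(ac, height=0.2, distance=2)
    print(peaks)  # indices of the surviving peaks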
@@ -126,9 +127,418 @@ class BeatAnalyzer:
 
         # Ensure at least one syllable
         return max(count, 1)
+
+    def detect_time_signature(self, audio_path, sr=22050):
+        """
+        Advanced multi-method approach to time signature detection
+
+        Args:
+            audio_path: Path to audio file
+            sr: Sample rate
+
+        Returns:
+            dict with detected time signature and confidence
+        """
+        # Load audio
+        y, sr = librosa.load(audio_path, sr=sr)
+
+        # 1. Compute onset envelope and beat positions
+        onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
+
+        # Get tempo and beat frames
+        tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
+        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
+
+        # Return default if not enough beats detected
+        if len(beat_times) < 8:
+            return {"time_signature": "4/4", "confidence": 0.5}
+
+        # 2. Extract beat strengths and normalize
+        beat_strengths = self._get_beat_strengths(y, sr, beat_times, onset_env)
+
+        # 3. Compute various time signature features using different methods
+        results = {}
+
+        # Method 1: Beat pattern autocorrelation
+        autocorr_result = self._detect_by_autocorrelation(onset_env, sr)
+        results["autocorrelation"] = autocorr_result
+
+        # Method 2: Beat strength pattern matching
+        pattern_result = self._detect_by_pattern_matching(beat_strengths)
+        results["pattern_matching"] = pattern_result
+
+        # Method 3: Spectral rhythmic analysis
+        spectral_result = self._detect_by_spectral_analysis(onset_env, sr)
+        results["spectral"] = spectral_result
+
+        # Method 4: Note density analysis
+        density_result = self._detect_by_note_density(y, sr, beat_times)
+        results["note_density"] = density_result
+
+        # Method 5: Tempo-based estimation
+        tempo_result = self._estimate_from_tempo(tempo)
+        results["tempo_based"] = tempo_result
+
+        # 4. Combine results with weighted voting
+        final_result = self._combine_detection_results(results, tempo)
+
+        return final_result
+
+    def _get_beat_strengths(self, y, sr, beat_times, onset_env):
+        """Extract normalized strengths at beat positions"""
+        # Convert beat times to frames
+        beat_frames = librosa.time_to_frames(beat_times, sr=sr, hop_length=512)
+        beat_frames = [min(f, len(onset_env)-1) for f in beat_frames]
+
+        # Get beat strengths from onset envelope
+        beat_strengths = np.array([onset_env[f] for f in beat_frames])
+
+        # Also look at energy and spectral flux at beat positions
+        hop_length = 512
+        frame_length = 2048
+
+        # Get energy at each beat
+        energy = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0]
+        beat_energy = np.array([energy[min(f, len(energy)-1)] for f in beat_frames])
+
+        # Combine onset strength with energy (weighted average)
+        beat_strengths = 0.7 * beat_strengths + 0.3 * beat_energy
+
+        # Normalize
+        if np.max(beat_strengths) > 0:
+            beat_strengths = beat_strengths / np.max(beat_strengths)
+
+        return beat_strengths
+
+    def _detect_by_autocorrelation(self, onset_env, sr):
+        """Detect meter using autocorrelation of onset strength"""
+        # Calculate autocorrelation of onset envelope
+        hop_length = 512
+        ac = librosa.autocorrelate(onset_env, max_size=4 * sr // hop_length)
+        ac = librosa.util.normalize(ac)
+
+        # Find significant peaks in autocorrelation
+        peaks = signal.find_peaks(ac, height=0.2, distance=sr//(8*hop_length))[0]
+
+        if len(peaks) < 2:
+            return {"time_signature": "4/4", "confidence": 0.4}
+
+        # Analyze peak intervals in terms of beats
+        peak_intervals = np.diff(peaks)
+
+        # Convert peaks to time
+        peak_times = peaks * hop_length / sr
+
+        # Analyze for common time signature patterns
+        time_sig_votes = {}
+
+        # Check if peaks match expected bar lengths
+        for ts, info in self.common_time_signatures.items():
+            beats_per_bar = info["beats_per_bar"]
+
+            # Check how well peaks match this meter
+            score = 0
+            for interval in peak_intervals:
+                # Check if this interval corresponds to this time signature
+                # Allow some tolerance around the expected value
+                expected = beats_per_bar * (hop_length / sr) # in seconds
+                tolerance = 0.25 * expected
+
+                if abs(interval * hop_length / sr - expected) < tolerance:
+                    score += 1
+
+            if len(peak_intervals) > 0:
+                time_sig_votes[ts] = score / len(peak_intervals)
+
+        # Return most likely time signature
+        if time_sig_votes:
+            best_ts = max(time_sig_votes.items(), key=lambda x: x[1])
+            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
+
+        return {"time_signature": "4/4", "confidence": 0.4}
+
+    def _detect_by_pattern_matching(self, beat_strengths):
+        """Match beat strength patterns against known time signature patterns"""
+        if len(beat_strengths) < 6:
+            return {"time_signature": "4/4", "confidence": 0.4}
+
+        results = {}
+
+        # Try each possible time signature
+        for ts, info in self.common_time_signatures.items():
+            beats_per_bar = info["beats_per_bar"]
+            expected_pattern = info["beat_pattern"]
+
+            # Calculate correlation scores for overlapping segments
+            scores = []
+
+            # We need at least one complete pattern
+            if len(beat_strengths) >= beats_per_bar:
+                # Try different offsets to find best alignment
+                for offset in range(min(beats_per_bar, len(beat_strengths) - beats_per_bar + 1)):
+                    # Calculate scores for each complete pattern
+                    pattern_scores = []
+
+                    for i in range(offset, len(beat_strengths) - beats_per_bar + 1, beats_per_bar):
+                        segment = beat_strengths[i:i+beats_per_bar]
+
+                        # If expected pattern is longer than segment, truncate it
+                        pattern = expected_pattern[:len(segment)]
+
+                        # Normalize segment and pattern
+                        if np.std(segment) > 0 and np.std(pattern) > 0:
+                            # Calculate correlation
+                            corr = np.corrcoef(segment, pattern)[0, 1]
+                            if not np.isnan(corr):
+                                pattern_scores.append(corr)
+
+                    if pattern_scores:
+                        scores.append(np.mean(pattern_scores))
+
+            # Use the best score among different offsets
+            if scores:
+                confidence = max(scores)
+                results[ts] = confidence
+
+        # Find best match
+        if results:
+            best_ts = max(results.items(), key=lambda x: x[1])
+            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
+
+        # Default
+        return {"time_signature": "4/4", "confidence": 0.5}
+
+    def _detect_by_spectral_analysis(self, onset_env, sr):
+        """Analyze rhythm in frequency domain"""
+        # Get rhythm periodicity through Fourier Transform
+        # Focus on periods corresponding to typical bar lengths (1-8 seconds)
+        hop_length = 512
+
+        # Calculate rhythm periodicity
+        fft_size = 2**13 # Large enough to give good frequency resolution
+        S = np.abs(np.fft.rfft(onset_env, n=fft_size))
+
+        # Convert frequency to tempo in BPM
+        freqs = np.fft.rfftfreq(fft_size, d=hop_length/sr)
+        tempos = 60 * freqs
+
+        # Focus on reasonable tempo range (40-240 BPM)
+        tempo_mask = (tempos >= 40) & (tempos <= 240)
+        S_tempo = S[tempo_mask]
+        tempos = tempos[tempo_mask]
+
+        # Find peaks in spectrum
+        peaks = signal.find_peaks(S_tempo, height=np.max(S_tempo)*0.1, distance=5)[0]
+
+        if len(peaks) == 0:
+            return {"time_signature": "4/4", "confidence": 0.4}
+
+        # Get peak tempos and strengths
+        peak_tempos = tempos[peaks]
+        peak_strengths = S_tempo[peaks]
+
+        # Sort by strength
+        peak_indices = np.argsort(peak_strengths)[::-1]
+        peak_tempos = peak_tempos[peak_indices]
+        peak_strengths = peak_strengths[peak_indices]
+
+        # Analyze relationships between peaks
+        # For example, 3/4 typically has peaks at multiples of 3 beats
+        # 4/4 has peaks at multiples of 4 beats
+
+        time_sig_scores = {}
+
+        # Check relationships between top peaks
+        if len(peak_tempos) >= 2:
+            tempo_ratios = []
+            for i in range(len(peak_tempos)):
+                for j in range(i+1, len(peak_tempos)):
+                    if peak_tempos[j] > 0:
+                        ratio = peak_tempos[i] / peak_tempos[j]
+                        tempo_ratios.append(ratio)
+
+            # Check for patterns indicative of different time signatures
+            for ts in self.common_time_signatures:
+                score = 0
+
+                if ts == "4/4" or ts == "6/8":
+                    # Look for ratios close to 4 or 6
+                    for ratio in tempo_ratios:
+                        if abs(ratio - 4) < 0.2 or abs(ratio - 6) < 0.3:
+                            score += 1
+
+                # Normalize score
+                if tempo_ratios:
+                    time_sig_scores[ts] = min(1.0, score / len(tempo_ratios) + 0.4)
+
+        # If we have meaningful scores, return best match
+        if time_sig_scores:
+            best_ts = max(time_sig_scores.items(), key=lambda x: x[1])
+            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
+
+        # Default fallback
+        return {"time_signature": "4/4", "confidence": 0.4}
 
+    def _detect_by_note_density(self, y, sr, beat_times):
+        """Analyze note density patterns between beats"""
+        if len(beat_times) < 6:
+            return {"time_signature": "4/4", "confidence": 0.4}
+
+        # Extract note onsets (not just beats)
+        onset_times = librosa.onset.onset_detect(y=y, sr=sr, units='time')
+
+        if len(onset_times) < len(beat_times):
+            return {"time_signature": "4/4", "confidence": 0.4}
+
+        # Count onsets between consecutive beats
+        note_counts = []
+        for i in range(len(beat_times) - 1):
+            start = beat_times[i]
+            end = beat_times[i+1]
+
+            # Count onsets in this beat
+            count = sum(1 for t in onset_times if start <= t < end)
+            note_counts.append(count)
+
+        # Look for repeating patterns in the note counts
+        time_sig_scores = {}
+
+        for ts, info in self.common_time_signatures.items():
+            beats_per_bar = info["beats_per_bar"]
+
+            # Skip if we don't have enough data
+            if len(note_counts) < beats_per_bar:
+                continue
+
+            # Calculate pattern similarity for this time signature
+            scores = []
+
+            for offset in range(min(beats_per_bar, len(note_counts) - beats_per_bar + 1)):
+                similarities = []
+
+                for i in range(offset, len(note_counts) - beats_per_bar + 1, beats_per_bar):
+                    # Get current bar pattern
+                    pattern = note_counts[i:i+beats_per_bar]
+
+                    # Compare with expected density pattern
+                    expected = self.rhythm_density.get(ts, [1.0] * beats_per_bar)
+                    expected = expected[:len(pattern)] # Truncate if needed
+
+                    # Normalize both patterns
+                    if sum(pattern) > 0 and sum(expected) > 0:
+                        pattern_norm = [p/max(1, sum(pattern)) for p in pattern]
+                        expected_norm = [e/sum(expected) for e in expected]
+
+                        # Calculate similarity (1 - distance)
+                        distance = sum(abs(p - e) for p, e in zip(pattern_norm, expected_norm)) / len(pattern)
+                        similarity = 1 - min(1.0, distance)
+                        similarities.append(similarity)
+
+                if similarities:
+                    scores.append(np.mean(similarities))
+
+            # Use the best score
+            if scores:
+                time_sig_scores[ts] = max(scores)
+
+        # Return best match
+        if time_sig_scores:
+            best_ts = max(time_sig_scores.items(), key=lambda x: x[1])
+            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
+
+        # Default
+        return {"time_signature": "4/4", "confidence": 0.4}
+
+    def _estimate_from_tempo(self, tempo):
+        """Use tempo to help estimate likely time signature"""
+        # Statistical tendencies: slower tempos often in compound meters (6/8)
+        # Fast tempos favor 4/4
+
+        scores = {}
+
+        if tempo < 70:
+            # Slow tempos favor compound meters
+            scores = {
+                "4/4": 0.5,
+                "3/4": 0.4,
+                "6/8": 0.7
+            }
+        elif 70 <= tempo <= 120:
+            # Medium tempos favor 4/4, 3/4
+            scores = {
+                "4/4": 0.7,
+                "3/4": 0.6,
+                "6/8": 0.3
+            }
+        else:
+            # Fast tempos favor 4/4
+            scores = {
+                "4/4": 0.8,
+                "3/4": 0.4,
+                "6/8": 0.2
+            }
+
+        # Find best match
+        best_ts = max(scores.items(), key=lambda x: x[1])
+        return {"time_signature": best_ts[0], "confidence": best_ts[1]}
+
+    def _combine_detection_results(self, results, tempo):
+        """Combine results from different detection methods"""
+        # Define weights for different methods
+        method_weights = {
+            "autocorrelation": 0.25,
+            "pattern_matching": 0.30,
+            "spectral": 0.20,
+            "note_density": 0.20,
+            "tempo_based": 0.05
+        }
+
+        # Prior probability (based on frequency in music)
+        prior_weights = {ts: info["weight"] for ts, info in self.common_time_signatures.items()}
+
+        # Combine votes
+        total_votes = {ts: prior_weights.get(ts, 0.1) for ts in self.common_time_signatures}
+
+        for method, result in results.items():
+            ts = result["time_signature"]
+            confidence = result["confidence"]
+            weight = method_weights.get(method, 0.1)
+
+            # Add weighted vote
+            if ts in total_votes:
+                total_votes[ts] += confidence * weight
+            else:
+                total_votes[ts] = confidence * weight
+
+        # Special case: disambiguate between 3/4 and 6/8
+        if "3/4" in total_votes and "6/8" in total_votes:
+            # If the two are close, use tempo to break tie
+            if abs(total_votes["3/4"] - total_votes["6/8"]) < 0.1:
+                if tempo < 100: # Slower tempo favors 6/8
+                    total_votes["6/8"] += 0.1
+                else: # Faster tempo favors 3/4
+                    total_votes["3/4"] += 0.1
+
+        # Get highest scoring time signature
+        best_ts = max(total_votes.items(), key=lambda x: x[1])
+
+        # Calculate confidence score (normalize to 0-1)
+        confidence = best_ts[1] / (sum(total_votes.values()) + 0.001)
+        confidence = min(0.95, max(0.4, confidence)) # Bound confidence
+
+        return {
+            "time_signature": best_ts[0],
+            "confidence": confidence,
+            "all_candidates": {ts: float(score) for ts, score in total_votes.items()}
+        }
+
-    def analyze_beat_pattern(self, audio_path, sr=22050, time_signature="4/4"):
+    def analyze_beat_pattern(self, audio_path, sr=22050, time_signature="4/4", auto_detect=False):
         """Analyze beat patterns and stresses in music using the provided time signature."""
+        # Auto-detect time signature if requested
+        if auto_detect:
+            time_sig_result = self.detect_time_signature(audio_path, sr)
+            time_signature = time_sig_result["time_signature"]
+
         # Load audio
         y, sr = librosa.load(audio_path, sr=sr)
 
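Taken together, the additions give BeatAnalyzer a self-contained meter estimator. A minimal usage sketch ("song.wav" is a placeholder path; it assumes BeatAnalyzer's __init__ also defines the common_time_signatures and rhythm_density tables the helpers consult, since their definitions are not part of the hunks shown):

    from beat_analysis import BeatAnalyzer

    analyzer = BeatAnalyzer()

    # Stand-alone meter estimation
    result = analyzer.detect_time_signature("song.wav")
    print(result["time_signature"])   # e.g. "3/4"
    print(result["confidence"])       # bounded to [0.4, 0.95] by _combine_detection_results
    print(result["all_candidates"])   # weighted vote per candidate meter

    # Or let analyze_beat_pattern resolve the meter itself
    pattern = analyzer.analyze_beat_pattern("song.wav", auto_detect=True)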
emotionanalysis.py CHANGED
@@ -9,9 +9,13 @@ except ImportError:
 from scipy.stats import mode
 import warnings
 warnings.filterwarnings('ignore') # Suppress librosa warnings
+from beat_analysis import BeatAnalyzer # Import BeatAnalyzer for rhythm analysis
 
 class MusicAnalyzer:
     def __init__(self):
+        # Create an instance of BeatAnalyzer for rhythm detection
+        self.beat_analyzer = BeatAnalyzer()
+
         # Emotion feature mappings - these define characteristics of different emotions
         self.emotion_profiles = {
             'happy': {'tempo': (100, 180), 'energy': (0.6, 1.0), 'major_mode': True, 'brightness': (0.6, 1.0)},
@@ -34,28 +38,6 @@ class MusicAnalyzer:
 
         # Musical key mapping
         self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
-
-        # Common time signatures and their beat patterns with weights for prior probability
-        # Simplified to only include 4/4, 3/4, and 6/8
-        self.common_time_signatures = {
-            "4/4": {"beats_per_bar": 4, "beat_pattern": [1.0, 0.2, 0.5, 0.2], "weight": 0.45},
-            "3/4": {"beats_per_bar": 3, "beat_pattern": [1.0, 0.2, 0.3], "weight": 0.25},
-            "6/8": {"beats_per_bar": 6, "beat_pattern": [1.0, 0.2, 0.3, 0.8, 0.2, 0.3], "weight": 0.30}
-        }
-
-        # Add common accent patterns for different time signatures
-        self.accent_patterns = {
-            "4/4": [[1, 0, 0, 0], [1, 0, 2, 0], [1, 0, 2, 0, 3, 0, 2, 0]],
-            "3/4": [[1, 0, 0], [1, 0, 2]],
-            "6/8": [[1, 0, 0, 2, 0, 0], [1, 0, 0, 2, 0, 3]]
-        }
-
-        # Expected rhythm density (relative note density per beat) for different time signatures
-        self.rhythm_density = {
-            "4/4": [1.0, 0.7, 0.8, 0.6],
-            "3/4": [1.0, 0.6, 0.7],
-            "6/8": [1.0, 0.5, 0.4, 0.8, 0.5, 0.4]
-        }
 
     def load_audio(self, file_path, sr=22050, duration=None):
         """Load audio file and return time series and sample rate"""
@@ -81,8 +63,16 @@ class MusicAnalyzer:
         ac = librosa.autocorrelate(onset_env, max_size=sr // 2)
         ac = librosa.util.normalize(ac, norm=np.inf)
 
-        # Advanced time signature detection
-        time_sig_result = self._detect_time_signature(y, sr)
+        # Use BeatAnalyzer for advanced time signature detection
+        # We need to save the audio temporarily to use the BeatAnalyzer method
+        import tempfile
+        import soundfile as sf
+
+        # Create a temporary file
+        with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as temp_file:
+            sf.write(temp_file.name, y, sr)
+            # Use BeatAnalyzer's advanced time signature detection
+            time_sig_result = self.beat_analyzer.detect_time_signature(temp_file.name)
 
         # Extract results from the time signature detection
         estimated_signature = time_sig_result["time_signature"]
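The temporary-file round trip above exists only because detect_time_signature takes a path rather than an in-memory array. One caveat worth noting: on Windows, a NamedTemporaryFile opened with delete=True cannot be reopened by name while it is still open, so the read inside the with-block may fail there. A sketch of a more portable variant (assuming soundfile is available), using mkstemp with explicit cleanup:

    import os
    import tempfile

    import soundfile as sf

    def detect_via_tempfile(beat_analyzer, y, sr):
        """Write audio to a closed temp file, run detection, then clean up."""
        fd, path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)  # close the OS handle so the file can be reopened by name
        try:
            sf.write(path, y, sr)
            return beat_analyzer.detect_time_signature(path, sr)
        finally:
            os.remove(path)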
@@ -110,485 +100,6 @@ class MusicAnalyzer:
             "time_signature_candidates": time_sig_result.get("all_candidates", {})
         }
 
-    def _detect_time_signature(self, y, sr):
-        """
-        Multi-method approach to time signature detection
-
-        Args:
-            y: Audio signal
-            sr: Sample rate
-
-        Returns:
-            dict with detected time signature and confidence
-        """
-        # 1. Compute onset envelope and beat positions
-        onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
-
-        # Get tempo and beat frames
-        tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
-        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
-
-        # Return default if not enough beats detected
-        if len(beat_times) < 8:
-            return {"time_signature": "4/4", "confidence": 0.5}
-
-        # 2. Extract beat strengths and normalize
-        beat_strengths = self._get_beat_strengths(y, sr, beat_times, onset_env)
-
-        # 3. Compute various time signature features using different methods
-        results = {}
-
-        # Method 1: Beat pattern autocorrelation
-        autocorr_result = self._detect_by_autocorrelation(onset_env, sr)
-        results["autocorrelation"] = autocorr_result
-
-        # Method 2: Beat strength pattern matching
-        pattern_result = self._detect_by_pattern_matching(beat_strengths)
-        results["pattern_matching"] = pattern_result
-
-        # Method 3: Spectral rhythmic analysis
-        spectral_result = self._detect_by_spectral_analysis(onset_env, sr)
-        results["spectral"] = spectral_result
-
-        # Method 4: Note density analysis
-        density_result = self._detect_by_note_density(y, sr, beat_times)
-        results["note_density"] = density_result
-
-        # Method 5: Tempo-based estimation
-        tempo_result = self._estimate_from_tempo(tempo)
-        results["tempo_based"] = tempo_result
-
-        # 4. Combine results with weighted voting
-        final_result = self._combine_detection_results(results, tempo)
-
-        return final_result
-
-    def _get_beat_strengths(self, y, sr, beat_times, onset_env):
-        """Extract normalized strengths at beat positions"""
-        # Convert beat times to frames
-        beat_frames = librosa.time_to_frames(beat_times, sr=sr, hop_length=512)
-        beat_frames = [min(f, len(onset_env)-1) for f in beat_frames]
-
-        # Get beat strengths from onset envelope
-        beat_strengths = np.array([onset_env[f] for f in beat_frames])
-
-        # Also look at energy and spectral flux at beat positions
-        hop_length = 512
-        frame_length = 2048
-
-        # Get energy at each beat
-        energy = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0]
-        beat_energy = np.array([energy[min(f, len(energy)-1)] for f in beat_frames])
-
-        # Combine onset strength with energy (weighted average)
-        beat_strengths = 0.7 * beat_strengths + 0.3 * beat_energy
-
-        # Normalize
-        if np.max(beat_strengths) > 0:
-            beat_strengths = beat_strengths / np.max(beat_strengths)
-
-        return beat_strengths
-
-    def _detect_by_autocorrelation(self, onset_env, sr):
-        """Detect meter using autocorrelation of onset strength"""
-        # Calculate autocorrelation of onset envelope
-        hop_length = 512
-        ac = librosa.autocorrelate(onset_env, max_size=4 * sr // hop_length)
-        ac = librosa.util.normalize(ac)
-
-        # Find significant peaks in autocorrelation
-        peaks = signal.find_peaks(ac, height=0.2, distance=sr//(8*hop_length))[0]
-
-        if len(peaks) < 2:
-            return {"time_signature": "4/4", "confidence": 0.4}
-
-        # Analyze peak intervals in terms of beats
-        peak_intervals = np.diff(peaks)
-
-        # Convert peaks to time
-        peak_times = peaks * hop_length / sr
-
-        # Analyze for common time signature patterns
-        time_sig_votes = {}
-
-        # Check if peaks match expected bar lengths
-        for ts, info in self.common_time_signatures.items():
-            beats_per_bar = info["beats_per_bar"]
-
-            # Check how well peaks match this meter
-            score = 0
-            for interval in peak_intervals:
-                # Check if this interval corresponds to this time signature
-                # Allow some tolerance around the expected value
-                expected = beats_per_bar * (hop_length / sr) # in seconds
-                tolerance = 0.25 * expected
-
-                if abs(interval * hop_length / sr - expected) < tolerance:
-                    score += 1
-
-            if len(peak_intervals) > 0:
-                time_sig_votes[ts] = score / len(peak_intervals)
-
-        # Return most likely time signature
-        if time_sig_votes:
-            best_ts = max(time_sig_votes.items(), key=lambda x: x[1])
-            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
-
-        return {"time_signature": "4/4", "confidence": 0.4}
-
-    def _detect_by_pattern_matching(self, beat_strengths):
-        """Match beat strength patterns against known time signature patterns"""
-        if len(beat_strengths) < 6:
-            return {"time_signature": "4/4", "confidence": 0.4}
-
-        results = {}
-
-        # Try each possible time signature
-        for ts, info in self.common_time_signatures.items():
-            beats_per_bar = info["beats_per_bar"]
-            expected_pattern = info["beat_pattern"]
-
-            # Calculate correlation scores for overlapping segments
-            scores = []
-
-            # We need at least one complete pattern
-            if len(beat_strengths) >= beats_per_bar:
-                # Try different offsets to find best alignment
-                for offset in range(min(beats_per_bar, len(beat_strengths) - beats_per_bar + 1)):
-                    # Calculate scores for each complete pattern
-                    pattern_scores = []
-
-                    for i in range(offset, len(beat_strengths) - beats_per_bar + 1, beats_per_bar):
-                        segment = beat_strengths[i:i+beats_per_bar]
-
-                        # If expected pattern is longer than segment, truncate it
-                        pattern = expected_pattern[:len(segment)]
-
-                        # Normalize segment and pattern
-                        if np.std(segment) > 0 and np.std(pattern) > 0:
-                            # Calculate correlation
-                            corr = np.corrcoef(segment, pattern)[0, 1]
-                            if not np.isnan(corr):
-                                pattern_scores.append(corr)
-
-                    if pattern_scores:
-                        scores.append(np.mean(pattern_scores))
-
-            # Use the best score among different offsets
-            if scores:
-                confidence = max(scores)
-                results[ts] = confidence
-
-        # Find best match
-        if results:
-            best_ts = max(results.items(), key=lambda x: x[1])
-            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
-
-        # Default
-        return {"time_signature": "4/4", "confidence": 0.5}
-
-    def _detect_by_spectral_analysis(self, onset_env, sr):
-        """Analyze rhythm in frequency domain"""
-        # Get rhythm periodicity through Fourier Transform
-        # Focus on periods corresponding to typical bar lengths (1-8 seconds)
-        hop_length = 512
-
-        # Calculate rhythm periodicity
-        fft_size = 2**13 # Large enough to give good frequency resolution
-        S = np.abs(np.fft.rfft(onset_env, n=fft_size))
-
-        # Convert frequency to tempo in BPM
-        freqs = np.fft.rfftfreq(fft_size, d=hop_length/sr)
-        tempos = 60 * freqs
-
-        # Focus on reasonable tempo range (40-240 BPM)
-        tempo_mask = (tempos >= 40) & (tempos <= 240)
-        S_tempo = S[tempo_mask]
-        tempos = tempos[tempo_mask]
-
-        # Find peaks in spectrum
-        peaks = signal.find_peaks(S_tempo, height=np.max(S_tempo)*0.1, distance=5)[0]
-
-        if len(peaks) == 0:
-            return {"time_signature": "4/4", "confidence": 0.4}
-
-        # Get peak tempos and strengths
-        peak_tempos = tempos[peaks]
-        peak_strengths = S_tempo[peaks]
-
-        # Sort by strength
-        peak_indices = np.argsort(peak_strengths)[::-1]
-        peak_tempos = peak_tempos[peak_indices]
-        peak_strengths = peak_strengths[peak_indices]
-
-        # Analyze relationships between peaks
-        # For example, 3/4 typically has peaks at multiples of 3 beats
-        # 4/4 has peaks at multiples of 4 beats
-
-        time_sig_scores = {}
-
-        # Check relationships between top peaks
-        if len(peak_tempos) >= 2:
-            tempo_ratios = []
-            for i in range(len(peak_tempos)):
-                for j in range(i+1, len(peak_tempos)):
-                    if peak_tempos[j] > 0:
-                        ratio = peak_tempos[i] / peak_tempos[j]
-                        tempo_ratios.append(ratio)
-
-            # Check for patterns indicative of different time signatures
-            for ts in self.common_time_signatures:
-                score = 0
-
-                if ts == "4/4" or ts == "6/8":
-                    # Look for ratios close to 4 or 6
-                    for ratio in tempo_ratios:
-                        if abs(ratio - 4) < 0.2 or abs(ratio - 6) < 0.3:
-                            score += 1
-
-                # Normalize score
-                if tempo_ratios:
-                    time_sig_scores[ts] = min(1.0, score / len(tempo_ratios) + 0.4)
-
-        # If we have meaningful scores, return best match
-        if time_sig_scores:
-            best_ts = max(time_sig_scores.items(), key=lambda x: x[1])
-            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
-
-        # Default fallback
-        return {"time_signature": "4/4", "confidence": 0.4}
-
-    def _detect_by_note_density(self, y, sr, beat_times):
-        """Analyze note density patterns between beats"""
-        if len(beat_times) < 6:
-            return {"time_signature": "4/4", "confidence": 0.4}
-
-        # Extract note onsets (not just beats)
-        onset_times = librosa.onset.onset_detect(y=y, sr=sr, units='time')
-
-        if len(onset_times) < len(beat_times):
-            return {"time_signature": "4/4", "confidence": 0.4}
-
-        # Count onsets between consecutive beats
-        note_counts = []
-        for i in range(len(beat_times) - 1):
-            start = beat_times[i]
-            end = beat_times[i+1]
-
-            # Count onsets in this beat
-            count = sum(1 for t in onset_times if start <= t < end)
-            note_counts.append(count)
-
-        # Look for repeating patterns in the note counts
-        time_sig_scores = {}
-
-        for ts, info in self.common_time_signatures.items():
-            beats_per_bar = info["beats_per_bar"]
-
-            # Skip if we don't have enough data
-            if len(note_counts) < beats_per_bar:
-                continue
-
-            # Calculate pattern similarity for this time signature
-            scores = []
-
-            for offset in range(min(beats_per_bar, len(note_counts) - beats_per_bar + 1)):
-                similarities = []
-
-                for i in range(offset, len(note_counts) - beats_per_bar + 1, beats_per_bar):
-                    # Get current bar pattern
-                    pattern = note_counts[i:i+beats_per_bar]
-
-                    # Compare with expected density pattern
-                    expected = self.rhythm_density.get(ts, [1.0] * beats_per_bar)
-                    expected = expected[:len(pattern)] # Truncate if needed
-
-                    # Normalize both patterns
-                    if sum(pattern) > 0 and sum(expected) > 0:
-                        pattern_norm = [p/max(1, sum(pattern)) for p in pattern]
-                        expected_norm = [e/sum(expected) for e in expected]
-
-                        # Calculate similarity (1 - distance)
-                        distance = sum(abs(p - e) for p, e in zip(pattern_norm, expected_norm)) / len(pattern)
-                        similarity = 1 - min(1.0, distance)
-                        similarities.append(similarity)
-
-                if similarities:
-                    scores.append(np.mean(similarities))
-
-            # Use the best score
-            if scores:
-                time_sig_scores[ts] = max(scores)
-
-        # Return best match
-        if time_sig_scores:
-            best_ts = max(time_sig_scores.items(), key=lambda x: x[1])
-            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
-
-        # Default
-        return {"time_signature": "4/4", "confidence": 0.4}
-
-    def _estimate_from_tempo(self, tempo):
-        """Use tempo to help estimate likely time signature"""
-        # Statistical tendencies: slower tempos often in compound meters (6/8)
-        # Fast tempos favor 4/4
-
-        scores = {}
-
-        if tempo < 70:
-            # Slow tempos favor compound meters
-            scores = {
-                "4/4": 0.5,
-                "3/4": 0.4,
-                "6/8": 0.7
-            }
-        elif 70 <= tempo <= 120:
-            # Medium tempos favor 4/4, 3/4
-            scores = {
-                "4/4": 0.7,
-                "3/4": 0.6,
-                "6/8": 0.3
-            }
-        else:
-            # Fast tempos favor 4/4
-            scores = {
-                "4/4": 0.8,
-                "3/4": 0.4,
-                "6/8": 0.2
-            }
-
-        # Find best match
-        best_ts = max(scores.items(), key=lambda x: x[1])
-        return {"time_signature": best_ts[0], "confidence": best_ts[1]}
-
-    def _combine_detection_results(self, results, tempo):
-        """Combine results from different detection methods"""
-        # Define weights for different methods
-        method_weights = {
-            "autocorrelation": 0.25,
-            "pattern_matching": 0.30,
-            "spectral": 0.20,
-            "note_density": 0.20,
-            "tempo_based": 0.05
-        }
-
-        # Prior probability (based on frequency in music)
-        prior_weights = {ts: info["weight"] for ts, info in self.common_time_signatures.items()}
-
-        # Combine votes
-        total_votes = {ts: prior_weights.get(ts, 0.1) for ts in self.common_time_signatures}
-
-        for method, result in results.items():
-            ts = result["time_signature"]
-            confidence = result["confidence"]
-            weight = method_weights.get(method, 0.1)
-
-            # Add weighted vote
-            if ts in total_votes:
-                total_votes[ts] += confidence * weight
-            else:
-                total_votes[ts] = confidence * weight
-
-        # Special case: disambiguate between 3/4 and 6/8
-        if "3/4" in total_votes and "6/8" in total_votes:
-            # If the two are close, use tempo to break tie
-            if abs(total_votes["3/4"] - total_votes["6/8"]) < 0.1:
-                if tempo < 100: # Slower tempo favors 6/8
-                    total_votes["6/8"] += 0.1
-                else: # Faster tempo favors 3/4
-                    total_votes["3/4"] += 0.1
-
-        # Get highest scoring time signature
-        best_ts = max(total_votes.items(), key=lambda x: x[1])
-
-        # Calculate confidence score (normalize to 0-1)
-        confidence = best_ts[1] / (sum(total_votes.values()) + 0.001)
-        confidence = min(0.95, max(0.4, confidence)) # Bound confidence
-
-        return {
-            "time_signature": best_ts[0],
-            "confidence": confidence,
-            "all_candidates": {ts: float(score) for ts, score in total_votes.items()}
-        }
-
-    def _evaluate_beat_pattern(self, beat_strengths, pattern_length):
-        """
-        Evaluate how consistently a specific pattern length fits the beat strengths
-
-        Args:
-            beat_strengths: Array of normalized beat strengths
-            pattern_length: Length of pattern to evaluate
-
-        Returns:
-            score: How well this pattern length explains the data (0-1)
-        """
-        if len(beat_strengths) < pattern_length * 2:
-            return 0.0
-
-        # Calculate correlation between consecutive patterns
-        correlations = []
-
-        num_full_patterns = len(beat_strengths) // pattern_length
-        for i in range(num_full_patterns - 1):
-            pattern1 = beat_strengths[i*pattern_length:(i+1)*pattern_length]
-            pattern2 = beat_strengths[(i+1)*pattern_length:(i+2)*pattern_length]
-
-            # Calculate similarity between consecutive patterns
-            if len(pattern1) == len(pattern2) and len(pattern1) > 0:
-                corr = np.corrcoef(pattern1, pattern2)[0, 1]
-                if not np.isnan(corr):
-                    correlations.append(corr)
-
-        # Calculate variance of beat strengths within each position
-        variance_score = 0
-        if num_full_patterns >= 2:
-            position_values = [[] for _ in range(pattern_length)]
-
-            for i in range(num_full_patterns):
-                for pos in range(pattern_length):
-                    idx = i * pattern_length + pos
-                    if idx < len(beat_strengths):
-                        position_values[pos].append(beat_strengths[idx])
-
-            # Calculate variance ratio (higher means consistent accent patterns)
-            between_pos_var = np.var([np.mean(vals) for vals in position_values if vals])
-            within_pos_var = np.mean([np.var(vals) for vals in position_values if len(vals) > 1])
-
-            if within_pos_var > 0:
-                variance_score = between_pos_var / within_pos_var
-                variance_score = min(1.0, variance_score / 2.0) # Normalize
-
-        # Combine correlation and variance scores
-        if correlations:
-            correlation_score = np.mean(correlations)
-            return 0.7 * correlation_score + 0.3 * variance_score
-
-        return 0.5 * variance_score # Lower confidence if we couldn't calculate correlations
-
-    def _extract_average_pattern(self, beat_strengths, pattern_length):
-        """
-        Extract the average beat pattern of specified length
-
-        Args:
-            beat_strengths: Array of beat strengths
-            pattern_length: Length of pattern to extract
-
-        Returns:
-            Average pattern of the specified length
-        """
-        if len(beat_strengths) < pattern_length:
-            return np.array([])
-
-        # Number of complete patterns
-        num_patterns = len(beat_strengths) // pattern_length
-
-        if num_patterns == 0:
-            return np.array([])
-
-        # Reshape to stack patterns and calculate average
-        patterns = beat_strengths[:num_patterns * pattern_length].reshape((num_patterns, pattern_length))
-        return np.mean(patterns, axis=0)
-
     def analyze_tonality(self, y, sr):
         """Analyze tonal features: key, mode, harmonic features"""
         # Compute chromagram
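For reference, the voting scheme removed here (and re-added in beat_analysis.py) works as follows: each candidate meter starts from its prior weight, and every detection method then adds confidence × method-weight to the meter it voted for. A worked sketch with made-up method outputs:

    # Made-up per-method votes, for illustration only
    results = {
        "autocorrelation":  {"time_signature": "3/4", "confidence": 0.6},
        "pattern_matching": {"time_signature": "3/4", "confidence": 0.7},
        "spectral":         {"time_signature": "4/4", "confidence": 0.5},
        "note_density":     {"time_signature": "3/4", "confidence": 0.5},
        "tempo_based":      {"time_signature": "6/8", "confidence": 0.7},
    }
    method_weights = {"autocorrelation": 0.25, "pattern_matching": 0.30,
                      "spectral": 0.20, "note_density": 0.20, "tempo_based": 0.05}
    priors = {"4/4": 0.45, "3/4": 0.25, "6/8": 0.30}

    total_votes = dict(priors)
    for method, r in results.items():
        total_votes[r["time_signature"]] += r["confidence"] * method_weights[method]

    # 3/4 accumulates 0.25 + 0.6*0.25 + 0.7*0.30 + 0.5*0.20 = 0.71 and wins
    best = max(total_votes.items(), key=lambda kv: kv[1])
    print(best)  # ('3/4', ~0.71)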