root committed on
Commit 1ff1aab · 1 Parent(s): 3ef75d1
Files changed (4)
  1. app.py +0 -0
  2. beat_analysis.py +854 -0
  3. emotionanalysis.py +51 -39
  4. requirements.txt +1 -0
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
beat_analysis.py ADDED
@@ -0,0 +1,854 @@
+import librosa
+import numpy as np
+import pronouncing
+import re
+from functools import lru_cache
+import string
+from nltk.corpus import cmudict
+import nltk
+from scipy import signal
+
+try:
+    nltk.data.find('corpora/cmudict')
+except LookupError:
+    nltk.download('cmudict')
+
+class BeatAnalyzer:
+    def __init__(self):
+        # Mapping for standard stress patterns by time signature
+        # Simplified to only include 4/4, 3/4, and 6/8
+        self.stress_patterns = {
+            # Format: Strong (1.0), Medium (0.5), Weak (0.0)
+            "4/4": [1.0, 0.0, 0.5, 0.0],  # Strong, weak, medium, weak
+            "3/4": [1.0, 0.0, 0.0],  # Strong, weak, weak
+            "6/8": [1.0, 0.0, 0.0, 0.5, 0.0, 0.0]  # Strong, weak, weak, medium, weak, weak
+        }
+
+        self.cmudict = None
+        try:
+            self.cmudict = cmudict.dict()
+        except:
+            pass  # Fall back to rule-based counting if cmudict is not available
+
+        # Genre-specific syllable-to-beat ratio guidelines
+        self.genre_syllable_ratios = {
+            # Supported genres with strong syllable-to-beat patterns
+            'pop': (0.5, 1.0, 1.5),  # Pop - significantly reduced range
+            'rock': (0.5, 0.9, 1.3),  # Rock - reduced for brevity
+            'country': (0.6, 0.9, 1.2),  # Country - simpler syllable patterns
+            'disco': (0.7, 1.0, 1.3),  # Disco - tightened range
+            'metal': (0.6, 1.0, 1.3),  # Metal - reduced upper limit
+
+            # Other genres (analysis only, no lyrics generation)
+            'hiphop': (1.8, 2.5, 3.5),  # Hip hop often has many syllables per beat
+            'rap': (2.0, 3.0, 4.0),  # Rap often has very high syllable counts
+            'folk': (0.8, 1.0, 1.3),  # Folk often has close to 1:1 ratio
+            'jazz': (0.7, 1.0, 1.5),  # Jazz can be very flexible
+            'reggae': (0.7, 1.0, 1.3),  # Reggae often emphasizes specific beats
+            'soul': (0.8, 1.2, 1.6),  # Soul music tends to be expressive
+            'r&b': (1.0, 1.5, 2.0),  # R&B can have melisma
+            'electronic': (0.7, 1.0, 1.5),  # Electronic music varies widely
+            'classical': (0.7, 1.0, 1.4),  # Classical can vary by subgenre
+            'blues': (0.6, 0.8, 1.2),  # Blues often extends syllables
+            'default': (0.6, 1.0, 1.3)  # Default for unknown genres - more conservative
+        }
+
+        # List of genres supported for lyrics generation
+        # These genres have the most predictable and consistent syllable-to-beat relationships,
+        # making them ideal for our beat-matching algorithm
+        self.supported_genres = ['pop', 'rock', 'country', 'disco', 'metal']
+
+        # Common time signatures and their beat patterns with weights for prior probability
+        # Simplified to only include 4/4, 3/4, and 6/8
+        self.common_time_signatures = {
+            "4/4": {"beats_per_bar": 4, "beat_pattern": [1.0, 0.2, 0.5, 0.2], "weight": 0.55},
+            "3/4": {"beats_per_bar": 3, "beat_pattern": [1.0, 0.2, 0.3], "weight": 0.30},
+            "6/8": {"beats_per_bar": 6, "beat_pattern": [1.0, 0.2, 0.3, 0.8, 0.2, 0.3], "weight": 0.15}
+        }
+
+        # Add common accent patterns for different time signatures
+        self.accent_patterns = {
+            "4/4": [[1, 0, 0, 0], [1, 0, 2, 0], [1, 0, 2, 0, 3, 0, 2, 0]],
+            "3/4": [[1, 0, 0], [1, 0, 2]],
+            "6/8": [[1, 0, 0, 2, 0, 0], [1, 0, 0, 2, 0, 3]]
+        }
+
+        # Expected rhythm density (relative note density per beat) for different time signatures
+        self.rhythm_density = {
+            "4/4": [1.0, 0.7, 0.8, 0.6],
+            "3/4": [1.0, 0.6, 0.7],
+            "6/8": [1.0, 0.5, 0.4, 0.8, 0.5, 0.4]
+        }
+
+    @lru_cache(maxsize=128)
+    def count_syllables(self, word):
+        """Count syllables in a word using the CMU dictionary if available, otherwise a rule-based method."""
+        word = word.lower().strip()
+        word = re.sub(r'[^a-z]', '', word)  # Remove non-alphabetic characters
+
+        if not word:
+            return 0
+
+        # Try using CMUDict first if available
+        if self.cmudict and word in self.cmudict:
+            return max([len(list(y for y in x if y[-1].isdigit())) for x in self.cmudict[word]])
+
+        # Rule-based syllable counting as fallback
+        # Modified version of NLTK's implementation
+        vowels = "aeiouy"
+        double_vowels = ['aa', 'ae', 'ai', 'ao', 'au', 'ay', 'ea', 'ee', 'ei', 'eo', 'eu', 'ey', 'ia', 'ie', 'ii', 'io', 'iu', 'oa', 'oe', 'oi', 'oo', 'ou', 'oy', 'ua', 'ue', 'ui', 'uo', 'uy']
+        prev_was_vowel = False
+        count = 0
+        final_e = False
+
+        if word.endswith('e') and not word.endswith('le'):
+            final_e = True
+
+        for i, char in enumerate(word):
+            if char in vowels:
+                # Check if the current and previous characters form a diphthong
+                if prev_was_vowel and i > 0 and (word[i-1:i+1] in double_vowels):
+                    prev_was_vowel = True
+                    continue
+
+                if not prev_was_vowel:
+                    count += 1
+                prev_was_vowel = True
+            else:
+                prev_was_vowel = False
+
+        # Handle edge cases
+        if word.endswith('le') and len(word) > 2 and word[-3] not in vowels:
+            count += 1
+        elif final_e:
+            count = max(count - 1, 1)  # Drop the final 'e', but ensure at least 1 syllable
+        elif word.endswith('y') and not prev_was_vowel:
+            count += 1
+
+        # Ensure at least one syllable
+        return max(count, 1)
+
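
A quick usage sketch of the counter above (the words are illustrative; the counts shown assume the CMU dictionary loaded, since the rule-based fallback can differ on irregular spellings):

    from beat_analysis import BeatAnalyzer

    analyzer = BeatAnalyzer()
    analyzer.count_syllables("rhythm")     # 2 (CMUdict: R IH1 DH AH0 M)
    analyzer.count_syllables("beautiful")  # 3 (CMUdict: B Y UW1 T AH0 F AH0 L)
    analyzer.count_syllables("Hello!")     # 2 - punctuation is stripped before counting
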
+    def detect_time_signature(self, audio_path, sr=22050):
+        """
+        Advanced multi-method approach to time signature detection
+
+        Args:
+            audio_path: Path to audio file
+            sr: Sample rate
+
+        Returns:
+            dict with detected time signature and confidence
+        """
+        # Load audio
+        y, sr = librosa.load(audio_path, sr=sr)
+
+        # 1. Compute onset envelope and beat positions
+        onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
+
+        # Get tempo and beat frames
+        tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
+        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
+
+        # Return default if not enough beats detected
+        if len(beat_times) < 8:
+            return {"time_signature": "4/4", "confidence": 0.5}
+
+        # 2. Extract beat strengths and normalize
+        beat_strengths = self._get_beat_strengths(y, sr, beat_times, onset_env)
+
+        # 3. Compute various time signature features using different methods
+        results = {}
+
+        # Method 1: Beat pattern autocorrelation
+        autocorr_result = self._detect_by_autocorrelation(onset_env, sr)
+        results["autocorrelation"] = autocorr_result
+
+        # Method 2: Beat strength pattern matching
+        pattern_result = self._detect_by_pattern_matching(beat_strengths)
+        results["pattern_matching"] = pattern_result
+
+        # Method 3: Spectral rhythmic analysis
+        spectral_result = self._detect_by_spectral_analysis(onset_env, sr)
+        results["spectral"] = spectral_result
+
+        # Method 4: Note density analysis
+        density_result = self._detect_by_note_density(y, sr, beat_times)
+        results["note_density"] = density_result
+
+        # Method 5: Tempo-based estimation
+        tempo_result = self._estimate_from_tempo(tempo)
+        results["tempo_based"] = tempo_result
+
+        # 4. Combine results with weighted voting
+        final_result = self._combine_detection_results(results, tempo)
+
+        return final_result
+
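
A minimal sketch of calling the detector ("song.wav" is a placeholder path, and the printed values are illustrative):

    from beat_analysis import BeatAnalyzer

    analyzer = BeatAnalyzer()
    result = analyzer.detect_time_signature("song.wav")
    print(result["time_signature"], round(result["confidence"], 2))  # e.g. 4/4 0.61
    # When at least 8 beats are found, the combined result also carries per-meter
    # scores under result["all_candidates"]; the short-audio fallback does not.
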
+    def _get_beat_strengths(self, y, sr, beat_times, onset_env):
+        """Extract normalized strengths at beat positions"""
+        # Convert beat times to frames
+        beat_frames = librosa.time_to_frames(beat_times, sr=sr, hop_length=512)
+        beat_frames = [min(f, len(onset_env) - 1) for f in beat_frames]
+
+        # Get beat strengths from onset envelope
+        beat_strengths = np.array([onset_env[f] for f in beat_frames])
+
+        # Also look at energy and spectral flux at beat positions
+        hop_length = 512
+        frame_length = 2048
+
+        # Get energy at each beat
+        energy = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0]
+        beat_energy = np.array([energy[min(f, len(energy) - 1)] for f in beat_frames])
+
+        # Combine onset strength with energy (weighted average)
+        beat_strengths = 0.7 * beat_strengths + 0.3 * beat_energy
+
+        # Normalize
+        if np.max(beat_strengths) > 0:
+            beat_strengths = beat_strengths / np.max(beat_strengths)
+
+        return beat_strengths
+
+    def _detect_by_autocorrelation(self, onset_env, sr):
+        """Detect meter using autocorrelation of onset strength"""
+        # Calculate autocorrelation of onset envelope
+        hop_length = 512
+        ac = librosa.autocorrelate(onset_env, max_size=4 * sr // hop_length)
+        ac = librosa.util.normalize(ac)
+
+        # Find significant peaks in autocorrelation
+        peaks = signal.find_peaks(ac, height=0.2, distance=sr // (8 * hop_length))[0]
+
+        if len(peaks) < 2:
+            return {"time_signature": "4/4", "confidence": 0.4}
+
+        # Analyze peak intervals in terms of beats
+        peak_intervals = np.diff(peaks)
+
+        # Convert peaks to time
+        peak_times = peaks * hop_length / sr
+
+        # Analyze for common time signature patterns
+        time_sig_votes = {}
+
+        # Check if peaks match expected bar lengths
+        for ts, info in self.common_time_signatures.items():
+            beats_per_bar = info["beats_per_bar"]
+
+            # Check how well peaks match this meter
+            score = 0
+            for interval in peak_intervals:
+                # Check if this interval corresponds to this time signature
+                # Allow some tolerance around the expected value
+                expected = beats_per_bar * (hop_length / sr)  # in seconds
+                tolerance = 0.25 * expected
+
+                if abs(interval * hop_length / sr - expected) < tolerance:
+                    score += 1
+
+            if len(peak_intervals) > 0:
+                time_sig_votes[ts] = score / len(peak_intervals)
+
+        # Return most likely time signature
+        if time_sig_votes:
+            best_ts = max(time_sig_votes.items(), key=lambda x: x[1])
+            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
+
+        return {"time_signature": "4/4", "confidence": 0.4}
+
+    def _detect_by_pattern_matching(self, beat_strengths):
+        """Match beat strength patterns against known time signature patterns"""
+        if len(beat_strengths) < 6:
+            return {"time_signature": "4/4", "confidence": 0.4}
+
+        results = {}
+
+        # Try each possible time signature
+        for ts, info in self.common_time_signatures.items():
+            beats_per_bar = info["beats_per_bar"]
+            expected_pattern = info["beat_pattern"]
+
+            # Calculate correlation scores for overlapping segments
+            scores = []
+
+            # We need at least one complete pattern
+            if len(beat_strengths) >= beats_per_bar:
+                # Try different offsets to find best alignment
+                for offset in range(min(beats_per_bar, len(beat_strengths) - beats_per_bar + 1)):
+                    # Calculate scores for each complete pattern
+                    pattern_scores = []
+
+                    for i in range(offset, len(beat_strengths) - beats_per_bar + 1, beats_per_bar):
+                        segment = beat_strengths[i:i + beats_per_bar]
+
+                        # If expected pattern is longer than segment, truncate it
+                        pattern = expected_pattern[:len(segment)]
+
+                        # Normalize segment and pattern
+                        if np.std(segment) > 0 and np.std(pattern) > 0:
+                            # Calculate correlation
+                            corr = np.corrcoef(segment, pattern)[0, 1]
+                            if not np.isnan(corr):
+                                pattern_scores.append(corr)
+
+                    if pattern_scores:
+                        scores.append(np.mean(pattern_scores))
+
+            # Use the best score among different offsets
+            if scores:
+                confidence = max(scores)
+                results[ts] = confidence
+
+        # Find best match
+        if results:
+            best_ts = max(results.items(), key=lambda x: x[1])
+            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
+
+        # Default
+        return {"time_signature": "4/4", "confidence": 0.5}
+
+    def _detect_by_spectral_analysis(self, onset_env, sr):
+        """Analyze rhythm in frequency domain"""
+        # Get rhythm periodicity through Fourier Transform
+        # Focus on periods corresponding to typical bar lengths (1-8 seconds)
+        hop_length = 512
+
+        # Calculate rhythm periodicity
+        fft_size = 2**13  # Large enough to give good frequency resolution
+        S = np.abs(np.fft.rfft(onset_env, n=fft_size))
+
+        # Convert frequency to tempo in BPM
+        freqs = np.fft.rfftfreq(fft_size, d=hop_length / sr)
+        tempos = 60 * freqs
+
+        # Focus on reasonable tempo range (40-240 BPM)
+        tempo_mask = (tempos >= 40) & (tempos <= 240)
+        S_tempo = S[tempo_mask]
+        tempos = tempos[tempo_mask]
+
+        # Find peaks in spectrum
+        peaks = signal.find_peaks(S_tempo, height=np.max(S_tempo) * 0.1, distance=5)[0]
+
+        if len(peaks) == 0:
+            return {"time_signature": "4/4", "confidence": 0.4}
+
+        # Get peak tempos and strengths
+        peak_tempos = tempos[peaks]
+        peak_strengths = S_tempo[peaks]
+
+        # Sort by strength
+        peak_indices = np.argsort(peak_strengths)[::-1]
+        peak_tempos = peak_tempos[peak_indices]
+        peak_strengths = peak_strengths[peak_indices]
+
+        # Analyze relationships between peaks
+        # For example, 3/4 typically has peaks at multiples of 3 beats
+        # 4/4 has peaks at multiples of 4 beats
+
+        time_sig_scores = {}
+
+        # Check relationships between top peaks
+        if len(peak_tempos) >= 2:
+            tempo_ratios = []
+            for i in range(len(peak_tempos)):
+                for j in range(i + 1, len(peak_tempos)):
+                    if peak_tempos[j] > 0:
+                        ratio = peak_tempos[i] / peak_tempos[j]
+                        tempo_ratios.append(ratio)
+
+            # Check for patterns indicative of different time signatures
+            for ts in self.common_time_signatures:
+                score = 0
+
+                if ts == "4/4" or ts == "6/8":
+                    # Look for ratios close to 4 or 6
+                    for ratio in tempo_ratios:
+                        if abs(ratio - 4) < 0.2 or abs(ratio - 6) < 0.3:
+                            score += 1
+
+                # Normalize score
+                if tempo_ratios:
+                    time_sig_scores[ts] = min(1.0, score / len(tempo_ratios) + 0.4)
+
+        # If we have meaningful scores, return best match
+        if time_sig_scores:
+            best_ts = max(time_sig_scores.items(), key=lambda x: x[1])
+            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
+
+        # Default fallback
+        return {"time_signature": "4/4", "confidence": 0.4}
+
+    def _detect_by_note_density(self, y, sr, beat_times):
+        """Analyze note density patterns between beats"""
+        if len(beat_times) < 6:
+            return {"time_signature": "4/4", "confidence": 0.4}
+
+        # Extract note onsets (not just beats)
+        onset_times = librosa.onset.onset_detect(y=y, sr=sr, units='time')
+
+        if len(onset_times) < len(beat_times):
+            return {"time_signature": "4/4", "confidence": 0.4}
+
+        # Count onsets between consecutive beats
+        note_counts = []
+        for i in range(len(beat_times) - 1):
+            start = beat_times[i]
+            end = beat_times[i + 1]
+
+            # Count onsets in this beat
+            count = sum(1 for t in onset_times if start <= t < end)
+            note_counts.append(count)
+
+        # Look for repeating patterns in the note counts
+        time_sig_scores = {}
+
+        for ts, info in self.common_time_signatures.items():
+            beats_per_bar = info["beats_per_bar"]
+
+            # Skip if we don't have enough data
+            if len(note_counts) < beats_per_bar:
+                continue
+
+            # Calculate pattern similarity for this time signature
+            scores = []
+
+            for offset in range(min(beats_per_bar, len(note_counts) - beats_per_bar + 1)):
+                similarities = []
+
+                for i in range(offset, len(note_counts) - beats_per_bar + 1, beats_per_bar):
+                    # Get current bar pattern
+                    pattern = note_counts[i:i + beats_per_bar]
+
+                    # Compare with expected density pattern
+                    expected = self.rhythm_density.get(ts, [1.0] * beats_per_bar)
+                    expected = expected[:len(pattern)]  # Truncate if needed
+
+                    # Normalize both patterns
+                    if sum(pattern) > 0 and sum(expected) > 0:
+                        pattern_norm = [p / max(1, sum(pattern)) for p in pattern]
+                        expected_norm = [e / sum(expected) for e in expected]
+
+                        # Calculate similarity (1 - distance)
+                        distance = sum(abs(p - e) for p, e in zip(pattern_norm, expected_norm)) / len(pattern)
+                        similarity = 1 - min(1.0, distance)
+                        similarities.append(similarity)
+
+                if similarities:
+                    scores.append(np.mean(similarities))
+
+            # Use the best score
+            if scores:
+                time_sig_scores[ts] = max(scores)
+
+        # Return best match
+        if time_sig_scores:
+            best_ts = max(time_sig_scores.items(), key=lambda x: x[1])
+            return {"time_signature": best_ts[0], "confidence": best_ts[1]}
+
+        # Default
+        return {"time_signature": "4/4", "confidence": 0.4}
+
+    def _estimate_from_tempo(self, tempo):
+        """Use tempo to help estimate likely time signature"""
+        # Statistical tendencies: slower tempos often in compound meters (6/8),
+        # fast tempos favor 4/4
+
+        scores = {}
+
+        if tempo < 70:
+            # Slow tempos favor compound meters
+            scores = {
+                "4/4": 0.5,
+                "3/4": 0.4,
+                "6/8": 0.7
+            }
+        elif 70 <= tempo <= 120:
+            # Medium tempos favor 4/4, 3/4
+            scores = {
+                "4/4": 0.7,
+                "3/4": 0.6,
+                "6/8": 0.3
+            }
+        else:
+            # Fast tempos favor 4/4
+            scores = {
+                "4/4": 0.8,
+                "3/4": 0.4,
+                "6/8": 0.2
+            }
+
+        # Find best match
+        best_ts = max(scores.items(), key=lambda x: x[1])
+        return {"time_signature": best_ts[0], "confidence": best_ts[1]}
+
+    def _combine_detection_results(self, results, tempo):
+        """Combine results from different detection methods"""
+        # Define weights for different methods
+        method_weights = {
+            "autocorrelation": 0.25,
+            "pattern_matching": 0.30,
+            "spectral": 0.20,
+            "note_density": 0.20,
+            "tempo_based": 0.05
+        }
+
+        # Prior probability (based on frequency in music)
+        prior_weights = {ts: info["weight"] for ts, info in self.common_time_signatures.items()}
+
+        # Combine votes
+        total_votes = {ts: prior_weights.get(ts, 0.1) for ts in self.common_time_signatures}
+
+        for method, result in results.items():
+            ts = result["time_signature"]
+            confidence = result["confidence"]
+            weight = method_weights.get(method, 0.1)
+
+            # Add weighted vote
+            if ts in total_votes:
+                total_votes[ts] += confidence * weight
+            else:
+                total_votes[ts] = confidence * weight
+
+        # Special case: disambiguate between 3/4 and 6/8
+        if "3/4" in total_votes and "6/8" in total_votes:
+            # If the two are close, use tempo to break the tie
+            if abs(total_votes["3/4"] - total_votes["6/8"]) < 0.1:
+                if tempo < 100:  # Slower tempo favors 6/8
+                    total_votes["6/8"] += 0.1
+                else:  # Faster tempo favors 3/4
+                    total_votes["3/4"] += 0.1
+
+        # Get highest scoring time signature
+        best_ts = max(total_votes.items(), key=lambda x: x[1])
+
+        # Calculate confidence score (normalize to 0-1)
+        confidence = best_ts[1] / (sum(total_votes.values()) + 0.001)
+        confidence = min(0.95, max(0.4, confidence))  # Bound confidence
+
+        return {
+            "time_signature": best_ts[0],
+            "confidence": confidence,
+            "all_candidates": {ts: float(score) for ts, score in total_votes.items()}
+        }
+
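
To make the weighted voting concrete, a toy walk-through of _combine_detection_results with made-up per-method outputs (the numbers are illustrative, not real detections):

    from beat_analysis import BeatAnalyzer

    analyzer = BeatAnalyzer()
    fake_results = {
        "autocorrelation":  {"time_signature": "3/4", "confidence": 0.8},
        "pattern_matching": {"time_signature": "4/4", "confidence": 0.6},
        "spectral":         {"time_signature": "4/4", "confidence": 0.5},
        "note_density":     {"time_signature": "3/4", "confidence": 0.7},
        "tempo_based":      {"time_signature": "4/4", "confidence": 0.7},
    }
    combined = analyzer._combine_detection_results(fake_results, tempo=96)
    # Each meter starts from its prior weight (0.55 / 0.30 / 0.15) and accumulates
    # confidence * method_weight: 4/4 reaches 0.55 + 0.6*0.30 + 0.5*0.20 + 0.7*0.05 = 0.865,
    # 3/4 reaches 0.30 + 0.8*0.25 + 0.7*0.20 = 0.64, so 4/4 wins.
    print(combined["time_signature"], combined["all_candidates"])
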
+    def analyze_beat_pattern(self, audio_path, sr=22050, time_signature="4/4", auto_detect=False):
+        """Analyze beat patterns and stresses in music using the provided time signature."""
+        # Auto-detect time signature if requested
+        if auto_detect:
+            time_sig_result = self.detect_time_signature(audio_path, sr)
+            time_signature = time_sig_result["time_signature"]
+
+        # Load audio
+        y, sr = librosa.load(audio_path, sr=sr)
+
+        # Get tempo and beat frames
+        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
+        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
+
+        # Get beat strengths using the onset envelope
+        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
+        beat_strengths = onset_env[beat_frames]
+
+        # Normalize beat strengths
+        if len(beat_strengths) > 0 and np.max(beat_strengths) > np.min(beat_strengths):
+            beat_strengths = (beat_strengths - np.min(beat_strengths)) / (np.max(beat_strengths) - np.min(beat_strengths))
+
+        # Parse time signature
+        if '/' in time_signature:
+            num, denom = map(int, time_signature.split('/'))
+        else:
+            num, denom = 4, 4  # Default to 4/4
+
+        # Group beats into bars (each bar is one phrase based on the time signature)
+        bars = []
+        current_bar = []
+
+        for i, (time, strength) in enumerate(zip(beat_times, beat_strengths)):
+            # Determine metrical position and stress
+            metrical_position = i % num
+
+            # Define stress pattern according to the time signature
+            if time_signature == "4/4":
+                if metrical_position == 0:  # First beat (strongest)
+                    stress = "S"  # Strong
+                elif metrical_position == 2:  # Third beat (medium)
+                    stress = "M"  # Medium
+                else:  # Second and fourth beats (weak)
+                    stress = "W"  # Weak
+            elif time_signature == "3/4":
+                if metrical_position == 0:  # First beat (strongest)
+                    stress = "S"  # Strong
+                else:  # Other beats (weak)
+                    stress = "W"  # Weak
+            elif time_signature == "6/8":
+                if metrical_position == 0:  # First beat (strongest)
+                    stress = "S"  # Strong
+                elif metrical_position == 3:  # Fourth beat (medium)
+                    stress = "M"  # Medium
+                else:  # Other beats (weak)
+                    stress = "W"  # Weak
+            else:
+                # Default pattern for other time signatures
+                if metrical_position == 0:
+                    stress = "S"
+                else:
+                    stress = "W"
+
+            # Add beat to the current bar
+            current_bar.append({
+                'time': time,
+                'strength': strength,
+                'stress': stress,
+                'metrical_position': metrical_position
+            })
+
+            # When we complete a bar, add it to our bars list
+            if metrical_position == num - 1 or i == len(beat_times) - 1:
+                if current_bar:
+                    bars.append(current_bar)
+                    current_bar = []
+
+        # If there are any remaining beats, add them as a partial bar
+        if current_bar:
+            bars.append(current_bar)
+
+        # Organize beats into phrases (one phrase = one bar)
+        phrases = []
+
+        for i, bar in enumerate(bars):
+            phrase_beats = bar
+
+            if not phrase_beats:
+                continue
+
+            # Calculate the phrase information
+            phrase = {
+                'id': i,
+                'num_beats': len(phrase_beats),
+                'beats': phrase_beats,
+                'stress_pattern': ''.join(beat['stress'] for beat in phrase_beats),
+                'start_time': phrase_beats[0]['time'],
+                'end_time': phrase_beats[-1]['time'] + (phrase_beats[-1]['time'] - phrase_beats[-2]['time'] if len(phrase_beats) > 1 else 0.5),
+            }
+
+            phrases.append(phrase)
+
+        return {
+            'tempo': tempo,
+            'time_signature': time_signature,
+            'num_beats': len(beat_times),
+            'beat_times': beat_times.tolist(),
+            'beat_strengths': beat_strengths.tolist(),
+            'phrases': phrases
+        }
+
+    def create_lyric_template(self, beat_analysis):
+        """Create templates for lyrics based on beat phrases."""
+        templates = []
+
+        if not beat_analysis or 'phrases' not in beat_analysis:
+            return templates
+
+        phrases = beat_analysis['phrases']
+
+        for i, phrase in enumerate(phrases):
+            duration = phrase['end_time'] - phrase['start_time']
+
+            template = {
+                'id': phrase['id'],
+                'start_time': phrase['start_time'],
+                'end_time': phrase['end_time'],
+                'duration': duration,
+                'num_beats': phrase['num_beats'],
+                'stress_pattern': phrase['stress_pattern'],
+                'syllable_guide': self.generate_phrase_guide(phrase)
+            }
+
+            templates.append(template)
+
+        return templates
+
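
A sketch of chaining the two methods above into per-bar lyric templates (again with a placeholder path):

    from beat_analysis import BeatAnalyzer

    analyzer = BeatAnalyzer()
    analysis = analyzer.analyze_beat_pattern("song.wav", auto_detect=True)
    templates = analyzer.create_lyric_template(analysis)
    for t in templates[:2]:
        print(t['stress_pattern'], '->', t['syllable_guide'])
    # A full bar in 4/4 yields stress pattern 'SWMW' and a guide such as
    # "~1 words, ~1-4 syllables | Pattern: STRONG weak medium weak "
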
+    def generate_phrase_guide(self, template, words_per_beat=0.5):
+        """Generate a guide for each phrase to help the LLM."""
+        num_beats = template['num_beats']
+        stress_pattern = template['stress_pattern']
+
+        # Create a visual representation of the stress pattern
+        # S = Strong stress, M = Medium stress, W = Weak stress
+        visual_pattern = ""
+        for i, stress in enumerate(stress_pattern):
+            if stress == "S":
+                visual_pattern += "STRONG "
+            elif stress == "M":
+                visual_pattern += "medium "
+            else:
+                visual_pattern += "weak "
+
+        # Estimate number of words based on beats (very rough estimate)
+        est_words = max(1, int(num_beats * 0.3))  # Reduced further to encourage extreme brevity
+
+        # Estimate syllables - use ultra-conservative ranges
+        # For the 4/4 time signature, we want to enforce extremely short phrases
+        if stress_pattern == "SWMW":  # 4/4 time
+            min_syllables = max(1, int(num_beats * 0.4))  # Reduced from 0.5
+            max_syllables = min(6, int(num_beats * 1.2))  # Reduced from 1.3 to max 6
+        else:
+            min_syllables = max(1, int(num_beats * 0.4))  # Reduced from 0.5
+            max_syllables = min(6, int(num_beats * 1.1))  # Reduced from 1.2 to max 6
+
+        # Store these in the template for future reference
+        template['min_expected'] = min_syllables
+        template['max_expected'] = max_syllables
+
+        guide = f"~{est_words} words, ~{min_syllables}-{max_syllables} syllables | Pattern: {visual_pattern}"
+
+        # Add additional guidance to the template for natural phrasing
+        template['phrasing_guide'] = "ULTRA SHORT LINES. One thought per line. Use FRAGMENTS not sentences."
+
+        return guide
+
+    def check_syllable_stress_match(self, text, template, genre="pop"):
+        """Check if lyrics match the syllable and stress pattern with genre-specific flexibility."""
+        # Split text into words and count syllables
+        words = text.split()
+        syllable_count = sum(self.count_syllables(word) for word in words)
+
+        # Get expected syllable count based on the number of beats
+        expected_count = template['num_beats']
+
+        # Get syllable-to-beat ratios based on genre
+        genre_lower = genre.lower()
+        if genre_lower in self.genre_syllable_ratios:
+            min_ratio, typical_ratio, max_ratio = self.genre_syllable_ratios[genre_lower]
+        else:
+            min_ratio, typical_ratio, max_ratio = self.genre_syllable_ratios['default']
+
+        # Calculate flexible min and max syllable expectations based on genre
+        # Use extremely conservative ranges to enforce ultra-short lines
+        min_expected = max(1, int(expected_count * min_ratio))
+        max_expected = min(6, int(expected_count * max_ratio))  # Hard cap at 6 syllables
+
+        # For the 4/4 time signature, cap the max syllables per line even lower
+        if template['stress_pattern'] == "SWMW":  # 4/4 time
+            max_expected = min(max_expected, 6)  # Cap at 6 syllables max for 4/4
+
+        # Record min and max expected in the template for future reference
+        template['min_expected'] = min_expected
+        template['max_expected'] = max_expected
+
+        # Check if the syllable count falls within the genre-appropriate range
+        within_range = min_expected <= syllable_count <= max_expected
+
+        # Consider the typical ratio - how close are we to the ideal for this genre?
+        ideal_count = int(expected_count * typical_ratio)
+        # Ensure the ideal count is also within our constrained range
+        ideal_count = max(min_expected, min(max_expected, ideal_count))
+
+        # More lenient approach to determining "ideal":
+        # count as ideal if within 1 syllable of the target instead of an exact match
+        close_to_ideal = abs(syllable_count - ideal_count) <= 1
+
+        closeness_to_ideal = 1.0 - min(abs(syllable_count - ideal_count) / (max_expected - min_expected + 1), 1.0)
+
+        # Get a detailed syllable breakdown for stress analysis
+        word_syllables = []
+        for word in words:
+            count = self.count_syllables(word)
+            word_syllables.append(count)
+
+        # Analyze stress pattern match using a more flexible approach
+        stress_pattern = template['stress_pattern']
+
+        # Simple stress matching algorithm (can be improved in future versions):
+        # we need to map syllables to beats in a more flexible way
+        syllable_to_beat_mapping = self._map_syllables_to_beats(word_syllables, stress_pattern)
+
+        # Calculate stress match score based on alignment of stressed syllables with strong beats
+        stress_match_percentage = self._calculate_stress_match(words, word_syllables, syllable_to_beat_mapping, stress_pattern)
+
+        # Consider it a stress match if the percentage is high enough
+        stress_matches = stress_match_percentage >= 0.6  # Reduced from 0.7 to be more lenient
+
+        return {
+            'syllable_count': syllable_count,
+            'expected_count': expected_count,
+            'min_expected': min_expected,
+            'max_expected': max_expected,
+            'within_range': within_range,
+            'matches_beat_count': syllable_count == expected_count,  # Exact match (strict)
+            'close_match': within_range,  # Flexible match (based on genre)
+            'stress_matches': stress_matches,
+            'stress_match_percentage': stress_match_percentage,
+            'closeness_to_ideal': closeness_to_ideal,
+            'word_syllables': word_syllables,
+            'ideal_syllable_count': ideal_count,
+            'close_to_ideal': close_to_ideal  # New field
+        }
+
+    def _map_syllables_to_beats(self, word_syllables, stress_pattern):
+        """Map syllables to beats in a flexible way."""
+        total_syllables = sum(word_syllables)
+        total_beats = len(stress_pattern)
+
+        # Simple mapping for now - this could be improved with more sophisticated algorithms
+        if total_syllables <= total_beats:
+            # Fewer syllables than beats - some beats have no syllables (prolongation)
+            mapping = []
+            syllable_index = 0
+            for beat_index in range(total_beats):
+                if syllable_index < total_syllables:
+                    mapping.append((syllable_index, beat_index))
+                    syllable_index += 1
+            return mapping
+        else:
+            # More syllables than beats - some beats have multiple syllables (melisma/syncopation)
+            mapping = []
+            syllables_per_beat = total_syllables / total_beats
+            for beat_index in range(total_beats):
+                start_syllable = int(beat_index * syllables_per_beat)
+                end_syllable = int((beat_index + 1) * syllables_per_beat)
+                for syllable_index in range(start_syllable, end_syllable):
+                    if syllable_index < total_syllables:
+                        mapping.append((syllable_index, beat_index))
+            return mapping
+
+    def _calculate_stress_match(self, words, word_syllables, syllable_to_beat_mapping, stress_pattern):
+        """Calculate how well syllable stresses match beat stresses."""
+        # This is a simplified version - real stress analysis would be more complex.
+        # For now, we'll assume the first syllable of each word is stressed.
+
+        # First, create a flat list of all syllables with their stress (1 = stressed, 0 = unstressed)
+        syllable_stresses = []
+        for word, syllable_count in zip(words, word_syllables):
+            # Simple assumption: first syllable is stressed, the rest are unstressed
+            for i in range(syllable_count):
+                if i == 0:  # First syllable of word
+                    syllable_stresses.append(1)  # Stressed
+                else:
+                    syllable_stresses.append(0)  # Unstressed
+
+        # Count matches between syllable stress and beat stress
+        matches = 0
+        total_mapped = 0
+
+        for syllable_index, beat_index in syllable_to_beat_mapping:
+            if syllable_index < len(syllable_stresses):
+                syllable_stress = syllable_stresses[syllable_index]
+                beat_stress = 1 if stress_pattern[beat_index] == 'S' else (0.5 if stress_pattern[beat_index] == 'M' else 0)
+
+                # Consider it a match if:
+                # - Stressed syllable on Strong beat
+                # - Unstressed syllable on Weak beat
+                # - Some partial credit for other combinations
+                if (syllable_stress == 1 and beat_stress > 0.5) or (syllable_stress == 0 and beat_stress < 0.5):
+                    matches += 1
+                elif syllable_stress == 1 and beat_stress == 0.5:  # Stressed syllable on Medium beat
+                    matches += 0.7
+
+                total_mapped += 1
+
+        if total_mapped == 0:
+            return 0
+
+        return matches / total_mapped
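
Finally, a sketch of scoring a candidate line against a hand-built 4/4 template (in practice the template would come from create_lyric_template):

    from beat_analysis import BeatAnalyzer

    analyzer = BeatAnalyzer()
    template = {'num_beats': 4, 'stress_pattern': 'SWMW'}
    result = analyzer.check_syllable_stress_match("Hold me close", template, genre="pop")
    # With the CMU dictionary available: 3 syllables against 4 beats, which falls
    # inside pop's 2-6 range, so result['within_range'] is True.
    print(result['syllable_count'], result['within_range'], round(result['stress_match_percentage'], 2))
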
emotionanalysis.py CHANGED
@@ -1,5 +1,7 @@
 import librosa
 import numpy as np
+from scipy import signal
+from collections import Counter
 try:
     import matplotlib.pyplot as plt
 except ImportError:
@@ -7,8 +9,13 @@ except ImportError:
 from scipy.stats import mode
 import warnings
 warnings.filterwarnings('ignore')  # Suppress librosa warnings
+from beat_analysis import BeatAnalyzer  # Import BeatAnalyzer for rhythm analysis
+
 class MusicAnalyzer:
     def __init__(self):
+        # Create an instance of BeatAnalyzer for rhythm detection
+        self.beat_analyzer = BeatAnalyzer()
+
         # Emotion feature mappings - these define characteristics of different emotions
         self.emotion_profiles = {
             'happy': {'tempo': (100, 180), 'energy': (0.6, 1.0), 'major_mode': True, 'brightness': (0.6, 1.0)},
@@ -56,8 +63,20 @@ class MusicAnalyzer:
         ac = librosa.autocorrelate(onset_env, max_size=sr // 2)
         ac = librosa.util.normalize(ac, norm=np.inf)

-        # Time signature estimation - a challenging task with many limitations
-        estimated_signature = self._estimate_time_signature(y, sr, beat_times, onset_env)
+        # Use BeatAnalyzer for advanced time signature detection.
+        # We need to save the audio temporarily to use the BeatAnalyzer method.
+        import tempfile
+        import soundfile as sf
+
+        # Create a temporary file
+        with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as temp_file:
+            sf.write(temp_file.name, y, sr)
+            # Use BeatAnalyzer's advanced time signature detection
+            time_sig_result = self.beat_analyzer.detect_time_signature(temp_file.name)
+
+        # Extract results from the time signature detection
+        estimated_signature = time_sig_result["time_signature"]
+        time_sig_confidence = time_sig_result["confidence"]

         # Compute onset strength to get a measure of rhythm intensity
         rhythm_intensity = np.mean(onset_env) / np.max(onset_env) if np.max(onset_env) > 0 else 0
@@ -65,49 +84,22 @@ class MusicAnalyzer:
         # Rhythm complexity based on variation in onset strength
         rhythm_complexity = np.std(onset_env) / np.mean(onset_env) if np.mean(onset_env) > 0 else 0

+        # Convert numpy arrays to regular Python types for JSON serialization
+        beat_times_list = [float(t) for t in beat_times.tolist()]
+        beat_intervals_list = [float(i) for i in beat_intervals.tolist()]
+
         return {
             "tempo": float(tempo),
-            "beat_times": beat_times.tolist(),
-            "beat_intervals": beat_intervals.tolist(),
+            "beat_times": beat_times_list,
+            "beat_intervals": beat_intervals_list,
             "beat_regularity": float(beat_regularity),
             "rhythm_intensity": float(rhythm_intensity),
             "rhythm_complexity": float(rhythm_complexity),
-            "estimated_time_signature": estimated_signature
+            "estimated_time_signature": estimated_signature,
+            "time_signature_confidence": float(time_sig_confidence),
+            "time_signature_candidates": time_sig_result.get("all_candidates", {})
         }

-    def _estimate_time_signature(self, y, sr, beat_times, onset_env):
-        """Estimate the time signature based on beat patterns"""
-        # This is a simplified approach - accurate time signature detection is complex
-        if len(beat_times) < 4:
-            return "Unknown"
-
-        # Analyze beat emphasis patterns to detect meter
-        beat_intervals = np.diff(beat_times)
-
-        # Look for periodicity in the onset envelope
-        ac = librosa.autocorrelate(onset_env, max_size=sr)
-
-        # Find peaks in autocorrelation after the first one (which is at lag 0)
-        peaks = librosa.util.peak_pick(ac, pre_max=20, post_max=20, pre_avg=20, post_avg=20, delta=0.1, wait=1)
-        peaks = peaks[peaks > 0]  # Remove the first peak, which is at lag 0
-
-        if len(peaks) == 0:
-            return "4/4"  # Default to most common
-
-        # Convert first significant peak to beats
-        first_peak_time = peaks[0] / sr
-        beats_per_bar = round(first_peak_time / np.median(beat_intervals))
-
-        # Map to common time signatures
-        if beats_per_bar == 4 or beats_per_bar == 8:
-            return "4/4"
-        elif beats_per_bar == 3 or beats_per_bar == 6:
-            return "3/4"
-        elif beats_per_bar == 2:
-            return "2/4"
-        else:
-            return f"{beats_per_bar}/4"  # Default assumption
-
     def analyze_tonality(self, y, sr):
         """Analyze tonal features: key, mode, harmonic features"""
         # Compute chromagram
@@ -355,6 +347,26 @@ class MusicAnalyzer:
         emotion_data = self.analyze_emotion(rhythm_data, tonal_data, energy_data)
         theme_data = self.analyze_theme(rhythm_data, tonal_data, emotion_data)

+        # Convert any remaining numpy values to native Python types
+        def convert_numpy_to_python(obj):
+            if isinstance(obj, dict):
+                return {k: convert_numpy_to_python(v) for k, v in obj.items()}
+            elif isinstance(obj, list):
+                return [convert_numpy_to_python(item) for item in obj]
+            elif isinstance(obj, np.ndarray):
+                return obj.tolist()
+            elif isinstance(obj, np.number):
+                return float(obj)
+            else:
+                return obj
+
+        # Ensure all numpy values are converted
+        rhythm_data = convert_numpy_to_python(rhythm_data)
+        tonal_data = convert_numpy_to_python(tonal_data)
+        energy_data = convert_numpy_to_python(energy_data)
+        emotion_data = convert_numpy_to_python(emotion_data)
+        theme_data = convert_numpy_to_python(theme_data)
+
         # Combine all results
         return {
             "file": file_path,
@@ -364,7 +376,7 @@ class MusicAnalyzer:
             "emotion_analysis": emotion_data,
             "theme_analysis": theme_data,
             "summary": {
-                "tempo": rhythm_data["tempo"],
+                "tempo": float(rhythm_data["tempo"]),
                 "time_signature": rhythm_data["estimated_time_signature"],
                 "key": tonal_data["key"],
                 "mode": tonal_data["mode"],
requirements.txt CHANGED
@@ -13,3 +13,4 @@ scipy>=1.12.0
 soundfile>=0.12.1
 matplotlib>=3.7.0
 pronouncing>=0.2.0
+nltk>=3.8.1