Spaces:

jacob-c
/

syllables_matching_experiment

Paused

syllables_matching_experiment / beat_analysis.py

2ef1fb1 about 2 months ago

37 kB

	import librosa
	import numpy as np
	import pronouncing
	import re
	from functools import lru_cache
	import string
	from nltk.corpus import cmudict
	import nltk
	from scipy import signal

	# Make sure the CMU Pronouncing Dictionary corpus is present before the
	# analyzer tries to load it: look it up locally first and only download it
	# when NLTK reports that it cannot be found.
	try:
	    nltk.data.find('corpora/cmudict')
	except LookupError:
	    nltk.download('cmudict')

	class BeatAnalyzer:
	def __init__(self):
	# Mapping for standard stress patterns by time signature
	# Simplified to only include 4/4, 3/4, and 6/8
	self.stress_patterns = {
	# Format: Strong (1.0), Medium (0.5), Weak (0.0)
	"4/4": [1.0, 0.0, 0.5, 0.0], # Strong, weak, medium, weak
	"3/4": [1.0, 0.0, 0.0], # Strong, weak, weak
	"6/8": [1.0, 0.0, 0.0, 0.5, 0.0, 0.0] # Strong, weak, weak, medium, weak, weak
	}

	self.cmudict = None
	try:
	self.cmudict = cmudict.dict()
	except:
	pass # Fall back to rule-based counting if cmudict is not available

	# Genre-specific syllable-to-beat ratio guidelines
	self.genre_syllable_ratios = {
	# Supported genres with strong syllable-to-beat patterns
	'pop': (0.5, 1.0, 1.5), # Pop - significantly reduced range
	'rock': (0.5, 0.9, 1.3), # Rock - reduced for brevity
	'country': (0.6, 0.9, 1.2), # Country - simpler syllable patterns
	'disco': (0.7, 1.0, 1.3), # Disco - tightened range
	'metal': (0.6, 1.0, 1.3), # Metal - reduced upper limit

	# Other genres (analysis only, no lyrics generation)
	'hiphop': (1.8, 2.5, 3.5), # Hip hop often has many syllables per beat
	'rap': (2.0, 3.0, 4.0), # Rap often has very high syllable counts
	'folk': (0.8, 1.0, 1.3), # Folk often has close to 1:1 ratio
	'jazz': (0.7, 1.0, 1.5), # Jazz can be very flexible
	'reggae': (0.7, 1.0, 1.3), # Reggae often emphasizes specific beats
	'soul': (0.8, 1.2, 1.6), # Soul music tends to be expressive
	'r&b': (1.0, 1.5, 2.0), # R&B can have melisma
	'electronic': (0.7, 1.0, 1.5), # Electronic music varies widely
	'classical': (0.7, 1.0, 1.4), # Classical can vary by subgenre
	'blues': (0.6, 0.8, 1.2), # Blues often extends syllables
	'default': (0.6, 1.0, 1.3) # Default for unknown genres - more conservative
	}

	# List of genres supported for lyrics generation
	# These genres have the most predictable and consistent syllable-to-beat relationships,
	# making them ideal for our beat-matching algorithm
	self.supported_genres = ['pop', 'rock', 'country', 'disco', 'metal']

	# Common time signatures and their beat patterns with weights for prior probability
	# Simplified to only include 4/4, 3/4, and 6/8
	self.common_time_signatures = {
	"4/4": {"beats_per_bar": 4, "beat_pattern": [1.0, 0.2, 0.5, 0.2], "weight": 0.55},
	"3/4": {"beats_per_bar": 3, "beat_pattern": [1.0, 0.2, 0.3], "weight": 0.30},
	"6/8": {"beats_per_bar": 6, "beat_pattern": [1.0, 0.2, 0.3, 0.8, 0.2, 0.3], "weight": 0.15}
	}

	# Add common accent patterns for different time signatures
	self.accent_patterns = {
	"4/4": [[1, 0, 0, 0], [1, 0, 2, 0], [1, 0, 2, 0, 3, 0, 2, 0]],
	"3/4": [[1, 0, 0], [1, 0, 2]],
	"6/8": [[1, 0, 0, 2, 0, 0], [1, 0, 0, 2, 0, 3]]
	}

	# Expected rhythm density (relative note density per beat) for different time signatures
	self.rhythm_density = {
	"4/4": [1.0, 0.7, 0.8, 0.6],
	"3/4": [1.0, 0.6, 0.7],
	"6/8": [1.0, 0.5, 0.4, 0.8, 0.5, 0.4]
	}

	@lru_cache(maxsize=128)
	def count_syllables(self, word):
	"""Count syllables in a word using CMU dictionary if available, otherwise use rule-based method."""
	word = word.lower().strip()
	word = re.sub(r'[^a-z]', '', word) # Remove non-alphabetic characters

	if not word:
	return 0

	# Try using CMUDict first if available
	if self.cmudict and word in self.cmudict:
	return max([len(list(y for y in x if y[-1].isdigit())) for x in self.cmudict[word]])

	# Rule-based syllable counting as fallback
	# Modified version from NLTK's implementation
	vowels = "aeiouy"
	double_vowels = ['aa', 'ae', 'ai', 'ao', 'au', 'ay', 'ea', 'ee', 'ei', 'eo', 'eu', 'ey', 'ia', 'ie', 'ii', 'io', 'iu', 'oa', 'oe', 'oi', 'oo', 'ou', 'oy', 'ua', 'ue', 'ui', 'uo', 'uy']
	prev_was_vowel = False
	count = 0
	final_e = False

	if word.endswith('e') and not word.endswith('le'):
	final_e = True

	for i, char in enumerate(word):
	if char in vowels:
	# Check if current char and previous char form a dipthong
	if prev_was_vowel and i > 0 and (word[i-1:i+1] in double_vowels):
	prev_was_vowel = True
	continue

	if not prev_was_vowel:
	count += 1
	prev_was_vowel = True
	else:
	prev_was_vowel = False

	# Handle edge cases
	if word.endswith('le') and len(word) > 2 and word[-3] not in vowels:
	count += 1
	elif final_e:
	count = max(count-1, 1) # Remove last 'e', but ensure at least 1 syllable
	elif word.endswith('y') and not prev_was_vowel:
	count += 1

	# Ensure at least one syllable
	return max(count, 1)

	def detect_time_signature(self, audio_path, sr=22050):
	    """
	    Estimate the time signature of an audio file by combining five detectors.

	    The audio is loaded, an onset-strength envelope is computed (hop length
	    512) and beats are tracked on it. Each detector then casts a vote and the
	    votes are merged by `_combine_detection_results`.

	    Args:
	        audio_path: Path to audio file
	        sr: Sample rate used when loading the audio

	    Returns:
	        dict with "time_signature" and "confidence". When fewer than 8 beats
	        are found the result is {"time_signature": "4/4", "confidence": 0.5};
	        otherwise it is whatever `_combine_detection_results` returns.
	    """
	    y, sr = librosa.load(audio_path, sr=sr)

	    # Onset envelope drives both the beat tracker and several detectors.
	    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
	    tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
	    beat_times = librosa.frames_to_time(beat_frames, sr=sr)

	    # Too few beats to say anything useful: assume common time.
	    if len(beat_times) < 8:
	        return {"time_signature": "4/4", "confidence": 0.5}

	    beat_strengths = self._get_beat_strengths(y, sr, beat_times, onset_env)

	    # One entry per detection method, evaluated in this order.
	    results = {
	        "autocorrelation": self._detect_by_autocorrelation(onset_env, sr),
	        "pattern_matching": self._detect_by_pattern_matching(beat_strengths),
	        "spectral": self._detect_by_spectral_analysis(onset_env, sr),
	        "note_density": self._detect_by_note_density(y, sr, beat_times),
	        "tempo_based": self._estimate_from_tempo(tempo),
	    }

	    # Weighted vote across all methods.
	    return self._combine_detection_results(results, tempo)

	def _get_beat_strengths(self, y, sr, beat_times, onset_env):
	    """Return a per-beat strength array scaled so that its maximum is 1.

	    Each beat's strength is 0.7 * onset strength + 0.3 * RMS energy, both
	    sampled at the beat's frame (hop length 512, RMS frame length 2048).
	    Frame indices past the end of either series are clamped to its last
	    frame. The result is divided by its maximum when that maximum is > 0.
	    """
	    hop_length = 512
	    frame_length = 2048

	    # Beat times -> frame indices, clamped into the onset envelope.
	    last_onset_frame = len(onset_env) - 1
	    raw_frames = librosa.time_to_frames(beat_times, sr=sr, hop_length=512)
	    beat_frames = [min(frame, last_onset_frame) for frame in raw_frames]

	    onset_at_beats = np.array([onset_env[frame] for frame in beat_frames])

	    # RMS energy sampled at the same beat frames.
	    energy = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0]
	    last_energy_frame = len(energy) - 1
	    energy_at_beats = np.array([energy[min(frame, last_energy_frame)] for frame in beat_frames])

	    # Weighted blend of onset strength and energy.
	    combined = 0.7 * onset_at_beats + 0.3 * energy_at_beats

	    peak = np.max(combined)
	    if peak > 0:
	        combined = combined / peak

	    return combined

	def _detect_by_autocorrelation(self, onset_env, sr):
	    """Vote for a meter from the spacing of onset-envelope autocorrelation peaks.

	    The envelope is autocorrelated over lags of up to 4 seconds, normalized,
	    and peaks of height >= 0.2 are located. For every known time signature
	    the vote is the fraction of consecutive-peak spacings that fall within
	    25% of that signature's expected value.

	    Returns:
	        dict with "time_signature" and "confidence"; the 4/4 default with
	        confidence 0.4 is returned when fewer than two peaks are found.
	    """
	    hop_length = 512
	    default = {"time_signature": "4/4", "confidence": 0.4}

	    ac = librosa.util.normalize(
	        librosa.autocorrelate(onset_env, max_size=4 * sr // hop_length)
	    )

	    # Significant peaks, at least sr // (8 * hop_length) frames apart.
	    peaks = signal.find_peaks(ac, height=0.2, distance=sr // (8 * hop_length))[0]
	    if len(peaks) < 2:
	        return default

	    peak_intervals = np.diff(peaks)  # spacing between neighbouring peaks, in frames

	    votes = {}
	    for ts, info in self.common_time_signatures.items():
	        # NOTE(review): `expected` is beats_per_bar hop lengths expressed in
	        # seconds, not beats_per_bar beats; no tempo enters this method, so a
	        # peak spacing in frames is effectively compared with the beat count.
	        # Behaviour is kept as is; the intended units should be confirmed.
	        expected = info["beats_per_bar"] * (hop_length / sr)
	        tolerance = 0.25 * expected

	        hits = sum(
	            1
	            for interval in peak_intervals
	            if abs(interval * hop_length / sr - expected) < tolerance
	        )

	        if len(peak_intervals) > 0:
	            votes[ts] = hits / len(peak_intervals)

	    if not votes:
	        return default

	    # Highest vote wins; ties go to the first signature encountered.
	    winner = max(votes, key=votes.get)
	    return {"time_signature": winner, "confidence": votes[winner]}

	def _detect_by_pattern_matching(self, beat_strengths):
	"""Match beat strength patterns against known time signature patterns"""
	if len(beat_strengths) < 6:
	return {"time_signature": "4/4", "confidence": 0.4}

	results = {}

	# Try each possible time signature
	for ts, info in self.common_time_signatures.items():
	beats_per_bar = info["beats_per_bar"]
	expected_pattern = info["beat_pattern"]

	# Calculate correlation scores for overlapping segments
	scores = []

	# We need at least one complete pattern
	if len(beat_strengths) >= beats_per_bar:
	# Try different offsets to find best alignment
	for offset in range(min(beats_per_bar, len(beat_strengths) - beats_per_bar + 1)):
	# Calculate scores for each complete pattern
	pattern_scores = []

	for i in range(offset, len(beat_strengths) - beats_per_bar + 1, beats_per_bar):
	segment = beat_strengths[i:i+beats_per_bar]

	# If expected pattern is longer than segment, truncate it
	pattern = expected_pattern[:len(segment)]

	# Normalize segment and pattern
	if np.std(segment) > 0 and np.std(pattern) > 0:
	# Calculate correlation
	corr = np.corrcoef(segment, pattern)[0, 1]
	if not np.isnan(corr):
	pattern_scores.append(corr)

	if pattern_scores:
	scores.append(np.mean(pattern_scores))

	# Use the best score among different offsets
	if scores:
	confidence = max(scores)
	results[ts] = confidence

	# Find best match
	if results:
	best_ts = max(results.items(), key=lambda x: x[1])
	return {"time_signature": best_ts[0], "confidence": best_ts[1]}

	# Default
	return {"time_signature": "4/4", "confidence": 0.5}

	def _detect_by_spectral_analysis(self, onset_env, sr):
	"""Analyze rhythm in frequency domain"""
	# Get rhythm periodicity through Fourier Transform
	# Focus on periods corresponding to typical bar lengths (1-8 seconds)
	hop_length = 512

	# Calculate rhythm periodicity
	fft_size = 2**13 # Large enough to give good frequency resolution
	S = np.abs(np.fft.rfft(onset_env, n=fft_size))

	# Convert frequency to tempo in BPM
	freqs = np.fft.rfftfreq(fft_size, d=hop_length/sr)
	tempos = 60 * freqs

	# Focus on reasonable tempo range (40-240 BPM)
	tempo_mask = (tempos >= 40) & (tempos <= 240)
	S_tempo = S[tempo_mask]
	tempos = tempos[tempo_mask]

	# Find peaks in spectrum
	peaks = signal.find_peaks(S_tempo, height=np.max(S_tempo)*0.1, distance=5)[0]

	if len(peaks) == 0:
	return {"time_signature": "4/4", "confidence": 0.4}

	# Get peak tempos and strengths
	peak_tempos = tempos[peaks]
	peak_strengths = S_tempo[peaks]

	# Sort by strength
	peak_indices = np.argsort(peak_strengths)[::-1]
	peak_tempos = peak_tempos[peak_indices]
	peak_strengths = peak_strengths[peak_indices]

	# Analyze relationships between peaks
	# For example, 3/4 typically has peaks at multiples of 3 beats
	# 4/4 has peaks at multiples of 4 beats

	time_sig_scores = {}

	# Check relationships between top peaks
	if len(peak_tempos) >= 2:
	tempo_ratios = []
	for i in range(len(peak_tempos)):
	for j in range(i+1, len(peak_tempos)):
	if peak_tempos[j] > 0:
	ratio = peak_tempos[i] / peak_tempos[j]
	tempo_ratios.append(ratio)

	# Check for patterns indicative of different time signatures
	for ts in self.common_time_signatures:
	score = 0

	if ts == "4/4" or ts == "6/8":
	# Look for ratios close to 4 or 6
	for ratio in tempo_ratios:
	if abs(ratio - 4) < 0.2 or abs(ratio - 6) < 0.3:
	score += 1

	# Normalize score
	if tempo_ratios:
	time_sig_scores[ts] = min(1.0, score / len(tempo_ratios) + 0.4)

	# If we have meaningful scores, return best match
	if time_sig_scores:
	best_ts = max(time_sig_scores.items(), key=lambda x: x[1])
	return {"time_signature": best_ts[0], "confidence": best_ts[1]}

	# Default fallback
	return {"time_signature": "4/4", "confidence": 0.4}

	def _detect_by_note_density(self, y, sr, beat_times):
	    """Vote for a meter from how many note onsets fall inside each beat.

	    Onsets are detected on the raw audio and counted per beat interval
	    [beat_i, beat_i+1). For every known signature the counts are cut into
	    bars (trying each start offset within one bar); each bar's normalized
	    count profile is compared with the signature's expected rhythm density
	    using 1 - mean absolute difference. The best per-offset mean is the
	    signature's score.

	    Returns:
	        dict with "time_signature" and "confidence"; 4/4 at 0.4 when there
	        are fewer than 6 beats, fewer onsets than beats, or no score.
	    """
	    fallback = {"time_signature": "4/4", "confidence": 0.4}

	    if len(beat_times) < 6:
	        return fallback

	    # All note onsets, not just the beats.
	    onset_times = librosa.onset.onset_detect(y=y, sr=sr, units='time')
	    if len(onset_times) < len(beat_times):
	        return fallback

	    # Number of onsets inside each half-open beat interval.
	    note_counts = [
	        sum(1 for t in onset_times if beat_times[k] <= t < beat_times[k + 1])
	        for k in range(len(beat_times) - 1)
	    ]

	    signature_scores = {}

	    for ts, info in self.common_time_signatures.items():
	        bar_len = info["beats_per_bar"]

	        # Not enough beats for even one bar of this meter.
	        if len(note_counts) < bar_len:
	            continue

	        last_start = len(note_counts) - bar_len + 1
	        offset_scores = []

	        for offset in range(min(bar_len, last_start)):
	            similarities = []

	            for start in range(offset, last_start, bar_len):
	                observed = note_counts[start:start + bar_len]
	                expected = self.rhythm_density.get(ts, [1.0] * bar_len)
	                expected = expected[:len(observed)]  # Truncate if needed

	                observed_total = sum(observed)
	                expected_total = sum(expected)
	                if not (observed_total > 0 and expected_total > 0):
	                    continue

	                # Turn both profiles into shares of their own total.
	                observed_norm = [c / max(1, observed_total) for c in observed]
	                expected_norm = [e / expected_total for e in expected]

	                # Similarity = 1 - mean absolute difference (capped at 1).
	                distance = sum(abs(p - e) for p, e in zip(observed_norm, expected_norm)) / len(observed)
	                similarities.append(1 - min(1.0, distance))

	            if similarities:
	                offset_scores.append(np.mean(similarities))

	        if offset_scores:
	            signature_scores[ts] = max(offset_scores)

	    if signature_scores:
	        winner = max(signature_scores, key=signature_scores.get)
	        return {"time_signature": winner, "confidence": signature_scores[winner]}

	    return fallback

	def _estimate_from_tempo(self, tempo):
	"""Use tempo to help estimate likely time signature"""
	# Statistical tendencies: slower tempos often in compound meters (6/8)
	# Fast tempos favor 4/4

	scores = {}

	if tempo < 70:
	# Slow tempos favor compound meters
	scores = {
	"4/4": 0.5,
	"3/4": 0.4,
	"6/8": 0.7
	}
	elif 70 <= tempo <= 120:
	# Medium tempos favor 4/4, 3/4
	scores = {
	"4/4": 0.7,
	"3/4": 0.6,
	"6/8": 0.3
	}
	else:
	# Fast tempos favor 4/4
	scores = {
	"4/4": 0.8,
	"3/4": 0.4,
	"6/8": 0.2
	}

	# Find best match
	best_ts = max(scores.items(), key=lambda x: x[1])
	return {"time_signature": best_ts[0], "confidence": best_ts[1]}

	def _combine_detection_results(self, results, tempo):
	"""Combine results from different detection methods"""
	# Define weights for different methods
	method_weights = {
	"autocorrelation": 0.25,
	"pattern_matching": 0.30,
	"spectral": 0.20,
	"note_density": 0.20,
	"tempo_based": 0.05
	}

	# Prior probability (based on frequency in music)
	prior_weights = {ts: info["weight"] for ts, info in self.common_time_signatures.items()}

	# Combine votes
	total_votes = {ts: prior_weights.get(ts, 0.1) for ts in self.common_time_signatures}

	for method, result in results.items():
	ts = result["time_signature"]
	confidence = result["confidence"]
	weight = method_weights.get(method, 0.1)

	# Add weighted vote
	if ts in total_votes:
	total_votes[ts] += confidence * weight
	else:
	total_votes[ts] = confidence * weight

	# Special case: disambiguate between 3/4 and 6/8
	if "3/4" in total_votes and "6/8" in total_votes:
	# If the two are close, use tempo to break tie
	if abs(total_votes["3/4"] - total_votes["6/8"]) < 0.1:
	if tempo < 100: # Slower tempo favors 6/8
	total_votes["6/8"] += 0.1
	else: # Faster tempo favors 3/4
	total_votes["3/4"] += 0.1

	# Get highest scoring time signature
	best_ts = max(total_votes.items(), key=lambda x: x[1])

	# Calculate confidence score (normalize to 0-1)
	confidence = best_ts[1] / (sum(total_votes.values()) + 0.001)
	confidence = min(0.95, max(0.4, confidence)) # Bound confidence

	return {
	"time_signature": best_ts[0],
	"confidence": confidence,
	"all_candidates": {ts: float(score) for ts, score in total_votes.items()}
	}

	def analyze_beat_pattern(self, audio_path, sr=22050, time_signature="4/4", auto_detect=False):
	    """Analyze beat patterns and stresses in music using the given time signature.

	    Beats are tracked with librosa, their onset strengths are min-max
	    normalized, and the beats are grouped into bars of `num` beats (the
	    numerator of the time signature). Each bar becomes one phrase.

	    Args:
	        audio_path: Path to audio file.
	        sr: Sample rate used when loading the audio.
	        time_signature: "N/D" string; anything without "/" is treated as 4/4
	            for grouping purposes. Ignored when auto_detect is true.
	        auto_detect: When true, `detect_time_signature` chooses the signature.

	    Returns:
	        dict with:
	            'tempo' (float BPM), 'time_signature', 'num_beats',
	            'beat_times' (list), 'beat_strengths' (list) and
	            'phrases': list of dicts with 'id', 'num_beats', 'beats',
	            'stress_pattern' (e.g. "SWMW"), 'start_time' and 'end_time'.
	        Per-beat 'time' and 'strength' values are plain Python floats.

	    Raises:
	        ValueError: if the time signature contains "/" but its parts are not
	            integers.
	    """
	    # Auto-detect time signature if requested
	    if auto_detect:
	        time_signature = self.detect_time_signature(audio_path, sr)["time_signature"]

	    # Load audio
	    y, sr = librosa.load(audio_path, sr=sr)

	    # Get tempo and beat frames
	    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
	    beat_times = librosa.frames_to_time(beat_frames, sr=sr)

	    # Newer librosa releases return tempo as a one-element array rather than
	    # a scalar; the rest of this result uses plain Python types (see the
	    # .tolist() calls below), so reduce it to a float.
	    tempo_values = np.asarray(tempo).ravel()
	    tempo = float(tempo_values[0]) if tempo_values.size else 0.0

	    # Get beat strengths using onset envelope
	    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
	    beat_strengths = onset_env[beat_frames]

	    # Min-max normalize beat strengths (skipped when all values are equal)
	    if len(beat_strengths) > 0 and np.max(beat_strengths) > np.min(beat_strengths):
	        beat_strengths = (beat_strengths - np.min(beat_strengths)) / (np.max(beat_strengths) - np.min(beat_strengths))

	    # Parse time signature
	    if '/' in time_signature:
	        num, denom = map(int, time_signature.split('/'))
	    else:
	        num, denom = 4, 4  # Default to 4/4

	    # Metrical positions carrying Strong / Medium stress per signature;
	    # every other position is Weak. Unknown signatures only stress beat 1.
	    stress_by_signature = {
	        "4/4": {0: "S", 2: "M"},
	        "3/4": {0: "S"},
	        "6/8": {0: "S", 3: "M"},
	    }
	    stress_at = stress_by_signature.get(time_signature, {0: "S"})

	    # Group beats into bars (each bar is one phrase based on time signature)
	    bars = []
	    current_bar = []
	    last_index = len(beat_times) - 1

	    for i, (time, strength) in enumerate(zip(beat_times, beat_strengths)):
	        metrical_position = i % num

	        current_bar.append({
	            # float() keeps numpy scalars out of the returned structure
	            'time': float(time),
	            'strength': float(strength),
	            'stress': stress_at.get(metrical_position, "W"),
	            'metrical_position': metrical_position,
	        })

	        # Close the bar on its last beat, or on the very last beat overall
	        # (which may leave a shorter, partial final bar).
	        if metrical_position == num - 1 or i == last_index:
	            bars.append(current_bar)
	            current_bar = []

	    # If there's any remaining beats, add them as a partial bar
	    if current_bar:
	        bars.append(current_bar)

	    # Organize beats into phrases (one phrase = one bar)
	    phrases = []
	    for i, phrase_beats in enumerate(bars):
	        if not phrase_beats:
	            continue

	        last_time = phrase_beats[-1]['time']
	        # The phrase ends one beat after its last beat; the beat length is
	        # taken from the last two beats, or 0.5 s for a single-beat phrase.
	        if len(phrase_beats) > 1:
	            tail = last_time - phrase_beats[-2]['time']
	        else:
	            tail = 0.5

	        phrases.append({
	            'id': i,
	            'num_beats': len(phrase_beats),
	            'beats': phrase_beats,
	            'stress_pattern': ''.join(beat['stress'] for beat in phrase_beats),
	            'start_time': phrase_beats[0]['time'],
	            'end_time': last_time + tail,
	        })

	    return {
	        'tempo': tempo,
	        'time_signature': time_signature,
	        'num_beats': len(beat_times),
	        'beat_times': beat_times.tolist(),
	        'beat_strengths': beat_strengths.tolist(),
	        'phrases': phrases
	    }

	def create_lyric_template(self, beat_analysis):
	"""Create templates for lyrics based on beat phrases."""
	templates = []

	if not beat_analysis or 'phrases' not in beat_analysis:
	return templates

	phrases = beat_analysis['phrases']

	for i, phrase in enumerate(phrases):
	duration = phrase['end_time'] - phrase['start_time']

	template = {
	'id': phrase['id'],
	'start_time': phrase['start_time'],
	'end_time': phrase['end_time'],
	'duration': duration,
	'num_beats': phrase['num_beats'],
	'stress_pattern': phrase['stress_pattern'],
	'syllable_guide': self.generate_phrase_guide(phrase)
	}

	templates.append(template)

	return templates

	def generate_phrase_guide(self, template, words_per_beat=0.5):
	    """Generate a guide for each phrase to help the LLM.

	    Args:
	        template: Dict with 'num_beats' and 'stress_pattern' (a string of
	            "S"/"M"/"W" characters). It is updated in place with
	            'min_expected', 'max_expected' and 'phrasing_guide'.
	        words_per_beat: Currently unused; the word estimate uses a fixed
	            0.3 words per beat.

	    Returns:
	        str: "~<words> words, ~<min>-<max> syllables \\| Pattern: <pattern>"
	        where the pattern spells each beat as "STRONG ", "medium " or
	        "weak " (including the trailing space).
	    """
	    num_beats = template['num_beats']
	    stress_pattern = template['stress_pattern']

	    # Visual representation of the stress pattern
	    # S = Strong stress, M = Medium stress, anything else = Weak stress
	    stress_labels = {"S": "STRONG ", "M": "medium "}
	    visual_pattern = "".join(stress_labels.get(stress, "weak ") for stress in stress_pattern)

	    # Estimate number of words based on beats (very rough estimate)
	    est_words = max(1, int(num_beats * 0.3))  # Kept low to encourage extreme brevity

	    # Ultra conservative syllable range, capped at 6. A full 4/4 bar
	    # ("SWMW") gets a slightly higher upper ratio than any other pattern.
	    upper_ratio = 1.2 if stress_pattern == "SWMW" else 1.1
	    min_syllables = max(1, int(num_beats * 0.4))
	    max_syllables = min(6, int(num_beats * upper_ratio))

	    # Store these in the template for future reference
	    template['min_expected'] = min_syllables
	    template['max_expected'] = max_syllables

	    # The backslash before the pipe is part of the emitted text. It is
	    # written as an explicit "\\" because "\|" is an invalid escape sequence
	    # (SyntaxWarning on Python 3.12+).
	    guide = f"~{est_words} words, ~{min_syllables}-{max_syllables} syllables \\| Pattern: {visual_pattern}"

	    # Add additional guidance to the template for natural phrasing
	    template['phrasing_guide'] = "ULTRA SHORT LINES. One thought per line. Use FRAGMENTS not sentences."

	    return guide

	def check_syllable_stress_match(self, text, template, genre="pop"):
	"""Check if lyrics match the syllable and stress pattern with genre-specific flexibility."""
	# Split text into words and count syllables
	words = text.split()
	syllable_count = sum(self.count_syllables(word) for word in words)

	# Get expected syllable count based on number of beats
	expected_count = template['num_beats']

	# Get syllable-to-beat ratios based on genre
	genre_lower = genre.lower()
	if genre_lower in self.genre_syllable_ratios:
	min_ratio, typical_ratio, max_ratio = self.genre_syllable_ratios[genre_lower]
	else:
	min_ratio, typical_ratio, max_ratio = self.genre_syllable_ratios['default']

	# Calculate flexible min and max syllable expectations based on genre
	# Use extremely conservative ranges to enforce ultra-short lines
	min_expected = max(1, int(expected_count * min_ratio))
	max_expected = min(6, int(expected_count * max_ratio)) # Hard cap at 6 syllables

	# For 4/4 time signature, cap the max syllables per line even lower
	if template['stress_pattern'] == "SWMW": # 4/4 time
	max_expected = min(max_expected, 6) # Cap at 6 syllables max for 4/4

	# Record min and max expected in the template for future reference
	template['min_expected'] = min_expected
	template['max_expected'] = max_expected

	# Check if syllable count falls within genre-appropriate range
	within_range = min_expected <= syllable_count <= max_expected

	# Consider typical ratio - how close are we to the ideal for this genre?
	ideal_count = int(expected_count * typical_ratio)
	# Ensure ideal count is also within our constrained range
	ideal_count = max(min_expected, min(max_expected, ideal_count))

	# More lenient approach to determining "ideal"
	# Count as ideal if within 1 syllable of the target instead of exact match
	close_to_ideal = abs(syllable_count - ideal_count) <= 1

	closeness_to_ideal = 1.0 - min(abs(syllable_count - ideal_count) / (max_expected - min_expected + 1), 1.0)

	# Get detailed syllable breakdown for stress analysis
	word_syllables = []
	for word in words:
	count = self.count_syllables(word)
	word_syllables.append(count)

	# Analyze stress pattern match using a more flexible approach
	stress_pattern = template['stress_pattern']

	# Simple stress matching algorithm (can be improved in future versions)
	# We need to map syllables to beats in a more flexible way
	syllable_to_beat_mapping = self._map_syllables_to_beats(word_syllables, stress_pattern)

	# Calculate stress match score based on alignment of stressed syllables with strong beats
	stress_match_percentage = self._calculate_stress_match(words, word_syllables, syllable_to_beat_mapping, stress_pattern)

	# Consider a stress match if the percentage is high enough
	stress_matches = stress_match_percentage >= 0.6 # Reduced from 0.7 to be more lenient

	return {
	'syllable_count': syllable_count,
	'expected_count': expected_count,
	'min_expected': min_expected,
	'max_expected': max_expected,
	'within_range': within_range,
	'matches_beat_count': syllable_count == expected_count, # Exact match (strict)
	'close_match': within_range, # Flexible match (based on genre)
	'stress_matches': stress_matches,
	'stress_match_percentage': stress_match_percentage,
	'closeness_to_ideal': closeness_to_ideal,
	'word_syllables': word_syllables,
	'ideal_syllable_count': ideal_count,
	'close_to_ideal': close_to_ideal # New field
	}

	def _map_syllables_to_beats(self, word_syllables, stress_pattern):
	"""Map syllables to beats in a flexible way."""
	total_syllables = sum(word_syllables)
	total_beats = len(stress_pattern)

	# Simple mapping for now - this could be improved with more sophisticated algorithms
	if total_syllables <= total_beats:
	# Fewer syllables than beats - some beats have no syllables (prolongation)
	mapping = []
	syllable_index = 0
	for beat_index in range(total_beats):
	if syllable_index < total_syllables:
	mapping.append((syllable_index, beat_index))
	syllable_index += 1
	return mapping
	else:
	# More syllables than beats - some beats have multiple syllables (melisma/syncopation)
	mapping = []
	syllables_per_beat = total_syllables / total_beats
	for beat_index in range(total_beats):
	start_syllable = int(beat_index * syllables_per_beat)
	end_syllable = int((beat_index + 1) * syllables_per_beat)
	for syllable_index in range(start_syllable, end_syllable):
	if syllable_index < total_syllables:
	mapping.append((syllable_index, beat_index))
	return mapping

	def _calculate_stress_match(self, words, word_syllables, syllable_to_beat_mapping, stress_pattern):
	"""Calculate how well syllable stresses match beat stresses."""
	# This is a simplified version - real stress analysis would be more complex
	# For now, we'll assume the first syllable of each word is stressed

	# First, create a flat list of all syllables with their stress (1 = stressed, 0 = unstressed)
	syllable_stresses = []
	for word, syllable_count in zip(words, word_syllables):
	# Simple assumption: first syllable is stressed, rest are unstressed
	for i in range(syllable_count):
	if i == 0: # First syllable of word
	syllable_stresses.append(1) # Stressed
	else:
	syllable_stresses.append(0) # Unstressed

	# Count matches between syllable stress and beat stress
	matches = 0
	total_mapped = 0

	for syllable_index, beat_index in syllable_to_beat_mapping:
	if syllable_index < len(syllable_stresses):
	syllable_stress = syllable_stresses[syllable_index]
	beat_stress = 1 if stress_pattern[beat_index] == 'S' else (0.5 if stress_pattern[beat_index] == 'M' else 0)

	# Consider it a match if:
	# - Stressed syllable on Strong beat
	# - Unstressed syllable on Weak beat
	# - Some partial credit for other combinations
	if (syllable_stress == 1 and beat_stress > 0.5) or (syllable_stress == 0 and beat_stress < 0.5):
	matches += 1
	elif syllable_stress == 1 and beat_stress == 0.5: # Stressed syllable on Medium beat
	matches += 0.7

	total_mapped += 1

	if total_mapped == 0:
	return 0

	return matches / total_mapped