"""Gradio app: detect music, classify its genre, and generate matching lyrics."""
import os
import io
import gradio as gr
import torch
import numpy as np
from transformers import (
AutoModelForAudioClassification,
AutoFeatureExtractor,
AutoTokenizer,
pipeline,
AutoModelForCausalLM,
BitsAndBytesConfig
)
from huggingface_hub import login
from utils import (
load_audio,
extract_audio_duration,
extract_mfcc_features,
calculate_lyrics_length,
format_genre_results,
ensure_cuda_availability,
preprocess_audio_for_model
)
# Login to Hugging Face Hub if token is provided
# (required for gated models such as Llama 3.1 — TODO confirm token scope)
if "HF_TOKEN" in os.environ:
    login(token=os.environ["HF_TOKEN"])

# Constants: Hugging Face Hub model identifiers used throughout the app.
GENRE_MODEL_NAME = "dima806/music_genres_classification"          # genre classifier
MUSIC_DETECTION_MODEL = "MIT/ast-finetuned-audioset-10-10-0.4593"  # AudioSet AST, used for music-vs-non-music detection
LLM_MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"                # lyrics generator
SAMPLE_RATE = 22050 # Standard sample rate for audio processing

# Check CUDA availability (for informational purposes)
CUDA_AVAILABLE = ensure_cuda_availability()
# Create music detection pipeline.
# Preferred path: the high-level HF pipeline; device=0 targets the first GPU,
# -1 runs on CPU. On failure, fall back to loading the feature extractor and
# model manually (detect_music() checks which of the two loaded).
print(f"Loading music detection model: {MUSIC_DETECTION_MODEL}")
try:
    music_detector = pipeline(
        "audio-classification",
        model=MUSIC_DETECTION_MODEL,
        device=0 if CUDA_AVAILABLE else -1
    )
    print("Successfully loaded music detection pipeline")
except Exception as e:
    print(f"Error creating music detection pipeline: {str(e)}")
    # Fallback to manual loading of the two components.
    try:
        music_processor = AutoFeatureExtractor.from_pretrained(MUSIC_DETECTION_MODEL)
        music_model = AutoModelForAudioClassification.from_pretrained(MUSIC_DETECTION_MODEL)
        print("Successfully loaded music detection model and feature extractor")
    except Exception as e2:
        print(f"Error loading music detection model components: {str(e2)}")
        # Music detection is mandatory — abort startup if neither path worked.
        raise RuntimeError(f"Could not load music detection model: {str(e2)}")
# Create genre classification pipeline.
# Same two-stage strategy as the music detector above: try the high-level
# pipeline first, fall back to manual component loading, and abort if both fail
# (classify_genre() checks which variant ended up in module globals).
print(f"Loading audio classification model: {GENRE_MODEL_NAME}")
try:
    genre_classifier = pipeline(
        "audio-classification",
        model=GENRE_MODEL_NAME,
        device=0 if CUDA_AVAILABLE else -1
    )
    print("Successfully loaded audio classification pipeline")
except Exception as e:
    print(f"Error creating pipeline: {str(e)}")
    # Fallback to manual loading of the two components.
    try:
        genre_processor = AutoFeatureExtractor.from_pretrained(GENRE_MODEL_NAME)
        genre_model = AutoModelForAudioClassification.from_pretrained(GENRE_MODEL_NAME)
        print("Successfully loaded audio classification model and feature extractor")
    except Exception as e2:
        print(f"Error loading model components: {str(e2)}")
        # Genre classification is mandatory — abort startup if neither path worked.
        raise RuntimeError(f"Could not load genre classification model: {str(e2)}")
# Load LLM with appropriate quantization for T4 GPU.
# NF4 4-bit weights with fp16 compute keep the 8B model within a T4's VRAM
# budget — presumably ~16 GB; verify against the deployment hardware.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
llm_model = AutoModelForCausalLM.from_pretrained(
    LLM_MODEL_NAME,
    device_map="auto",  # let accelerate place layers across available devices
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
)
# Create LLM pipeline used by generate_lyrics().
llm_pipeline = pipeline(
    "text-generation",
    model=llm_model,
    tokenizer=llm_tokenizer,
    max_new_tokens=512,  # upper bound on generated lyrics length
)
def extract_audio_features(audio_file):
    """Load an audio file and collect everything the later stages need.

    Returns a dict with the MFCC feature vector, the clip duration in
    seconds, the raw waveform plus its sample rate, and the original
    file path (the HF pipelines can consume a path directly).
    """
    waveform, sample_rate = load_audio(audio_file, SAMPLE_RATE)
    duration = extract_audio_duration(waveform, sample_rate)
    # MFCCs for genre classification (may not be needed with the pipeline).
    mfcc_vector = extract_mfcc_features(waveform, sample_rate, n_mfcc=20)
    return {
        "features": mfcc_vector,
        "duration": duration,
        "waveform": waveform,
        "sample_rate": sample_rate,
        "path": audio_file,  # kept so pipelines can re-read the file
    }
def classify_genre(audio_data):
    """Classify the genre of the audio using whichever model loaded.

    Parameters
    ----------
    audio_data : dict
        Output of extract_audio_features; the "path", "waveform" and
        "sample_rate" keys are used here.

    Returns
    -------
    list[tuple[str, float]]
        Up to three (genre_label, confidence) pairs, best first.
        Falls back to [("rock", 1.0)] when no model is available or
        inference fails, so the UI always has something to display.
    """
    try:
        # First attempt: the high-level pipeline, if it loaded at import time.
        if 'genre_classifier' in globals():
            results = genre_classifier(audio_data["path"])
            # Transform pipeline results to our expected format.
            return [(result["label"], result["score"]) for result in results[:3]]
        # Second attempt: manually loaded model + feature extractor.
        elif 'genre_processor' in globals() and 'genre_model' in globals():
            inputs = genre_processor(
                audio_data["waveform"],
                sampling_rate=audio_data["sample_rate"],
                return_tensors="pt"
            )
            with torch.no_grad():
                outputs = genre_model(**inputs)
                predictions = outputs.logits.softmax(dim=-1)
            # Top 3 class probabilities and their label indices.
            values, indices = torch.topk(predictions, 3)
            genre_labels = genre_model.config.id2label
            return [
                (genre_labels[index.item()], value.item())
                for value, index in zip(values[0], indices[0])
            ]
        else:
            raise ValueError("No genre classification model available")
    except Exception as e:
        print(f"Error in genre classification: {str(e)}")
        # Fallback: return a default genre if everything fails.
        return [("rock", 1.0)]
def generate_lyrics(genre, duration):
    """Generate lyrics for *genre*, sized to the audio duration.

    Parameters
    ----------
    genre : str
        Primary genre label from classify_genre.
    duration : float
        Audio duration in seconds; drives the target line count.

    Returns
    -------
    str
        Generated lyrics, with [Verse]/[Chorus]/[Bridge] section labels
        inserted when the model did not produce its own.
    """
    # Calculate appropriate lyrics length based on audio duration.
    lines_count = calculate_lyrics_length(duration)
    # Map the line budget onto a simple song structure.
    if lines_count <= 6:
        # Very short song - one verse and chorus
        verse_lines = 2
        chorus_lines = 2
    elif lines_count <= 10:
        # Medium song - two verses and chorus
        verse_lines = 3
        chorus_lines = 2
    else:
        # Longer song - two verses, chorus, and bridge
        verse_lines = 3
        chorus_lines = 2
    # Create prompt for the LLM.
    prompt = f"""
You are a talented songwriter who specializes in {genre} music.
Write original {genre} song lyrics for a song that is {duration:.1f} seconds long.
The lyrics should:
- Perfectly capture the essence and style of {genre} music
- Be approximately {lines_count} lines long
- Have a coherent theme and flow
- Follow this structure:
* Verse: {verse_lines} lines
* Chorus: {chorus_lines} lines
* {'Bridge: 2 lines' if lines_count > 10 else ''}
- Be completely original
- Match the song duration of {duration:.1f} seconds
- Keep each line concise and impactful
Your lyrics:
"""
    # Generate lyrics using the LLM (sampling parameters add variety while
    # repetition_penalty discourages verbatim loops).
    response = llm_pipeline(
        prompt,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        return_full_text=False
    )
    # Extract and clean generated lyrics.
    lyrics = response[0]["generated_text"].strip()
    # If the model produced no section labels, insert them at the
    # boundaries implied by the structure requested in the prompt.
    if "Verse" not in lyrics and "Chorus" not in lyrics:
        lines = lyrics.split('\n')
        formatted_lyrics = []
        for i, line in enumerate(lines):
            if i == 0:
                formatted_lyrics.append("[Verse]")
            elif i == verse_lines:
                formatted_lyrics.append("\n[Chorus]")
            elif i == verse_lines + chorus_lines and lines_count > 10:
                formatted_lyrics.append("\n[Bridge]")
            formatted_lyrics.append(line)
        lyrics = '\n'.join(formatted_lyrics)
    return lyrics
def detect_music(audio_data):
    """Return True when the audio appears to contain music.

    Uses the MIT AST AudioSet classifier: predictions whose label
    mentions any music-related term contribute to a music confidence,
    which must reach 0.5 for the clip to count as music.

    Returns False (rather than raising) when no detector is available
    or inference fails, so the caller can show a friendly message.
    """
    # Labels containing any of these substrings count as music-related.
    music_terms = ("music", "song", "singing", "instrument")
    try:
        # First attempt: the high-level pipeline, if it loaded at import time.
        if 'music_detector' in globals():
            results = music_detector(audio_data["path"])
            # Keep the best score among music-related labels.
            music_confidence = 0.0
            for result in results:
                label = result["label"].lower()
                if any(term in label for term in music_terms):
                    music_confidence = max(music_confidence, result["score"])
            return music_confidence >= 0.5
        # Second attempt: manually loaded model + feature extractor.
        elif 'music_processor' in globals() and 'music_model' in globals():
            inputs = music_processor(
                audio_data["waveform"],
                sampling_rate=audio_data["sample_rate"],
                return_tensors="pt"
            )
            with torch.no_grad():
                outputs = music_model(**inputs)
                predictions = outputs.logits.softmax(dim=-1)
            # Inspect the five most probable AudioSet classes.
            values, indices = torch.topk(predictions, 5)
            labels = music_model.config.id2label
            music_confidence = 0.0
            for value, index in zip(values[0], indices[0]):
                label = labels[index.item()].lower()
                if any(term in label for term in music_terms):
                    music_confidence = max(music_confidence, value.item())
            return music_confidence >= 0.5
        else:
            raise ValueError("No music detection model available")
    except Exception as e:
        print(f"Error in music detection: {str(e)}")
        return False
def process_audio(audio_file):
    """End-to-end handler: detect music, classify genre, write lyrics.

    Returns a (genre_text, lyrics) tuple feeding the two Gradio output
    boxes; the second element is None whenever processing stops early.
    """
    if audio_file is None:
        return "Please upload an audio file.", None
    try:
        audio_data = extract_audio_features(audio_file)
        # Refuse non-music uploads before doing any heavier work.
        if not detect_music(audio_data):
            return "The uploaded audio does not appear to be music. Please upload a music file.", None
        top_genres = classify_genre(audio_data)
        genre_results = format_genre_results(top_genres)
        # Lyrics are driven by the single best genre and the clip length.
        primary_genre, _ = top_genres[0]
        return genre_results, generate_lyrics(primary_genre, audio_data["duration"])
    except Exception as e:
        return f"Error processing audio: {str(e)}", None
# Create Gradio interface: audio upload + button on the left,
# genre and lyrics text boxes on the right.
with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
    gr.Markdown("# Music Genre Classifier & Lyrics Generator")
    gr.Markdown("Upload a music file to classify its genre and generate matching lyrics.")
    with gr.Row():
        with gr.Column():
            # Input column: file picker (returns a filesystem path) and trigger button.
            audio_input = gr.Audio(label="Upload Music", type="filepath")
            submit_btn = gr.Button("Analyze & Generate")
        with gr.Column():
            # Output column: results filled in by process_audio.
            genre_output = gr.Textbox(label="Detected Genres", lines=5)
            lyrics_output = gr.Textbox(label="Generated Lyrics", lines=15)
    # Wire the button to the end-to-end handler.
    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[genre_output, lyrics_output]
    )
    gr.Markdown("### How it works")
    gr.Markdown("""
1. Upload an audio file of your choice
2. The system will classify the genre using the dima806/music_genres_classification model
3. Based on the detected genre, it will generate appropriate lyrics using Llama-3.1-8B-Instruct
4. The lyrics length is automatically adjusted based on your audio duration
""")
# Launch the app
demo.launch()