|
import os |
|
import io |
|
import gradio as gr |
|
import torch |
|
import numpy as np |
|
import re |
|
import pronouncing |
|
import functools |
|
from transformers import ( |
|
AutoModelForAudioClassification, |
|
AutoFeatureExtractor, |
|
AutoTokenizer, |
|
pipeline, |
|
AutoModelForCausalLM, |
|
BitsAndBytesConfig |
|
) |
|
from huggingface_hub import login |
|
from utils import ( |
|
load_audio, |
|
extract_audio_duration, |
|
extract_mfcc_features, |
|
format_genre_results, |
|
ensure_cuda_availability |
|
) |
|
from emotionanalysis import MusicAnalyzer |
|
import librosa |
|
from beat_analysis import BeatAnalyzer |
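# Gradio app: analyzes an uploaded track (duration, tempo, time signature, key,
# emotion, theme, genre, beat patterns) and uses a 4-bit quantized LLM to generate
# lyrics whose line count and syllable counts follow the detected beat structure.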
|
|
|
|
|
beat_analyzer = BeatAnalyzer() |
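# Log in to the Hugging Face Hub when an HF_TOKEN environment variable is set,
# so gated or private models can be downloaded.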
|
|
|
|
|
if "HF_TOKEN" in os.environ: |
|
login(token=os.environ["HF_TOKEN"]) |
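# Hub model identifiers and the sample rate (22.05 kHz) used when loading audio for analysis.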
|
|
|
|
|
GENRE_MODEL_NAME = "dima806/music_genres_classification" |
|
MUSIC_DETECTION_MODEL = "MIT/ast-finetuned-audioset-10-10-0.4593" |
|
LLM_MODEL_NAME = "Qwen/QwQ-32B" |
|
SAMPLE_RATE = 22050 |
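# ensure_cuda_availability() (from utils) reports whether a CUDA device is usable;
# the flag only affects device placement for the genre model below.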
|
|
|
|
|
CUDA_AVAILABLE = ensure_cuda_availability() |
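# Load the genre classifier once at startup. If this fails, genre_model stays None
# and process_audio() falls back to reporting the genre as "Unknown".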
|
|
|
|
|
print("Loading genre classification model...") |
|
try: |
|
genre_feature_extractor = AutoFeatureExtractor.from_pretrained(GENRE_MODEL_NAME) |
|
genre_model = AutoModelForAudioClassification.from_pretrained( |
|
GENRE_MODEL_NAME, |
|
device_map="auto" if CUDA_AVAILABLE else None |
|
) |
|
|
|
def get_genre_model(): |
|
return genre_model, genre_feature_extractor |
|
except Exception as e: |
|
print(f"Error loading genre model: {str(e)}") |
|
genre_model = None |
|
genre_feature_extractor = None |
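# Load the lyric-generation LLM with 4-bit NF4 weights, double quantization, and
# float16 compute to keep the 32B-parameter model's memory footprint manageable.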
|
|
|
|
|
print("Loading Qwen QwQ-32B model with 4-bit quantization...") |
|
try: |
|
|
|
quantization_config = BitsAndBytesConfig( |
|
load_in_4bit=True, |
|
bnb_4bit_quant_type="nf4", |
|
bnb_4bit_compute_dtype=torch.float16, |
|
bnb_4bit_use_double_quant=True |
|
) |
|
|
|
llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME) |
|
llm_model = AutoModelForCausalLM.from_pretrained( |
|
LLM_MODEL_NAME, |
|
quantization_config=quantization_config, |
|
device_map="auto", |
|
trust_remote_code=True, |
|
torch_dtype=torch.float16, |
|
use_cache=True |
|
) |
|
except Exception as e: |
|
print(f"Error loading LLM model: {str(e)}") |
|
llm_tokenizer = None |
|
llm_model = None |
|
|
|
|
|
music_analyzer = MusicAnalyzer() |
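# Main callback for the Gradio button. Runs the full pipeline (duration, time
# signature, tempo, emotion/theme scoring, genre classification, beat templates)
# and, for supported genres, lyric generation. Returns the ten values consumed
# by the ten output components wired up in create_interface().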
|
|
|
|
|
def process_audio(audio_file, custom_prompt=""): |
|
if audio_file is None: |
|
return "No audio file provided", None, None, None, None, None, None, None, None, None |
|
|
|
try: |
|
|
|
y, sr = load_audio(audio_file, sr=SAMPLE_RATE) |
|
|
|
|
|
duration = extract_audio_duration(y, sr) |
|
|
|
|
|
time_sig_result = beat_analyzer.detect_time_signature(audio_file) |
|
time_signature = time_sig_result["time_signature"] |
|
|
|
|
|
music_analysis = music_analyzer.analyze_music(audio_file) |
|
|
|
|
|
tempo = music_analysis["rhythm_analysis"]["tempo"] |
|
|
|
|
|
emotion_scores = music_analysis["emotion_analysis"]["emotion_scores"] |
|
sorted_emotions = sorted(emotion_scores.items(), key=lambda x: x[1], reverse=True) |
|
primary_emotion = sorted_emotions[0][0] |
|
secondary_emotion = sorted_emotions[1][0] if len(sorted_emotions) > 1 else None |
|
|
|
|
|
theme_scores = music_analysis["theme_analysis"]["theme_scores"] |
|
sorted_themes = sorted(theme_scores.items(), key=lambda x: x[1], reverse=True) |
|
primary_theme = sorted_themes[0][0] |
|
secondary_theme = sorted_themes[1][0] if len(sorted_themes) > 1 else None |
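        # Genre classification: the audio classifier expects 16 kHz input, so resample
        # before feature extraction and keep the five most probable genres.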
|
|
|
|
|
if genre_model is not None and genre_feature_extractor is not None: |
|
|
|
y_16k = librosa.resample(y, orig_sr=sr, target_sr=16000) |
|
|
|
|
|
inputs = genre_feature_extractor( |
|
y_16k, |
|
sampling_rate=16000, |
|
return_tensors="pt" |
|
).to(genre_model.device) |
|
|
|
|
|
with torch.no_grad(): |
|
outputs = genre_model(**inputs) |
|
logits = outputs.logits |
|
probs = torch.nn.functional.softmax(logits, dim=-1) |
|
|
|
|
|
values, indices = torch.topk(probs[0], k=5) |
|
top_genres = [(genre_model.config.id2label[idx.item()], val.item()) for val, idx in zip(values, indices)] |
|
else: |
|
|
|
top_genres = [("Unknown", 1.0)] |
|
|
|
|
|
genre_results_text = format_genre_results(top_genres) |
|
primary_genre = top_genres[0][0] |
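        # The beat-pattern analysis only supports 4/4, 3/4, and 6/8; fall back to 4/4 for anything else.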
|
|
|
|
|
if time_signature not in ["4/4", "3/4", "6/8"]: |
|
time_signature = "4/4" |
|
|
|
|
|
beat_analysis = beat_analyzer.analyze_beat_pattern(audio_file, time_signature=time_signature, auto_detect=False) |
|
lyric_templates = beat_analyzer.create_lyric_template(beat_analysis) |
|
|
|
|
|
music_analysis["beat_analysis"] = beat_analysis |
|
music_analysis["lyric_templates"] = lyric_templates |
|
|
|
|
|
analysis_summary = f""" |
|
### Music Analysis Results |
|
|
|
**Duration:** {duration:.2f} seconds |
|
**Tempo:** {tempo:.1f} BPM |
|
**Time Signature:** {time_signature} (Confidence: {time_sig_result["confidence"]:.1%}) |
|
**Key:** {music_analysis["tonal_analysis"]["key"]} {music_analysis["tonal_analysis"]["mode"]} |
|
|
|
**Emotions:** |
|
- Primary: {primary_emotion} (Confidence: {emotion_scores[primary_emotion]:.1%}) |
|
- Secondary: {secondary_emotion} (Confidence: {emotion_scores[secondary_emotion]:.1%}) |
|
|
|
**Themes:** |
|
- Primary: {primary_theme} (Confidence: {theme_scores[primary_theme]:.1%}) |
|
- Secondary: {secondary_theme} (Confidence: {theme_scores[secondary_theme]:.1%}) |
|
|
|
**Top Genre:** {primary_genre} |
|
|
|
{genre_results_text} |
|
""" |
|
|
|
|
|
if lyric_templates: |
|
analysis_summary += f""" |
|
### Beat Analysis |
|
|
|
**Total Phrases:** {len(lyric_templates)} |
|
**Average Beats Per Phrase:** {np.mean([t['num_beats'] for t in lyric_templates]):.1f} |
|
**Beat Pattern Examples:** |
|
- Phrase 1: {lyric_templates[0]['stress_pattern'] if lyric_templates else 'N/A'} |
|
- Phrase 2: {lyric_templates[1]['stress_pattern'] if len(lyric_templates) > 1 else 'N/A'} |
|
""" |
|
|
|
|
|
genre_supported = any(genre.lower() in primary_genre.lower() for genre in beat_analyzer.supported_genres) |
|
|
|
|
|
if genre_supported: |
|
lyrics = generate_lyrics(music_analysis, primary_genre, duration, custom_prompt) |
|
beat_match_analysis = analyze_lyrics_rhythm_match(lyrics, lyric_templates, primary_genre) |
|
else: |
|
supported_genres_str = ", ".join([genre.capitalize() for genre in beat_analyzer.supported_genres]) |
|
lyrics = f"Lyrics generation is only supported for the following genres: {supported_genres_str}.\n\nDetected genre '{primary_genre}' doesn't have strong syllable-to-beat patterns required for our lyric generation algorithm." |
|
beat_match_analysis = "Lyrics generation not available for this genre." |
|
|
|
return analysis_summary, lyrics, tempo, time_signature, primary_emotion, secondary_emotion, primary_theme, secondary_theme, primary_genre, beat_match_analysis |
|
|
|
except Exception as e: |
|
error_msg = f"Error processing audio: {str(e)}" |
|
print(error_msg) |
|
return error_msg, None, None, None, None, None, None, None, None, None |
|
|
|
def generate_lyrics(music_analysis, genre, duration, custom_prompt=""): |
|
try: |
|
|
|
tempo = music_analysis["rhythm_analysis"]["tempo"] |
|
key = music_analysis["tonal_analysis"]["key"] |
|
mode = music_analysis["tonal_analysis"]["mode"] |
|
|
|
|
|
emotion_scores = music_analysis["emotion_analysis"]["emotion_scores"] |
|
sorted_emotions = sorted(emotion_scores.items(), key=lambda x: x[1], reverse=True) |
|
primary_emotion = sorted_emotions[0][0] |
|
secondary_emotion = sorted_emotions[1][0] if len(sorted_emotions) > 1 else None |
|
|
|
theme_scores = music_analysis["theme_analysis"]["theme_scores"] |
|
sorted_themes = sorted(theme_scores.items(), key=lambda x: x[1], reverse=True) |
|
primary_theme = sorted_themes[0][0] |
|
secondary_theme = sorted_themes[1][0] if len(sorted_themes) > 1 else None |
|
|
|
|
|
lyric_templates = music_analysis.get("lyric_templates", []) |
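        # Derive the required line count and per-line syllable range from the
        # beat-derived templates when they exist, otherwise fall back to
        # conservative defaults (4 lines of 2-7 syllables).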
|
|
|
|
|
|
|
        if not lyric_templates:
            num_phrases_for_prompt = 4
            min_syl_for_prompt = 2
            max_syl_for_prompt = 7
        else:
            num_phrases_for_prompt = len(lyric_templates)
            max_syl_for_prompt = max([t.get('max_expected', 7) for t in lyric_templates]) if lyric_templates and lyric_templates[0].get('max_expected') else 7
            min_syl_for_prompt = min([t.get('min_expected', 2) for t in lyric_templates]) if lyric_templates and lyric_templates[0].get('min_expected') else 2

        base_prompt = f'''You are a professional songwriter. Write song lyrics for a {genre} song.

SONG DETAILS:
- Key: {key} {mode}
- Tempo: {tempo} BPM
- Primary emotion: {primary_emotion}
- Secondary emotion: {secondary_emotion}
- Primary theme: {primary_theme}
- Secondary theme: {secondary_theme}'''

        custom_requirements = ""
        if custom_prompt and custom_prompt.strip():
            custom_requirements = f'''

SPECIAL REQUIREMENTS FROM USER:
{custom_prompt.strip()}
Please incorporate these requirements while still following all the technical constraints below.'''

        prompt = base_prompt + custom_requirements + f'''

CRITICAL REQUIREMENTS (MOST IMPORTANT):
- You MUST write EXACTLY {num_phrases_for_prompt} lines of lyrics.
- Number each lyric line starting from 1 up to {num_phrases_for_prompt}. For example:
1. First lyric line.
2. Second lyric line.
...
{num_phrases_for_prompt}. The final lyric line.
- Each numbered line (after removing the number and period) MUST be {min_syl_for_prompt}-{max_syl_for_prompt} syllables MAXIMUM.
- NO line's content (after removing the number) can exceed {max_syl_for_prompt} syllables. This is EXTREMELY IMPORTANT.
- Count syllables carefully for the content of each numbered line.
- Use SHORT WORDS and SHORT PHRASES for the content of each numbered line.
- Break long thoughts into multiple numbered lines.

CREATIVITY GUIDELINES:
- Create original, vivid imagery that captures the emotions.
- Use concrete, sensory details (what you see, hear, feel, touch).
- Avoid clichés and common phrases.
- Draw inspiration from the specific themes and emotions listed above.
- Think about unique moments, specific objects, or personal details.
- Use unexpected word combinations.
- Focus on the particular mood created by {primary_emotion} and {secondary_emotion}.

STYLE FOR SHORT LINES (for the content of each numbered line):
- Use brief, impactful phrases.
- Focus on single images or moments per line.
- Choose simple, everyday words.
- Let each line paint one clear picture.

ABSOLUTELY NO placeholders like [line], [moment], [breath], [phrase], [word], etc.

OUTPUT FORMAT:
Under the "LYRICS:" heading, provide exactly {num_phrases_for_prompt} numbered lyric lines.

LYRICS:
(Your {num_phrases_for_prompt} numbered lyric lines go here, each starting with its number, a period, and a space)

Remember: Output EXACTLY {num_phrases_for_prompt} numbered lyric lines. Each line's content (after removing the number) must be {min_syl_for_prompt}-{max_syl_for_prompt} syllables.'''
|
|
|
messages = [ |
|
{"role": "user", "content": prompt} |
|
] |
|
|
|
|
|
text = llm_tokenizer.apply_chat_template( |
|
messages, |
|
tokenize=False, |
|
add_generation_prompt=True |
|
) |
|
|
|
|
|
model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device) |
|
|
|
|
|
generated_ids = llm_model.generate( |
|
**model_inputs, |
|
max_new_tokens=2048, |
|
do_sample=True, |
|
temperature=0.6, |
|
top_p=0.95, |
|
top_k=30, |
|
repetition_penalty=1.1, |
|
pad_token_id=llm_tokenizer.eos_token_id |
|
) |
|
|
|
|
|
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist() |
|
lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip() |
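        # QwQ-style reasoning models emit "<think>...</think>" blocks; strip them
        # (including any unmatched tags) before extracting the lyric lines.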
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
lyrics = re.sub(r'<think>.*?</think>', '', lyrics, flags=re.DOTALL | re.IGNORECASE) |
|
lyrics = re.sub(r'<think>', '', lyrics, flags=re.IGNORECASE) |
|
lyrics = re.sub(r'</think>', '', lyrics, flags=re.IGNORECASE) |
|
|
|
|
|
lyrics_section_match = re.search(r'LYRICS:\s*\n(.*?)(?:\n\n|\Z)', lyrics, re.DOTALL | re.IGNORECASE) |
|
if lyrics_section_match: |
|
lyrics = lyrics_section_match.group(1).strip() |
|
else: |
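            # No explicit "LYRICS:" heading found; fall back to common lead-ins
            # ("here are the lyrics", separator rows, "final lyrics", code fences)
            # and drop everything up to the furthest match.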
|
|
|
lyric_start_patterns = [ |
|
r'(?:here (?:are )?(?:the )?lyrics?:?|lyrics?:?|my lyrics?:?|song lyrics?:?)\s*', |
|
r'(?:here (?:is )?(?:a )?song:?|here (?:is )?my song:?)\s*', |
|
r'(?:\*{3,}|\={3,}|\-{3,})\s*', |
|
r'(?:final lyrics?:?|the lyrics?:?)\s*', |
|
r'```\s*' |
|
] |
|
|
|
|
|
lyrics_start_pos = 0 |
|
for pattern in lyric_start_patterns: |
|
match = re.search(pattern, lyrics, re.IGNORECASE) |
|
if match: |
|
lyrics_start_pos = max(lyrics_start_pos, match.end()) |
|
|
|
|
|
if lyrics_start_pos > 0: |
|
lyrics = lyrics[lyrics_start_pos:].strip() |
|
|
|
|
|
lines = lyrics.strip().split('\n') |
|
clean_lines = [] |
|
|
|
|
|
for line in lines: |
|
line = line.strip() |
|
if not line or line.isspace(): |
|
continue |
|
|
|
|
|
line = re.sub(r'^\d+\.\s*', '', line) |
|
|
|
line_lower = line.lower() |
|
|
|
|
|
if re.match(r'^\[ *(line|moment|breath|phrase|word|sound) *\]$', line_lower): |
|
continue |
|
|
|
|
|
if any(phrase in line_lower for phrase in [ |
|
'line 1', 'line 2', 'line 3', |
|
'thinking', 'lyrics:', 'format:', 'etc...', 'commentary', |
|
'syllables', 'requirements', 'output', 'provide' |
|
]): |
|
continue |
|
|
|
|
|
if re.match(r'^\d+[\.\):]|^\[.*\]$', line): |
|
continue |
|
|
|
|
|
words = line.split() |
|
if 1 <= len(words) <= 8 and not any(tech_word in line_lower for tech_word in [ |
|
'syllable', 'beat', 'tempo', 'analysis', 'format', 'section' |
|
]): |
|
clean_lines.append(line) |
|
|
|
|
|
|
|
final_clean_lines = [] |
|
for line in clean_lines: |
|
|
|
line = re.sub(r'\s+//.*$', '', line) |
|
line = re.sub(r'\s+\(.*?\)$', '', line) |
|
|
|
|
|
line = re.sub(r'\s*\(\d+\s*syllables?\)', '', line, flags=re.IGNORECASE) |
|
|
|
|
|
if line.strip(): |
|
final_clean_lines.append(line.strip()) |
|
|
|
clean_lines = final_clean_lines |
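        # Syllable bounds for enforcement come from the beat templates when
        # available, otherwise broad defaults of 2-6 syllables per line.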
|
|
|
|
|
if lyric_templates: |
|
max_allowed_syllables = max([t.get('max_expected', 6) for t in lyric_templates]) |
|
min_allowed_syllables = min([t.get('min_expected', 2) for t in lyric_templates]) |
|
else: |
|
max_allowed_syllables = 6 |
|
min_allowed_syllables = 2 |
|
|
|
|
|
syllable_enforced_lines = [] |
|
for line in clean_lines: |
|
words = line.split() |
|
current_syllables = sum(beat_analyzer.count_syllables(word) for word in words) |
|
|
|
|
|
if min_allowed_syllables <= current_syllables <= max_allowed_syllables: |
|
syllable_enforced_lines.append(line) |
|
|
|
elif current_syllables > max_allowed_syllables: |
|
|
|
current_line = [] |
|
current_count = 0 |
|
|
|
for word in words: |
|
word_syllables = beat_analyzer.count_syllables(word) |
|
|
|
|
|
if current_count + word_syllables > max_allowed_syllables and current_line: |
|
syllable_enforced_lines.append(" ".join(current_line)) |
|
current_line = [word] |
|
current_count = word_syllables |
|
else: |
|
|
|
current_line.append(word) |
|
current_count += word_syllables |
|
|
|
|
|
if current_line and current_count >= min_allowed_syllables: |
|
syllable_enforced_lines.append(" ".join(current_line)) |
|
|
|
|
|
clean_lines = syllable_enforced_lines |
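        # The finished lyric needs exactly one line per musical phrase (4 when no
        # templates exist): merge short adjacent lines when there are too many,
        # and report an error when there are too few.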
|
|
|
|
|
if lyric_templates: |
|
num_required = len(lyric_templates) |
|
else: |
|
num_required = 4 |
|
|
|
|
|
if len(clean_lines) > num_required: |
|
|
|
merged_lines = [] |
|
i = 0 |
|
|
|
while i < len(clean_lines) and len(merged_lines) < num_required: |
|
if i + 1 < len(clean_lines) and len(merged_lines) < num_required - 1: |
|
|
|
line1 = clean_lines[i] |
|
line2 = clean_lines[i + 1] |
|
|
|
words1 = line1.split() |
|
words2 = line2.split() |
|
|
|
syllables1 = sum(beat_analyzer.count_syllables(word) for word in words1) |
|
syllables2 = sum(beat_analyzer.count_syllables(word) for word in words2) |
|
|
|
|
|
if syllables1 + syllables2 <= max_allowed_syllables: |
|
merged_lines.append(line1 + " " + line2) |
|
i += 2 |
|
else: |
|
merged_lines.append(line1) |
|
i += 1 |
|
else: |
|
merged_lines.append(clean_lines[i]) |
|
i += 1 |
|
|
|
|
|
clean_lines = merged_lines[:num_required] |
|
|
|
elif len(clean_lines) < num_required: |
|
|
|
|
|
|
|
return f"Error: The model generated {len(clean_lines)} lines but {num_required} were required. Please try again." |
|
|
|
|
|
if len(clean_lines) != num_required: |
|
|
|
if len(clean_lines) > num_required: |
|
clean_lines = clean_lines[:num_required] |
|
else: |
|
|
|
return f"Error: Could not generate exactly {num_required} lines. Please try again." |
|
|
|
|
|
final_lyrics = '\n'.join(clean_lines) |
|
|
|
|
|
if not final_lyrics or len(final_lyrics.strip()) < 15: |
|
return "The model output appears to be mostly thinking content. Please try regenerating for cleaner lyrics." |
|
|
|
return final_lyrics |
|
|
|
except Exception as e: |
|
error_msg = f"Error generating lyrics: {str(e)}" |
|
print(error_msg) |
|
return error_msg |
|
|
|
def analyze_lyrics_rhythm_match(lyrics, lyric_templates, genre="pop"): |
|
"""Analyze how well the generated lyrics match the beat patterns and syllable requirements""" |
|
if not lyric_templates or not lyrics: |
|
return "No beat templates or lyrics available for analysis." |
|
|
|
|
|
lines = lyrics.strip().split('\n') |
|
lines = [line for line in lines if line.strip()] |
|
|
|
|
|
result = "### Beat & Syllable Match Analysis\n\n" |
|
result += "| Line | Syllables | Target Range | Match | Stress Pattern |\n" |
|
result += "| ---- | --------- | ------------ | ----- | -------------- |\n" |
|
|
|
|
|
line_count = min(len(lines), len(lyric_templates)) |
|
|
|
|
|
total_matches = 0 |
|
total_range_matches = 0 |
|
total_stress_matches = 0 |
|
total_stress_percentage = 0 |
|
total_ideal_matches = 0 |
|
|
|
for i in range(line_count): |
|
line = lines[i] |
|
template = lyric_templates[i] |
|
|
|
|
|
check_result = beat_analyzer.check_syllable_stress_match(line, template, genre) |
|
|
|
|
|
if check_result["close_to_ideal"]: |
|
syllable_match = "β" |
|
elif check_result["within_range"]: |
|
syllable_match = "β*" |
|
else: |
|
syllable_match = "β" |
|
|
|
stress_match = "β" if check_result["stress_matches"] else f"{int(check_result['stress_match_percentage']*100)}%" |
|
|
|
|
|
if check_result["close_to_ideal"]: |
|
total_matches += 1 |
|
total_ideal_matches += 1 |
|
elif check_result["within_range"]: |
|
total_range_matches += 1 |
|
|
|
if check_result["stress_matches"]: |
|
total_stress_matches += 1 |
|
total_stress_percentage += check_result["stress_match_percentage"] |
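        # Render the template's expected stress pattern: X = strong beat, x = medium, . = weak.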
|
|
|
|
|
stress_visual = "" |
|
for char in template['stress_pattern']: |
|
if char == "S": |
|
stress_visual += "X" |
|
elif char == "M": |
|
stress_visual += "x" |
|
else: |
|
stress_visual += "." |
|
|
|
|
|
result += f"| {i+1} | {check_result['syllable_count']} | {check_result['min_expected']}-{check_result['max_expected']} | {syllable_match} | {stress_visual} |\n" |
|
|
|
|
|
if line_count > 0: |
|
exact_match_rate = (total_matches / line_count) * 100 |
|
range_match_rate = ((total_matches + total_range_matches) / line_count) * 100 |
|
ideal_match_rate = (total_ideal_matches / line_count) * 100 |
|
stress_match_rate = (total_stress_matches / line_count) * 100 |
|
avg_stress_percentage = (total_stress_percentage / line_count) * 100 |
|
|
|
result += f"\n**Summary:**\n" |
|
result += f"- Ideal or near-ideal syllable match rate: {exact_match_rate:.1f}%\n" |
|
result += f"- Genre-appropriate syllable range match rate: {range_match_rate:.1f}%\n" |
|
result += f"- Perfect stress pattern match rate: {stress_match_rate:.1f}%\n" |
|
result += f"- Average stress pattern accuracy: {avg_stress_percentage:.1f}%\n" |
|
result += f"- Overall rhythmic accuracy: {((range_match_rate + avg_stress_percentage) / 2):.1f}%\n" |
|
|
|
|
|
sentence_flow_analysis = analyze_sentence_flow(lines) |
|
result += f"\n**Sentence Flow Analysis:**\n" |
|
result += f"- Connected thought groups: {sentence_flow_analysis['connected_groups']} detected\n" |
|
result += f"- Average lines per thought: {sentence_flow_analysis['avg_lines_per_group']:.1f}\n" |
|
result += f"- Flow quality: {sentence_flow_analysis['flow_quality']}\n" |
|
|
|
|
|
result += f"\n**Syllable & Flow Guidance:**\n" |
|
result += f"- Aim for {min([t.get('min_expected', 3) for t in lyric_templates])}-{max([t.get('max_expected', 7) for t in lyric_templates])} syllables per line\n" |
|
result += f"- Break complete thoughts across 2-3 lines for natural flow\n" |
|
result += f"- Connect your lyrics with sentence fragments that flow across lines\n" |
|
result += f"- Use conjunctions, prepositions, and dependent clauses to connect lines\n" |
|
|
|
|
|
result += f"\n**Genre Notes ({genre}):**\n" |
|
|
|
|
|
if genre.lower() == "pop": |
|
result += "- Pop lyrics work well with thoughts spanning 2-3 musical phrases\n" |
|
result += "- Create flow by connecting lines with transitions like 'as', 'when', 'through'\n" |
|
elif genre.lower() == "rock": |
|
result += "- Rock lyrics benefit from short phrases that build into complete thoughts\n" |
|
result += "- Use line breaks strategically to emphasize key words\n" |
|
elif genre.lower() == "country": |
|
result += "- Country lyrics tell stories that flow naturally across multiple lines\n" |
|
result += "- Connect narrative elements across phrases for authentic storytelling\n" |
|
elif genre.lower() == "disco": |
|
result += "- Disco lyrics work well with phrases that create rhythmic momentum\n" |
|
result += "- Use line transitions that maintain energy and flow\n" |
|
elif genre.lower() == "metal": |
|
result += "- Metal lyrics can create intensity by breaking phrases at dramatic points\n" |
|
result += "- Connect lines to build tension and release across measures\n" |
|
else: |
|
result += "- This genre works well with connected thoughts across multiple lines\n" |
|
result += "- Aim for natural speech flow rather than complete thoughts per line\n" |
|
|
|
return result |
|
|
|
def analyze_sentence_flow(lines): |
|
"""Analyze how well the lyrics create sentence flow across multiple lines""" |
|
if not lines or len(lines) < 2: |
|
return { |
|
"connected_groups": 0, |
|
"avg_lines_per_group": 0, |
|
"flow_quality": "Insufficient lines to analyze" |
|
} |
|
|
|
|
|
continuation_starters = [ |
|
'and', 'but', 'or', 'nor', 'for', 'yet', 'so', |
|
'as', 'when', 'while', 'before', 'after', 'since', 'until', 'because', 'although', 'though', |
|
'with', 'without', 'through', 'throughout', 'beyond', 'beneath', 'under', 'over', 'into', 'onto', |
|
'to', 'from', 'by', 'at', 'in', 'on', 'of', |
|
'where', 'how', 'who', 'whom', 'whose', 'which', 'that', |
|
'if', 'then', |
|
] |
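    # Group consecutive lines that appear to continue one thought: a line joins
    # the current group when it starts with a continuation word, starts in
    # lowercase, or is very short and the next line starts with a continuation word.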
|
|
|
|
|
connected_lines = [] |
|
potential_groups = [] |
|
current_group = [0] |
|
|
|
for i in range(1, len(lines)): |
|
|
|
words = lines[i].lower().split() |
|
|
|
|
|
if not words: |
|
if len(current_group) > 1: |
|
potential_groups.append(current_group.copy()) |
|
current_group = [i] |
|
continue |
|
|
|
|
|
first_word = words[0].strip(',.!?;:') |
|
if first_word in continuation_starters: |
|
connected_lines.append(i) |
|
current_group.append(i) |
|
|
|
elif not first_word[0].isupper() and first_word[0].isalpha(): |
|
connected_lines.append(i) |
|
current_group.append(i) |
|
|
|
elif len(words) <= 3 and i < len(lines) - 1: |
|
|
|
if i+1 < len(lines): |
|
next_words = lines[i+1].lower().split() |
|
if next_words and next_words[0] in continuation_starters: |
|
connected_lines.append(i) |
|
current_group.append(i) |
|
else: |
|
|
|
if len(current_group) > 1: |
|
potential_groups.append(current_group.copy()) |
|
current_group = [i] |
|
else: |
|
|
|
if len(current_group) > 1: |
|
potential_groups.append(current_group.copy()) |
|
current_group = [i] |
|
|
|
|
|
if len(current_group) > 1: |
|
potential_groups.append(current_group) |
|
|
|
|
|
connected_groups = len(potential_groups) |
|
|
|
if connected_groups > 0: |
|
avg_lines_per_group = sum(len(group) for group in potential_groups) / connected_groups |
|
|
|
|
|
if connected_groups >= len(lines) / 3 and avg_lines_per_group >= 2.5: |
|
flow_quality = "Excellent - multiple connected thoughts across lines" |
|
elif connected_groups >= len(lines) / 4 and avg_lines_per_group >= 2: |
|
flow_quality = "Good - some connected thoughts across lines" |
|
elif connected_groups > 0: |
|
flow_quality = "Fair - limited connection between lines" |
|
else: |
|
flow_quality = "Poor - mostly independent lines" |
|
else: |
|
avg_lines_per_group = 0 |
|
flow_quality = "Poor - no connected thoughts detected" |
|
|
|
return { |
|
"connected_groups": connected_groups, |
|
"avg_lines_per_group": avg_lines_per_group, |
|
"flow_quality": flow_quality |
|
} |
|
|
|
def enforce_syllable_limits(lines, max_syllables=6): |
|
""" |
|
Enforce syllable limits by splitting or truncating lines that are too long. |
|
Returns a modified list of lines where no line exceeds max_syllables. |
|
""" |
|
if not lines: |
|
return [] |
|
|
|
result_lines = [] |
|
|
|
for line in lines: |
|
words = line.split() |
|
if not words: |
|
continue |
|
|
|
|
|
syllable_count = sum(beat_analyzer.count_syllables(word) for word in words) |
|
|
|
|
|
if syllable_count <= max_syllables: |
|
result_lines.append(line) |
|
continue |
|
|
|
|
|
current_line = [] |
|
current_syllables = 0 |
|
|
|
for word in words: |
|
word_syllables = beat_analyzer.count_syllables(word) |
|
|
|
|
|
if current_syllables + word_syllables > max_syllables and current_line: |
|
result_lines.append(" ".join(current_line)) |
|
current_line = [word] |
|
current_syllables = word_syllables |
|
else: |
|
|
|
current_line.append(word) |
|
current_syllables += word_syllables |
|
|
|
|
|
if current_line: |
|
result_lines.append(" ".join(current_line)) |
|
|
|
return result_lines |
|
|
|
|
|
def create_interface(): |
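    # Build the Gradio UI: audio input and optional custom prompt on the left,
    # tabbed analysis / lyrics / beat-match outputs on the right.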
|
with gr.Blocks(title="Advanced Music Analysis & Beat-Matched Lyrics Generator") as demo: |
|
gr.Markdown("# π΅ Advanced Music Analysis & Beat-Matched Lyrics Generator") |
|
gr.Markdown("**Upload music to get comprehensive analysis and generate perfectly synchronized lyrics that match the rhythm, emotion, and structure of your audio**") |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
audio_input = gr.Audio( |
|
label="π§ Upload or Record Audio", |
|
type="filepath", |
|
sources=["upload", "microphone"] |
|
) |
|
|
|
|
|
custom_prompt_input = gr.Textbox( |
|
label="π¨ Custom Lyrics Requirements (Optional)", |
|
placeholder="e.g., 'Write about a rainy day in the city' or 'Include metaphors about flying' or 'Make it about overcoming challenges'", |
|
lines=3, |
|
info="Add any specific requirements, themes, or creative directions for the lyrics. This will be merged with the music analysis to create personalized lyrics." |
|
) |
|
|
|
                analyze_btn = gr.Button("Analyze Music & Generate Lyrics", variant="primary", size="lg")
|
|
|
with gr.Column(scale=2): |
|
                with gr.Tab("Music Analysis"):
|
analysis_output = gr.Textbox(label="Comprehensive Music Analysis Results", lines=10) |
|
|
|
with gr.Row(): |
|
                        tempo_output = gr.Number(label="Tempo (BPM)")
                        time_sig_output = gr.Textbox(label="Time Signature")
|
|
|
with gr.Row(): |
|
                        primary_emotion_output = gr.Textbox(label="Primary Emotion")
                        secondary_emotion_output = gr.Textbox(label="Secondary Emotion")
|
|
|
with gr.Row(): |
|
                        primary_theme_output = gr.Textbox(label="Primary Theme")
                        secondary_theme_output = gr.Textbox(label="Secondary Theme")
|
                        genre_output = gr.Textbox(label="Primary Genre")
|
|
|
                with gr.Tab("Generated Lyrics"):
|
lyrics_output = gr.Textbox(label="Beat-Synchronized Lyrics", lines=20) |
|
|
|
                with gr.Tab("Beat Matching Analysis"):
|
beat_match_output = gr.Markdown(label="Rhythm & Syllable Synchronization Analysis") |
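        # Wire the button to process_audio; the outputs list order must match the
        # ten-element tuple that process_audio returns.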
|
|
|
|
|
analyze_btn.click( |
|
fn=process_audio, |
|
inputs=[audio_input, custom_prompt_input], |
|
outputs=[ |
|
analysis_output, lyrics_output, tempo_output, time_sig_output, |
|
primary_emotion_output, secondary_emotion_output, |
|
primary_theme_output, secondary_theme_output, |
|
genre_output, beat_match_output |
|
] |
|
) |
|
|
|
|
|
supported_genres_md = "\n".join([f"- **{genre.capitalize()}**: Optimized for {genre} music patterns" for genre in beat_analyzer.supported_genres]) |
|
|
|
gr.Markdown(f""" |
|
## π How It Works |
|
|
|
1. **π§ Upload Audio**: Support for various formats (MP3, WAV, etc.) or record directly in your browser |
|
2. **π¨ Add Custom Requirements** (Optional): Specify your creative vision, themes, or style preferences |
|
3. **π Advanced Analysis**: Multi-layered analysis including: |
|
- **Tempo & Time Signature**: Advanced detection using multiple algorithms |
|
- **Emotional Profiling**: 8-dimensional emotion mapping (happy, sad, excited, calm, etc.) |
|
- **Thematic Analysis**: Musical themes (love, triumph, adventure, reflection, etc.) |
|
- **Beat Pattern Extraction**: Precise rhythm and stress pattern identification |
|
- **Genre Classification**: AI-powered genre detection with confidence scores |
|
4. **π€ Lyrics Generation**: AI creates perfectly synchronized lyrics that: |
|
- **Match Beat Patterns**: Each line aligns with musical phrases and rhythm |
|
- **Follow Syllable Constraints**: Precise syllable-to-beat mapping for natural flow |
|
- **Incorporate Emotions & Themes**: Blend detected musical characteristics |
|
- **Include Your Requirements**: Merge your creative directions seamlessly |
|
5. **π Quality Analysis**: Comprehensive metrics showing beat matching accuracy and flow quality |
|
|
|
## π¨ Custom Requirements Examples |
|
|
|
**π Themes**: "Write about nature and freedom", "Focus on urban nightlife", "Tell a story about friendship" |
|
|
|
**πΌοΈ Imagery**: "Use ocean metaphors", "Include references to stars and sky", "Focus on light and shadow" |
|
|
|
**ποΈ Perspective**: "From a child's viewpoint", "Make it nostalgic", "Focus on hope and resilience" |
|
|
|
**βοΈ Style**: "Use simple everyday language", "Include some rhyming", "Make it conversational" |
|
|
|
**π Content**: "Avoid sad themes", "Include words 'journey' and 'home'", "Focus on personal growth" |
|
|
|
The system intelligently blends your requirements with detected musical characteristics to create personalized, rhythm-perfect lyrics. |
|
|
|
## π΅ Supported Genres for Full Lyrics Generation |
|
|
|
**β
Full Support** (Complete Analysis + Beat-Matched Lyrics): |
|
{supported_genres_md} |
|
|
|
These genres have consistent syllable-to-beat patterns that work optimally with our advanced rhythm-matching algorithm. |
|
|
|
**π Analysis Only**: All other genres receive comprehensive musical analysis (tempo, emotion, themes, etc.) without lyrics generation. |
|
|
|
## π οΈ Advanced Features |
|
|
|
- **π― Beat Synchronization**: Syllable-perfect alignment with musical phrases |
|
- **π§ Emotion Integration**: Lyrics reflect detected emotional characteristics |
|
- **π Theme Incorporation**: Musical themes guide lyrical content |
|
- **π Quality Metrics**: Detailed analysis of rhythm matching accuracy |
|
- **π Flow Optimization**: Natural sentence continuation across lines |
|
- **βοΈ Genre Optimization**: Tailored patterns for different musical styles |
|
""") |
|
|
|
return demo |
|
|
|
|
|
demo = create_interface() |
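# Run the app directly with "python app.py"; otherwise expose it as "app" for
# hosts that import this module (a common pattern for hosted Gradio deployments).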
|
|
|
if __name__ == "__main__": |
|
demo.launch() |
|
else: |
|
|
|
app = demo |