root
commited on
Commit
·
1e95801
1
Parent(s):
3feaaf4
ss
Browse files- app.py +346 -493
- lastapp.py +0 -0
app.py
CHANGED
@@ -6,7 +6,6 @@ import numpy as np
|
|
6 |
import re
|
7 |
import pronouncing # Add this to requirements.txt for syllable counting
|
8 |
import functools # Add this for lru_cache functionality
|
9 |
-
import json # Add this for JSON serialization
|
10 |
from transformers import (
|
11 |
AutoModelForAudioClassification,
|
12 |
AutoFeatureExtractor,
|
@@ -2299,109 +2298,6 @@ Improved lyrics with fixed rhythm:
|
|
2299 |
|
2300 |
return lyrics
|
2301 |
|
2302 |
-
def prepare_beat_synced_lyrics(audio_data, lyrics, beats_info):
|
2303 |
-
"""
|
2304 |
-
Prepare data for the beat-synced lyrics viewer
|
2305 |
-
|
2306 |
-
Parameters:
|
2307 |
-
audio_data: Dictionary containing audio features
|
2308 |
-
lyrics: String containing generated lyrics
|
2309 |
-
beats_info: Dictionary containing beat analysis data
|
2310 |
-
|
2311 |
-
Returns:
|
2312 |
-
Dictionary containing data for the beat-synced lyrics viewer
|
2313 |
-
"""
|
2314 |
-
# Extract necessary data for visualization
|
2315 |
-
beat_times = beats_info.get("beat_times", [])
|
2316 |
-
beat_strengths = beats_info.get("beat_strengths", [1.0] * len(beat_times))
|
2317 |
-
tempo = beats_info.get("tempo", 120)
|
2318 |
-
|
2319 |
-
# Clean lyrics - remove section markers and annotations
|
2320 |
-
clean_lyrics = lyrics
|
2321 |
-
if isinstance(lyrics, str):
|
2322 |
-
# Remove "[Verse]", "[Chorus]", etc.
|
2323 |
-
clean_lyrics = re.sub(r'\[\w+\]', '', lyrics)
|
2324 |
-
# Remove any rhythm analysis notes
|
2325 |
-
if "[Note:" in clean_lyrics:
|
2326 |
-
clean_lyrics = clean_lyrics.split("[Note:")[0].strip()
|
2327 |
-
# Remove any rhythm analysis section
|
2328 |
-
if "[RHYTHM_ANALYSIS_SECTION]" in clean_lyrics:
|
2329 |
-
clean_lyrics = clean_lyrics.split("[RHYTHM_ANALYSIS_SECTION]")[0].strip()
|
2330 |
-
|
2331 |
-
# Split into lines
|
2332 |
-
lines = [line.strip() for line in clean_lyrics.split('\n') if line.strip()]
|
2333 |
-
|
2334 |
-
# Split each line into words and estimate timing
|
2335 |
-
lyrics_data = []
|
2336 |
-
|
2337 |
-
# Estimate start time for lyrics - allow a small intro period
|
2338 |
-
lyrics_start_time = beat_times[0] if len(beat_times) > 0 else 0
|
2339 |
-
|
2340 |
-
# Simple approach: distribute lines evenly across available beats
|
2341 |
-
if len(lines) > 0 and len(beat_times) > 0:
|
2342 |
-
beats_per_line = max(1, len(beat_times) // len(lines))
|
2343 |
-
|
2344 |
-
for i, line in enumerate(lines):
|
2345 |
-
# Determine beat range for this line
|
2346 |
-
start_beat_idx = min(i * beats_per_line, len(beat_times) - 1)
|
2347 |
-
end_beat_idx = min(start_beat_idx + beats_per_line, len(beat_times) - 1)
|
2348 |
-
|
2349 |
-
# Get time range
|
2350 |
-
line_start_time = beat_times[start_beat_idx]
|
2351 |
-
line_end_time = beat_times[end_beat_idx] if end_beat_idx < len(beat_times) else audio_data["duration"]
|
2352 |
-
|
2353 |
-
# Split line into words
|
2354 |
-
words = re.findall(r'\b\w+\b|-|\s+|[^\w\s]', line)
|
2355 |
-
filtered_words = [w for w in words if w.strip()]
|
2356 |
-
|
2357 |
-
if filtered_words:
|
2358 |
-
# Distribute words across beats for this line
|
2359 |
-
word_data = []
|
2360 |
-
|
2361 |
-
# Get beat times for this line
|
2362 |
-
line_beat_times = beat_times[start_beat_idx:end_beat_idx+1]
|
2363 |
-
if len(line_beat_times) < 2:
|
2364 |
-
line_beat_times = [line_start_time, line_end_time]
|
2365 |
-
|
2366 |
-
# Distribute words evenly if we have enough beats
|
2367 |
-
if len(filtered_words) <= len(line_beat_times):
|
2368 |
-
for j, word in enumerate(filtered_words):
|
2369 |
-
beat_idx = min(j, len(line_beat_times) - 1)
|
2370 |
-
word_time = line_beat_times[beat_idx]
|
2371 |
-
word_data.append({
|
2372 |
-
"text": word,
|
2373 |
-
"time": word_time,
|
2374 |
-
"is_strong": j == 0 or word[0].isupper() # Simple heuristic for strong beats
|
2375 |
-
})
|
2376 |
-
else:
|
2377 |
-
# More words than beats, distribute evenly
|
2378 |
-
word_duration = (line_end_time - line_start_time) / len(filtered_words)
|
2379 |
-
for j, word in enumerate(filtered_words):
|
2380 |
-
word_time = line_start_time + j * word_duration
|
2381 |
-
word_data.append({
|
2382 |
-
"text": word,
|
2383 |
-
"time": word_time,
|
2384 |
-
"is_strong": j == 0 or word[0].isupper()
|
2385 |
-
})
|
2386 |
-
|
2387 |
-
lyrics_data.append({
|
2388 |
-
"line": line,
|
2389 |
-
"start_time": line_start_time,
|
2390 |
-
"end_time": line_end_time,
|
2391 |
-
"words": word_data
|
2392 |
-
})
|
2393 |
-
|
2394 |
-
# Create visualization data
|
2395 |
-
visualization_data = {
|
2396 |
-
"duration": audio_data["duration"],
|
2397 |
-
"tempo": tempo,
|
2398 |
-
"beat_times": beat_times,
|
2399 |
-
"beat_strengths": beat_strengths,
|
2400 |
-
"lyrics_data": lyrics_data
|
2401 |
-
}
|
2402 |
-
|
2403 |
-
return visualization_data
|
2404 |
-
|
2405 |
def process_audio(audio_file):
|
2406 |
"""Main function to process audio file, classify genre, and generate lyrics with enhanced rhythm analysis."""
|
2407 |
if audio_file is None:
|
@@ -2628,17 +2524,296 @@ def process_audio(audio_file):
|
|
2628 |
except Exception as e:
|
2629 |
error_msg = f"Error processing audio: {str(e)}"
|
2630 |
print(error_msg)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2631 |
|
2632 |
-
#
|
2633 |
-
|
2634 |
-
|
2635 |
-
|
2636 |
-
|
2637 |
-
|
2638 |
-
|
2639 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2640 |
|
2641 |
-
|
|
|
|
|
2642 |
|
2643 |
# Create enhanced Gradio interface with tabs for better organization
|
2644 |
with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
@@ -2683,303 +2858,17 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
2683 |
with gr.TabItem("Rhythm Analysis"):
|
2684 |
rhythm_analysis_output = gr.Textbox(label="Syllable-Beat Alignment Analysis", lines=16)
|
2685 |
|
2686 |
-
with gr.TabItem("Beat-Synced Lyrics"):
|
2687 |
-
# Create a container for the beat-synced lyrics viewer
|
2688 |
-
synced_audio_output = gr.Audio(label="Playback with Synced Lyrics", type="filepath")
|
2689 |
-
|
2690 |
-
# Create a custom JavaScript component for the beat-synced lyrics viewer
|
2691 |
-
lyrics_viewer_html = gr.HTML(
|
2692 |
-
"""
|
2693 |
-
<div id="beat-sync-container" style="width:100%; height:400px; position:relative;">
|
2694 |
-
<div id="loading-message">Please analyze audio to view beat-synced lyrics</div>
|
2695 |
-
<div id="beat-sync-timeline" style="display:none; width:100%; height:80px; background:#f0f0f0; position:relative; overflow:hidden; margin-bottom:10px;">
|
2696 |
-
<div id="playhead" style="position:absolute; width:2px; height:100%; background:red; top:0; left:0; z-index:10;"></div>
|
2697 |
-
<div id="beat-markers" style="position:absolute; width:100%; height:100%; top:0; left:0;"></div>
|
2698 |
-
</div>
|
2699 |
-
<div id="lyrics-display" style="display:none; width:100%; height:300px; overflow-y:auto; font-size:16px; line-height:1.6;"></div>
|
2700 |
-
</div>
|
2701 |
-
|
2702 |
-
<script>
|
2703 |
-
let beatSyncData = null;
|
2704 |
-
let isPlaying = false;
|
2705 |
-
let audioElement = null;
|
2706 |
-
let playheadInterval = null;
|
2707 |
-
let lastHighlightedWord = -1;
|
2708 |
-
|
2709 |
-
function initBeatSyncViewer(data) {
|
2710 |
-
beatSyncData = data;
|
2711 |
-
const container = document.getElementById('beat-sync-container');
|
2712 |
-
const timeline = document.getElementById('beat-sync-timeline');
|
2713 |
-
const beatMarkers = document.getElementById('beat-markers');
|
2714 |
-
const lyricsDisplay = document.getElementById('lyrics-display');
|
2715 |
-
const loadingMessage = document.getElementById('loading-message');
|
2716 |
-
|
2717 |
-
// Clear previous content
|
2718 |
-
beatMarkers.innerHTML = '';
|
2719 |
-
lyricsDisplay.innerHTML = '';
|
2720 |
-
|
2721 |
-
// Show the viewer elements, hide loading message
|
2722 |
-
loadingMessage.style.display = 'none';
|
2723 |
-
timeline.style.display = 'block';
|
2724 |
-
lyricsDisplay.style.display = 'block';
|
2725 |
-
|
2726 |
-
// Create beat markers
|
2727 |
-
const duration = data.duration;
|
2728 |
-
const beatTimes = data.beat_times;
|
2729 |
-
const beatStrengths = data.beat_strengths;
|
2730 |
-
|
2731 |
-
if (beatTimes && beatTimes.length > 0) {
|
2732 |
-
for (let i = 0; i < beatTimes.length; i++) {
|
2733 |
-
const beatTime = beatTimes[i];
|
2734 |
-
const beatStrength = beatStrengths && beatStrengths[i] ? beatStrengths[i] : 1.0;
|
2735 |
-
const position = (beatTime / duration) * 100;
|
2736 |
-
|
2737 |
-
// Create marker with height based on beat strength
|
2738 |
-
const marker = document.createElement('div');
|
2739 |
-
const height = 30 + (beatStrength * 50); // Scale between 30-80px
|
2740 |
-
|
2741 |
-
marker.className = 'beat-marker';
|
2742 |
-
marker.style.position = 'absolute';
|
2743 |
-
marker.style.left = `${position}%`;
|
2744 |
-
marker.style.top = `${(80 - height) / 2}px`;
|
2745 |
-
marker.style.width = '2px';
|
2746 |
-
marker.style.height = `${height}px`;
|
2747 |
-
marker.style.background = beatStrength > 0.7 ? '#2d7dd2' : '#97c6e3';
|
2748 |
-
marker.setAttribute('data-time', beatTime);
|
2749 |
-
|
2750 |
-
beatMarkers.appendChild(marker);
|
2751 |
-
}
|
2752 |
-
}
|
2753 |
-
|
2754 |
-
// Create lyrics display
|
2755 |
-
if (data.lyrics_data && data.lyrics_data.length > 0) {
|
2756 |
-
for (let i = 0; i < data.lyrics_data.length; i++) {
|
2757 |
-
const line = data.lyrics_data[i];
|
2758 |
-
const lineElement = document.createElement('div');
|
2759 |
-
lineElement.className = 'lyric-line';
|
2760 |
-
lineElement.style.marginBottom = '15px';
|
2761 |
-
|
2762 |
-
// Create word elements for the line
|
2763 |
-
line.words.forEach((word, j) => {
|
2764 |
-
const wordSpan = document.createElement('span');
|
2765 |
-
wordSpan.innerText = word.text + ' ';
|
2766 |
-
wordSpan.className = 'lyric-word';
|
2767 |
-
wordSpan.style.display = 'inline-block';
|
2768 |
-
wordSpan.style.transition = 'color 0.1s, transform 0.1s';
|
2769 |
-
wordSpan.setAttribute('data-time', word.time);
|
2770 |
-
wordSpan.setAttribute('data-word-index', j);
|
2771 |
-
wordSpan.setAttribute('data-line-index', i);
|
2772 |
-
|
2773 |
-
if (word.is_strong) {
|
2774 |
-
wordSpan.style.fontWeight = 'bold';
|
2775 |
-
}
|
2776 |
-
|
2777 |
-
lineElement.appendChild(wordSpan);
|
2778 |
-
});
|
2779 |
-
|
2780 |
-
lyricsDisplay.appendChild(lineElement);
|
2781 |
-
}
|
2782 |
-
} else {
|
2783 |
-
lyricsDisplay.innerHTML = '<p>No lyrics data available or could not align lyrics with beats.</p>';
|
2784 |
-
}
|
2785 |
-
|
2786 |
-
// Add timeline click/drag handler for scrubbing
|
2787 |
-
timeline.addEventListener('click', function(e) {
|
2788 |
-
if (!audioElement) return;
|
2789 |
-
|
2790 |
-
const rect = timeline.getBoundingClientRect();
|
2791 |
-
const clickPosition = (e.clientX - rect.left) / rect.width;
|
2792 |
-
const newTime = clickPosition * duration;
|
2793 |
-
|
2794 |
-
// Set audio to new position
|
2795 |
-
audioElement.currentTime = newTime;
|
2796 |
-
|
2797 |
-
// Update playhead and lyrics
|
2798 |
-
updatePlayhead(newTime);
|
2799 |
-
highlightLyricsAtTime(newTime);
|
2800 |
-
});
|
2801 |
-
}
|
2802 |
-
|
2803 |
-
function connectAudio(audioSelector) {
|
2804 |
-
// Find the audio element from Gradio's component
|
2805 |
-
const audioContainer = document.querySelector(audioSelector);
|
2806 |
-
if (!audioContainer) return;
|
2807 |
-
|
2808 |
-
audioElement = audioContainer.querySelector('audio');
|
2809 |
-
if (!audioElement) return;
|
2810 |
-
|
2811 |
-
// Add event listeners to the audio element
|
2812 |
-
audioElement.addEventListener('play', startPlayheadMovement);
|
2813 |
-
audioElement.addEventListener('pause', stopPlayheadMovement);
|
2814 |
-
audioElement.addEventListener('ended', stopPlayheadMovement);
|
2815 |
-
audioElement.addEventListener('seeked', function() {
|
2816 |
-
updatePlayhead(audioElement.currentTime);
|
2817 |
-
highlightLyricsAtTime(audioElement.currentTime);
|
2818 |
-
});
|
2819 |
-
}
|
2820 |
-
|
2821 |
-
function startPlayheadMovement() {
|
2822 |
-
isPlaying = true;
|
2823 |
-
if (playheadInterval) clearInterval(playheadInterval);
|
2824 |
-
|
2825 |
-
playheadInterval = setInterval(() => {
|
2826 |
-
if (!audioElement || !isPlaying) return;
|
2827 |
-
updatePlayhead(audioElement.currentTime);
|
2828 |
-
highlightLyricsAtTime(audioElement.currentTime);
|
2829 |
-
}, 50); // Update every 50ms
|
2830 |
-
}
|
2831 |
-
|
2832 |
-
function stopPlayheadMovement() {
|
2833 |
-
isPlaying = false;
|
2834 |
-
if (playheadInterval) {
|
2835 |
-
clearInterval(playheadInterval);
|
2836 |
-
playheadInterval = null;
|
2837 |
-
}
|
2838 |
-
}
|
2839 |
-
|
2840 |
-
function updatePlayhead(currentTime) {
|
2841 |
-
if (!beatSyncData) return;
|
2842 |
-
|
2843 |
-
const playhead = document.getElementById('playhead');
|
2844 |
-
const position = (currentTime / beatSyncData.duration) * 100;
|
2845 |
-
playhead.style.left = `${position}%`;
|
2846 |
-
}
|
2847 |
-
|
2848 |
-
function highlightLyricsAtTime(currentTime) {
|
2849 |
-
if (!beatSyncData || !beatSyncData.lyrics_data) return;
|
2850 |
-
|
2851 |
-
// Reset all word styling
|
2852 |
-
const words = document.querySelectorAll('.lyric-word');
|
2853 |
-
words.forEach(word => {
|
2854 |
-
word.style.color = 'black';
|
2855 |
-
word.style.transform = 'scale(1)';
|
2856 |
-
});
|
2857 |
-
|
2858 |
-
// Find the current word to highlight
|
2859 |
-
let currentWordElement = null;
|
2860 |
-
let bestTimeDiff = Infinity;
|
2861 |
-
|
2862 |
-
words.forEach(word => {
|
2863 |
-
const wordTime = parseFloat(word.getAttribute('data-time'));
|
2864 |
-
|
2865 |
-
// Highlight words that have already been passed or are coming up soon
|
2866 |
-
if (wordTime <= currentTime + 0.2) {
|
2867 |
-
const timeDiff = Math.abs(wordTime - currentTime);
|
2868 |
-
|
2869 |
-
// Mark past words as "read"
|
2870 |
-
if (wordTime < currentTime - 0.5) {
|
2871 |
-
word.style.color = '#666666';
|
2872 |
-
}
|
2873 |
-
|
2874 |
-
// Find the closest word to current time
|
2875 |
-
if (timeDiff < bestTimeDiff) {
|
2876 |
-
bestTimeDiff = timeDiff;
|
2877 |
-
currentWordElement = word;
|
2878 |
-
}
|
2879 |
-
}
|
2880 |
-
});
|
2881 |
-
|
2882 |
-
// Highlight current word
|
2883 |
-
if (currentWordElement) {
|
2884 |
-
currentWordElement.style.color = '#e63946';
|
2885 |
-
currentWordElement.style.transform = 'scale(1.1)';
|
2886 |
-
|
2887 |
-
// Scroll to keep the current line visible
|
2888 |
-
const lineIndex = parseInt(currentWordElement.getAttribute('data-line-index'));
|
2889 |
-
const lineElement = document.querySelectorAll('.lyric-line')[lineIndex];
|
2890 |
-
|
2891 |
-
if (lineElement) {
|
2892 |
-
const lyricsDisplay = document.getElementById('lyrics-display');
|
2893 |
-
const displayRect = lyricsDisplay.getBoundingClientRect();
|
2894 |
-
const lineRect = lineElement.getBoundingClientRect();
|
2895 |
-
|
2896 |
-
// Check if the line is outside the visible area
|
2897 |
-
if (lineRect.top < displayRect.top || lineRect.bottom > displayRect.bottom) {
|
2898 |
-
lineElement.scrollIntoView({ behavior: 'smooth', block: 'center' });
|
2899 |
-
}
|
2900 |
-
}
|
2901 |
-
}
|
2902 |
-
}
|
2903 |
-
|
2904 |
-
// Wait for Gradio to fully load the components
|
2905 |
-
function waitForGradio() {
|
2906 |
-
// Connect to the audio element when available
|
2907 |
-
setTimeout(() => {
|
2908 |
-
connectAudio('#component-17'); // Replace with the actual selector
|
2909 |
-
|
2910 |
-
// Check for data updates from Gradio
|
2911 |
-
const observer = new MutationObserver((mutations) => {
|
2912 |
-
for (const mutation of mutations) {
|
2913 |
-
if (mutation.type === 'attributes' &&
|
2914 |
-
mutation.target.id === 'beat-sync-container' &&
|
2915 |
-
mutation.target.hasAttribute('data-sync-info')) {
|
2916 |
-
|
2917 |
-
const dataStr = mutation.target.getAttribute('data-sync-info');
|
2918 |
-
try {
|
2919 |
-
const data = JSON.parse(dataStr);
|
2920 |
-
initBeatSyncViewer(data);
|
2921 |
-
} catch (e) {
|
2922 |
-
console.error('Error parsing beat sync data:', e);
|
2923 |
-
}
|
2924 |
-
}
|
2925 |
-
}
|
2926 |
-
});
|
2927 |
-
|
2928 |
-
observer.observe(document.getElementById('beat-sync-container'), {
|
2929 |
-
attributes: true,
|
2930 |
-
attributeFilter: ['data-sync-info']
|
2931 |
-
});
|
2932 |
-
|
2933 |
-
// Try to find all audio elements and add a more robust connection method
|
2934 |
-
function tryConnectAudio() {
|
2935 |
-
const audioElements = document.querySelectorAll('audio');
|
2936 |
-
for (const audio of audioElements) {
|
2937 |
-
if (audio.parentElement.closest('#component-17') ||
|
2938 |
-
audio.parentElement.closest('.beat-synced-lyrics-tab')) {
|
2939 |
-
audioElement = audio;
|
2940 |
-
audioElement.addEventListener('play', startPlayheadMovement);
|
2941 |
-
audioElement.addEventListener('pause', stopPlayheadMovement);
|
2942 |
-
audioElement.addEventListener('ended', stopPlayheadMovement);
|
2943 |
-
audioElement.addEventListener('seeked', function() {
|
2944 |
-
updatePlayhead(audioElement.currentTime);
|
2945 |
-
highlightLyricsAtTime(audioElement.currentTime);
|
2946 |
-
});
|
2947 |
-
return true;
|
2948 |
-
}
|
2949 |
-
}
|
2950 |
-
return false;
|
2951 |
-
}
|
2952 |
-
|
2953 |
-
// Keep trying until we find the audio element
|
2954 |
-
if (!tryConnectAudio()) {
|
2955 |
-
setTimeout(tryConnectAudio, 1000); // Retry after 1 second
|
2956 |
-
}
|
2957 |
-
}, 2000);
|
2958 |
-
}
|
2959 |
-
|
2960 |
-
// Initialize when DOM is ready
|
2961 |
-
if (document.readyState === 'loading') {
|
2962 |
-
document.addEventListener('DOMContentLoaded', waitForGradio);
|
2963 |
-
} else {
|
2964 |
-
waitForGradio();
|
2965 |
-
}
|
2966 |
-
</script>
|
2967 |
-
"""
|
2968 |
-
)
|
2969 |
-
|
2970 |
with gr.TabItem("Syllable Analysis"):
|
2971 |
syllable_analysis_output = gr.Textbox(label="Detailed Syllable Analysis", lines=16)
|
2972 |
prompt_template_output = gr.Textbox(label="Prompt Template", lines=16)
|
|
|
|
|
|
|
2973 |
|
2974 |
# Processing function with better handling of results
|
2975 |
def display_results(audio_file):
|
2976 |
if audio_file is None:
|
2977 |
-
return "Please upload an audio file.", "No emotion analysis available.", "No audio classification available.", "No lyrics generated.", "No rhythm analysis available.",
|
2978 |
-
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
2979 |
-
document.getElementById('loading-message').style.display = 'block';
|
2980 |
-
document.getElementById('beat-sync-timeline').style.display = 'none';
|
2981 |
-
document.getElementById('lyrics-display').style.display = 'none';
|
2982 |
-
</script>""", "No syllable analysis available.", "No prompt template available."
|
2983 |
|
2984 |
try:
|
2985 |
# Process audio and get results
|
@@ -2987,19 +2876,9 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
2987 |
|
2988 |
# Check if we got an error message instead of results
|
2989 |
if isinstance(results, str) and "Error" in results:
|
2990 |
-
return results, "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available",
|
2991 |
-
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
2992 |
-
document.getElementById('loading-message').style.display = 'block';
|
2993 |
-
document.getElementById('beat-sync-timeline').style.display = 'none';
|
2994 |
-
document.getElementById('lyrics-display').style.display = 'none';
|
2995 |
-
</script>""", "No syllable analysis available", "No prompt template available"
|
2996 |
elif isinstance(results, tuple) and isinstance(results[0], str) and "Error" in results[0]:
|
2997 |
-
return results[0], "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available",
|
2998 |
-
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
2999 |
-
document.getElementById('loading-message').style.display = 'block';
|
3000 |
-
document.getElementById('beat-sync-timeline').style.display = 'none';
|
3001 |
-
document.getElementById('lyrics-display').style.display = 'none';
|
3002 |
-
</script>""", "No syllable analysis available", "No prompt template available"
|
3003 |
|
3004 |
# For backwards compatibility, handle both dictionary and tuple returns
|
3005 |
if isinstance(results, dict):
|
@@ -3035,6 +2914,9 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
3035 |
syllable_analysis = "No syllable analysis available"
|
3036 |
prompt_template = "No prompt template available"
|
3037 |
|
|
|
|
|
|
|
3038 |
# Format emotion analysis results
|
3039 |
try:
|
3040 |
emotion_results = music_analyzer.analyze_music(audio_file)
|
@@ -3046,7 +2928,52 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
3046 |
# Add detailed song structure information if available
|
3047 |
try:
|
3048 |
audio_data = extract_audio_features(audio_file)
|
3049 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3050 |
|
3051 |
emotion_text += "\n\nSong Structure:\n"
|
3052 |
for section in song_structure["syllables"]:
|
@@ -3086,87 +3013,19 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
3086 |
else:
|
3087 |
ast_text = "No valid audio classification results available."
|
3088 |
|
3089 |
-
# Prepare beat-synced lyrics visualization data
|
3090 |
-
try:
|
3091 |
-
audio_data = extract_audio_features(audio_file)
|
3092 |
-
|
3093 |
-
# Get beat information
|
3094 |
-
y, sr = load_audio(audio_file, SAMPLE_RATE)
|
3095 |
-
beats_info = detect_beats(y, sr)
|
3096 |
-
|
3097 |
-
# Prepare data for beat-synced lyrics
|
3098 |
-
visualization_data = prepare_beat_synced_lyrics(audio_data, clean_lyrics, beats_info)
|
3099 |
-
|
3100 |
-
# Convert to JSON for JavaScript
|
3101 |
-
visualization_json = json.dumps(visualization_data)
|
3102 |
-
|
3103 |
-
# Create HTML with the data injected - avoid using f-string for the entire HTML
|
3104 |
-
# Handle string escaping separately to avoid f-string backslash issues
|
3105 |
-
escaped_json = visualization_json.replace("'", "\\'")
|
3106 |
-
|
3107 |
-
# Create HTML in parts to avoid f-string backslash issues
|
3108 |
-
html_start = """<div id="beat-sync-container" data-sync-info='"""
|
3109 |
-
html_middle = """' style="width:100%; height:400px; position:relative;">
|
3110 |
-
<div id="loading-message">Loading beat-synced lyrics viewer...</div>
|
3111 |
-
<div id="beat-sync-timeline" style="display:none; width:100%; height:80px; background:#f0f0f0; position:relative; overflow:hidden; margin-bottom:10px;">
|
3112 |
-
<div id="playhead" style="position:absolute; width:2px; height:100%; background:red; top:0; left:0; z-index:10;"></div>
|
3113 |
-
<div id="beat-markers" style="position:absolute; width:100%; height:100%; top:0; left:0;"></div>
|
3114 |
-
</div>
|
3115 |
-
<div id="lyrics-display" style="display:none; width:100%; height:300px; overflow-y:auto; font-size:16px; line-height:1.6;"></div>
|
3116 |
-
</div>
|
3117 |
-
<script>
|
3118 |
-
// Signal to the viewer that new data is available
|
3119 |
-
const container = document.getElementById('beat-sync-container');
|
3120 |
-
if (container) {
|
3121 |
-
// This will trigger the mutation observer
|
3122 |
-
container.setAttribute('data-sync-info', '"""
|
3123 |
-
html_end = """');
|
3124 |
-
}
|
3125 |
-
</script>"""
|
3126 |
-
|
3127 |
-
# Combine parts without using f-strings in the parts that don't need variables
|
3128 |
-
beat_sync_html = html_start + visualization_json + html_middle + escaped_json + html_end
|
3129 |
-
except Exception as e:
|
3130 |
-
print(f"Error creating beat-synced lyrics: {str(e)}")
|
3131 |
-
# Handle string escaping separately to avoid f-string backslash issues
|
3132 |
-
escaped_error = str(e).replace("'", "\\'")
|
3133 |
-
|
3134 |
-
# Use regular strings instead of f-strings to avoid backslash issues
|
3135 |
-
html_start = """<script>
|
3136 |
-
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
3137 |
-
document.getElementById('loading-message').style.display = 'block';
|
3138 |
-
document.getElementById('loading-message').innerText = 'Error creating beat-synced lyrics: """
|
3139 |
-
html_end = """';
|
3140 |
-
document.getElementById('beat-sync-timeline').style.display = 'none';
|
3141 |
-
document.getElementById('lyrics-display').style.display = 'none';
|
3142 |
-
</script>"""
|
3143 |
-
|
3144 |
-
# Combine parts without using f-strings
|
3145 |
-
beat_sync_html = html_start + escaped_error + html_end
|
3146 |
-
|
3147 |
# Return all results including new fields
|
3148 |
-
return genre_results, emotion_text, ast_text, clean_lyrics, rhythm_analysis,
|
3149 |
|
3150 |
except Exception as e:
|
3151 |
error_msg = f"Error: {str(e)}"
|
3152 |
print(error_msg)
|
3153 |
-
|
3154 |
-
# Use a raw string literal to avoid f-string backslash issues
|
3155 |
-
error_html = """<script>
|
3156 |
-
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
3157 |
-
document.getElementById('loading-message').style.display = 'block';
|
3158 |
-
document.getElementById('loading-message').innerText = 'Error processing audio';
|
3159 |
-
document.getElementById('beat-sync-timeline').style.display = 'none';
|
3160 |
-
document.getElementById('lyrics-display').style.display = 'none';
|
3161 |
-
</script>"""
|
3162 |
-
|
3163 |
-
return error_msg, "Error in emotion analysis", "Error in audio classification", "No lyrics generated", "No rhythm analysis available", audio_file, error_html, "No syllable analysis available", "No prompt template available"
|
3164 |
|
3165 |
# Connect the button to the display function with updated outputs
|
3166 |
submit_btn.click(
|
3167 |
fn=display_results,
|
3168 |
inputs=[audio_input],
|
3169 |
-
outputs=[genre_output, emotion_output, ast_output, lyrics_output, rhythm_analysis_output,
|
3170 |
)
|
3171 |
|
3172 |
# Enhanced explanation of how the system works
|
@@ -3203,12 +3062,6 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
3203 |
|
3204 |
8. **Refinement**: If significant rhythm mismatches are detected, the system can automatically refine the lyrics for better alignment.
|
3205 |
|
3206 |
-
9. **Beat-Synced Visualization**: The beat-synced lyrics viewer shows you exactly how the lyrics align with the music:
|
3207 |
-
- Beat markers show the song's rhythmic structure
|
3208 |
-
- Words are highlighted in sync with the music
|
3209 |
-
- Strong beats and stressed syllables are emphasized
|
3210 |
-
- You can scrub through the song to see how lyrics and music match at any point
|
3211 |
-
|
3212 |
This multi-step process creates lyrics that feel naturally connected to the music, as if they were written specifically for it.
|
3213 |
""")
|
3214 |
|
|
|
6 |
import re
|
7 |
import pronouncing # Add this to requirements.txt for syllable counting
|
8 |
import functools # Add this for lru_cache functionality
|
|
|
9 |
from transformers import (
|
10 |
AutoModelForAudioClassification,
|
11 |
AutoFeatureExtractor,
|
|
|
2298 |
|
2299 |
return lyrics
|
2300 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2301 |
def process_audio(audio_file):
|
2302 |
"""Main function to process audio file, classify genre, and generate lyrics with enhanced rhythm analysis."""
|
2303 |
if audio_file is None:
|
|
|
2524 |
except Exception as e:
|
2525 |
error_msg = f"Error processing audio: {str(e)}"
|
2526 |
print(error_msg)
|
2527 |
+
return error_msg, None, []
|
2528 |
+
|
2529 |
+
def format_beat_timeline(audio_file, lyrics=None):
    """Create a human-readable report of beat timings, measures, musical phrases
    and (optionally) lyrics-to-beat alignment for an uploaded audio file.

    Parameters:
        audio_file: Path to the audio file to analyze. ``None`` returns a
            prompt message instead of a report.
        lyrics: Optional lyrics string. When provided, a lyrics-beat alignment
            section is appended to the report.

    Returns:
        A multi-section formatted text report, or a prompt/error message string.
    """
    if audio_file is None:
        return "Please upload an audio file to see beat timeline."

    try:
        # Extract audio and detect beats (project helpers; see detect_beats for
        # the expected keys in beats_info)
        y, sr = load_audio(audio_file, SAMPLE_RATE)
        beats_info = detect_beats(y, sr)

        time_signature = beats_info['time_signature']
        beat_times = beats_info['beat_times']
        beat_strengths = beats_info['beat_strengths']
        # 'phrases' may be absent or empty — normalize to a list
        phrases = beats_info.get('phrases') or []

        # --- Header ---
        timeline = "=== BEAT & SYLLABLE TIMELINE ===\n\n"
        timeline += f"Tempo: {beats_info['tempo']:.1f} BPM\n"
        timeline += f"Time Signature: {time_signature}/4\n"
        timeline += f"Total Beats: {beats_info['beat_count']}\n\n"

        # --- Beat-by-beat table ---
        timeline += "| Beat # | Time (s) | Beat Strength | Syllable Pattern |\n"
        timeline += "|--------|----------|--------------|------------------|\n"

        for i, (beat_time, strength) in enumerate(zip(beat_times, beat_strengths)):
            # Classify by onset strength
            if strength >= 0.8:
                beat_type = "STRONG"
            elif strength >= 0.5:
                beat_type = "medium"
            else:
                beat_type = "weak"

            # Position within the measure determines the pattern symbol
            if i % time_signature == 0:
                pattern = "S"  # downbeat (start of measure)
            elif i % time_signature == time_signature // 2 and time_signature > 3:
                pattern = "m"  # mid-measure beat (e.g. beat 3 in 4/4)
            else:
                pattern = "w"  # weak beat

            timeline += f"| {i+1:<6} | {beat_time:.2f}s | {beat_type:<12} | {pattern}:{1.5 if pattern=='S' else 1.0} |\n"

            # Keep the table to a reasonable size
            if i >= 29:
                if beats_info['beat_count'] > 30:  # avoid "... and 0 more beats ..."
                    timeline += f"... and {beats_info['beat_count'] - 30} more beats ...\n"
                break

        # --- Visual beat timeline ---
        # All rows below share one scale: 2 characters per second, with a
        # space inserted every 10 characters (i.e. every 5 seconds) so the
        # markers, ruler and beat row stay column-aligned.
        timeline += "\n=== VISUAL BEAT TIMELINE ===\n\n"
        timeline += "Each character represents 0.5 seconds. Beats are marked as:\n"
        timeline += "S = Strong beat | m = Medium beat | w = Weak beat | · = No beat\n\n"

        if beat_times:
            total_duration = max(beat_times) + 2  # pad 2 s after the last beat
        else:
            total_duration = 30  # fallback duration when no beats were found

        def group_chars(chars):
            """Join a char list, inserting a space every 10 chars (5 seconds)."""
            return " ".join(
                "".join(chars[k:k + 10]) for k in range(0, len(chars), 10)
            )

        # Time markers: one label per 5 s; 10 chars + 1 group-separator space
        time_markers = ""
        for sec in range(0, int(total_duration) + 1, 5):
            time_markers += f"{sec:<11}"
        timeline += time_markers + " (seconds)\n"

        # Ruler at the same 2-chars-per-second scale ('+' marks every 5 s)
        ruler_chars = []
        for sec in range(0, int(total_duration)):
            ruler_chars.append("+" if sec % 5 == 0 else "-")
            ruler_chars.append("-")
        timeline += group_chars(ruler_chars) + "\n"

        # Beat row: place each beat at its 0.5 s slot
        beat_line = ["·"] * int(total_duration * 2)
        for i, beat_time in enumerate(beat_times):
            if i >= len(beat_strengths):
                break
            pos = int(beat_time * 2)
            if pos >= len(beat_line):
                continue
            strength = beat_strengths[i]
            # Downbeats and very strong onsets → S; mid-measure/medium → m
            if i % time_signature == 0 or strength >= 0.8:
                beat_line[pos] = "S"
            elif (i % time_signature == time_signature // 2 and time_signature > 3) or strength >= 0.5:
                beat_line[pos] = "m"
            else:
                beat_line[pos] = "w"
        timeline += group_chars(beat_line) + "\n\n"

        # --- Measure markers ---
        timeline += "=== MEASURE MARKERS ===\n\n"
        measure_starts = []
        for i, beat_time in enumerate(beat_times):
            if i % time_signature == 0:  # first beat of a measure
                measure_starts.append((i // time_signature + 1, beat_time))

        if measure_starts:
            timeline += "| Measure # | Start Time | Duration |\n"
            timeline += "|-----------|------------|----------|\n"

            for i, (measure_num, start_time) in enumerate(measure_starts):
                # End = start of next measure, else last beat, else +2 s default
                if i < len(measure_starts) - 1:
                    end_time = measure_starts[i + 1][1]
                elif beat_times:
                    end_time = beat_times[-1]
                else:
                    end_time = start_time + 2.0

                duration = end_time - start_time
                timeline += f"| {measure_num:<9} | {start_time:.2f}s | {duration:.2f}s |\n"

                # Limit table size; skip the trailer when nothing is omitted
                if i >= 9:
                    if len(measure_starts) > 10:
                        timeline += f"... and {len(measure_starts) - 10} more measures ...\n"
                    break

        # --- Musical phrases ---
        if phrases:
            timeline += "\n=== MUSICAL PHRASES ===\n\n"
            for i, phrase in enumerate(phrases[:10]):  # first 10 phrases only
                if not phrase:
                    continue

                start_beat = phrase[0]
                end_beat = phrase[-1]
                if start_beat >= len(beat_times) or end_beat >= len(beat_times):
                    continue

                phrase_start = beat_times[start_beat]
                phrase_end = beat_times[end_beat]
                timeline += f"Phrase {i+1}: Beats {start_beat+1}-{end_beat+1} ({phrase_start:.2f}s - {phrase_end:.2f}s)\n"

                # Syllable template computed from just this phrase's beats
                phrase_beats = {
                    "beat_times": [beat_times[j] for j in phrase if j < len(beat_times)],
                    "beat_strengths": [beat_strengths[j] for j in phrase if j < len(beat_strengths)],
                    "tempo": beats_info['tempo'],
                    "time_signature": time_signature,
                    "phrases": [list(range(len(phrase)))]
                }
                template = create_flexible_syllable_templates(phrase_beats)
                timeline += f"  Syllable Template: {template}\n"

                # Visual representation of this phrase on the shared timeline
                if phrase_start < total_duration and phrase_end < total_duration:
                    phrase_viz = ["·"] * int(total_duration * 2)
                    start_pos = int(phrase_start * 2)
                    end_pos = int(phrase_end * 2)

                    if start_pos < len(phrase_viz):
                        phrase_viz[start_pos] = "["
                    if end_pos < len(phrase_viz):
                        phrase_viz[end_pos] = "]"

                    for j in phrase:
                        if j < len(beat_times):
                            beat_pos = int(beat_times[j] * 2)
                            # Keep the bracket characters at the boundaries
                            if beat_pos < len(phrase_viz) and beat_pos != start_pos and beat_pos != end_pos:
                                if j % time_signature == 0:
                                    phrase_viz[beat_pos] = "S"
                                elif j % time_signature == time_signature // 2:
                                    phrase_viz[beat_pos] = "m"
                                else:
                                    phrase_viz[beat_pos] = "w"

                    timeline += f"  Timeline: {group_chars(phrase_viz)}\n\n"

            if len(phrases) > 10:
                timeline += f"... and {len(phrases) - 10} more phrases ...\n"

        # --- Lyrics-beat alignment (only when lyrics were generated) ---
        if lyrics and isinstance(lyrics, str):
            timeline += "\n=== LYRICS-BEAT ALIGNMENT ===\n\n"

            # Strip any appended rhythm-analysis notes from the lyrics
            if "[Note:" in lyrics:
                clean_lyrics = lyrics.split("[Note:")[0].strip()
            else:
                clean_lyrics = lyrics

            lines = clean_lyrics.strip().split('\n')

            # Show alignment for the first few lines (skip blanks and [Section] tags)
            for i, line in enumerate(lines[:10]):
                if not line.strip() or line.startswith('['):
                    continue

                timeline += f"Line: \"{line}\"\n"

                syllable_count = count_syllables(line)
                timeline += f"  Syllables: {syllable_count}\n"

                # Map the line onto its corresponding phrase's beats, if any
                if phrases and i < len(phrases):
                    phrase = phrases[i]
                    if phrase and phrase[0] < len(beat_times) and phrase[-1] < len(beat_times):
                        start_time = beat_times[phrase[0]]
                        end_time = beat_times[phrase[-1]]
                        timeline += f"  Timing: {start_time:.2f}s - {end_time:.2f}s\n"

                        phrase_duration = end_time - start_time
                        # Guard against zero-length phrases and zero-syllable
                        # lines, which previously caused a ZeroDivisionError
                        if phrase_duration > 0 and syllable_count > 0:
                            syllable_viz = []
                            for j in phrase:
                                if j < len(beat_times):
                                    beat_time = beat_times[j]
                                    # Scale beat position into syllable slots
                                    relative_pos = int((beat_time - start_time) / phrase_duration * syllable_count)

                                    while len(syllable_viz) <= relative_pos:
                                        syllable_viz.append("·")

                                    if j % time_signature == 0:
                                        syllable_viz[relative_pos] = "S"
                                    elif j % time_signature == time_signature // 2:
                                        syllable_viz[relative_pos] = "m"
                                    else:
                                        syllable_viz[relative_pos] = "w"

                            # Pad/trim so the row is exactly one char per syllable
                            while len(syllable_viz) < syllable_count:
                                syllable_viz.append("·")
                            syllable_viz = syllable_viz[:syllable_count]

                            timeline += "  Alignment: " + "".join(syllable_viz) + "\n"

                timeline += "\n"

            if len(lines) > 10:
                timeline += f"... and {len(lines) - 10} more lines ...\n"

        return timeline

    except Exception as e:
        print(f"Error generating beat timeline: {str(e)}")
        return f"Error generating beat timeline: {str(e)}"
|
2817 |
|
2818 |
# Create enhanced Gradio interface with tabs for better organization
|
2819 |
with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
|
2858 |
with gr.TabItem("Rhythm Analysis"):
|
2859 |
rhythm_analysis_output = gr.Textbox(label="Syllable-Beat Alignment Analysis", lines=16)
|
2860 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2861 |
with gr.TabItem("Syllable Analysis"):
|
2862 |
syllable_analysis_output = gr.Textbox(label="Detailed Syllable Analysis", lines=16)
|
2863 |
prompt_template_output = gr.Textbox(label="Prompt Template", lines=16)
|
2864 |
+
|
2865 |
+
with gr.TabItem("Beat & Syllable Timeline"):
|
2866 |
+
beat_timeline_output = gr.Textbox(label="Beat Timings & Syllable Patterns", lines=16)
|
2867 |
|
2868 |
# Processing function with better handling of results
|
2869 |
def display_results(audio_file):
|
2870 |
if audio_file is None:
|
2871 |
+
return "Please upload an audio file.", "No emotion analysis available.", "No audio classification available.", "No lyrics generated.", "No rhythm analysis available.", "No syllable analysis available.", "No prompt template available.", "No beat timeline available."
|
|
|
|
|
|
|
|
|
|
|
2872 |
|
2873 |
try:
|
2874 |
# Process audio and get results
|
|
|
2876 |
|
2877 |
# Check if we got an error message instead of results
|
2878 |
if isinstance(results, str) and "Error" in results:
|
2879 |
+
return results, "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", "No syllable analysis available", "No prompt template available", "No beat timeline available"
|
|
|
|
|
|
|
|
|
|
|
2880 |
elif isinstance(results, tuple) and isinstance(results[0], str) and "Error" in results[0]:
|
2881 |
+
return results[0], "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", "No syllable analysis available", "No prompt template available", "No beat timeline available"
|
|
|
|
|
|
|
|
|
|
|
2882 |
|
2883 |
# For backwards compatibility, handle both dictionary and tuple returns
|
2884 |
if isinstance(results, dict):
|
|
|
2914 |
syllable_analysis = "No syllable analysis available"
|
2915 |
prompt_template = "No prompt template available"
|
2916 |
|
2917 |
+
# Generate beat timeline data
|
2918 |
+
beat_timeline = format_beat_timeline(audio_file, clean_lyrics)
|
2919 |
+
|
2920 |
# Format emotion analysis results
|
2921 |
try:
|
2922 |
emotion_results = music_analyzer.analyze_music(audio_file)
|
|
|
2928 |
# Add detailed song structure information if available
|
2929 |
try:
|
2930 |
audio_data = extract_audio_features(audio_file)
|
2931 |
+
# Use the existing information rather than a function that doesn't exist
|
2932 |
+
y, sr = load_audio(audio_file, SAMPLE_RATE)
|
2933 |
+
beats_info = detect_beats(y, sr)
|
2934 |
+
sections_info = detect_sections(y, sr)
|
2935 |
+
|
2936 |
+
# Create a simple song structure from the available data
|
2937 |
+
song_structure = {
|
2938 |
+
"beats": beats_info,
|
2939 |
+
"sections": sections_info,
|
2940 |
+
"syllables": []
|
2941 |
+
}
|
2942 |
+
|
2943 |
+
# Add syllable counts to each section
|
2944 |
+
for section in sections_info:
|
2945 |
+
# Create syllable templates for sections
|
2946 |
+
section_beats_info = {
|
2947 |
+
"beat_times": [beat for beat in beats_info["beat_times"]
|
2948 |
+
if section["start"] <= beat < section["end"]],
|
2949 |
+
"tempo": beats_info.get("tempo", 120)
|
2950 |
+
}
|
2951 |
+
if "beat_strengths" in beats_info:
|
2952 |
+
section_beats_info["beat_strengths"] = [
|
2953 |
+
strength for i, strength in enumerate(beats_info["beat_strengths"])
|
2954 |
+
if i < len(beats_info["beat_times"]) and
|
2955 |
+
section["start"] <= beats_info["beat_times"][i] < section["end"]
|
2956 |
+
]
|
2957 |
+
|
2958 |
+
# Get a syllable count based on section duration and tempo
|
2959 |
+
syllable_count = int(section["duration"] * (beats_info.get("tempo", 120) / 60) * 1.5)
|
2960 |
+
|
2961 |
+
section_info = {
|
2962 |
+
"type": section["type"],
|
2963 |
+
"start": section["start"],
|
2964 |
+
"end": section["end"],
|
2965 |
+
"duration": section["duration"],
|
2966 |
+
"syllable_count": syllable_count,
|
2967 |
+
"beat_count": len(section_beats_info["beat_times"])
|
2968 |
+
}
|
2969 |
+
|
2970 |
+
# Try to create a more detailed syllable template
|
2971 |
+
if len(section_beats_info["beat_times"]) >= 2:
|
2972 |
+
section_info["syllable_template"] = create_flexible_syllable_templates(
|
2973 |
+
section_beats_info
|
2974 |
+
)
|
2975 |
+
|
2976 |
+
song_structure["syllables"].append(section_info)
|
2977 |
|
2978 |
emotion_text += "\n\nSong Structure:\n"
|
2979 |
for section in song_structure["syllables"]:
|
|
|
3013 |
else:
|
3014 |
ast_text = "No valid audio classification results available."
|
3015 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3016 |
# Return all results including new fields
|
3017 |
+
return genre_results, emotion_text, ast_text, clean_lyrics, rhythm_analysis, syllable_analysis, prompt_template, beat_timeline
|
3018 |
|
3019 |
except Exception as e:
|
3020 |
error_msg = f"Error: {str(e)}"
|
3021 |
print(error_msg)
|
3022 |
+
return error_msg, "Error in emotion analysis", "Error in audio classification", "No lyrics generated", "No rhythm analysis available", "No syllable analysis available", "No prompt template available", "No beat timeline available"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3023 |
|
3024 |
# Connect the button to the display function with updated outputs
|
3025 |
submit_btn.click(
|
3026 |
fn=display_results,
|
3027 |
inputs=[audio_input],
|
3028 |
+
outputs=[genre_output, emotion_output, ast_output, lyrics_output, rhythm_analysis_output, syllable_analysis_output, prompt_template_output, beat_timeline_output]
|
3029 |
)
|
3030 |
|
3031 |
# Enhanced explanation of how the system works
|
|
|
3062 |
|
3063 |
8. **Refinement**: If significant rhythm mismatches are detected, the system can automatically refine the lyrics for better alignment.
|
3064 |
|
|
|
|
|
|
|
|
|
|
|
|
|
3065 |
This multi-step process creates lyrics that feel naturally connected to the music, as if they were written specifically for it.
|
3066 |
""")
|
3067 |
|
lastapp.py
ADDED
The diff for this file is too large to render.
See raw diff
|
|