app.py
CHANGED
@@ -6,6 +6,7 @@ import json
 import time
 import tempfile
 import shutil
+import librosa
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # Check if CUDA is available and set the device accordingly
@@ -16,12 +17,46 @@ AUDIO_API_URL = "https://api-inference.huggingface.co/models/MIT/ast-finetuned-a
 LYRICS_API_URL = "https://api-inference.huggingface.co/models/gpt2-xl"
 headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN')}"}
 
-def
-"""
-
+def get_audio_duration(audio_path):
+    """Get the duration of the audio file in seconds"""
+    try:
+        duration = librosa.get_duration(path=audio_path)
+        return duration
+    except Exception as e:
+        print(f"Error getting audio duration: {e}")
+        return None
+
+def calculate_song_structure(duration):
+    """Calculate song structure based on audio duration"""
+    if duration is None:
+        return {"verses": 2, "choruses": 1, "tokens": 200}  # Default structure
+
+    # Basic rules for song structure:
+    # - Short clips (< 30s): 1 verse, 1 chorus
+    # - Medium clips (30s-2min): 2 verses, 1-2 choruses
+    # - Longer clips (>2min): 3 verses, 2-3 choruses
+
+    if duration < 30:
+        return {
+            "verses": 1,
+            "choruses": 1,
+            "tokens": 150
+        }
+    elif duration < 120:
+        return {
+            "verses": 2,
+            "choruses": 2,
+            "tokens": 200
+        }
+    else:
+        return {
+            "verses": 3,
+            "choruses": 3,
+            "tokens": 300
+        }
 
-def create_lyrics_prompt(classification_results):
-    """Create a prompt for lyrics generation based on classification results"""
+def create_lyrics_prompt(classification_results, song_structure):
+    """Create a prompt for lyrics generation based on classification results and desired structure"""
     # Get the top genre and its characteristics
     top_result = classification_results[0]
     genre = top_result['label']
@@ -30,14 +65,57 @@ def create_lyrics_prompt(classification_results):
     # Get additional musical elements
     additional_elements = [r['label'] for r in classification_results[1:3]]
 
-    # Create a
+    # Create a structured prompt based on song length
     prompt = f"""Write song lyrics in the style of {genre}.
 Theme: A {genre} song with elements of {' and '.join(additional_elements)}
+Structure: {song_structure['verses']} verses and {song_structure['choruses']} choruses
+
+Format the lyrics with [Verse 1], [Chorus], [Verse 2], etc.
+Make each verse 4-6 lines and chorus 4 lines.
 
 [Verse 1]"""
     return prompt
 
-def generate_lyrics_with_retry(prompt, max_retries=5, initial_wait=2):
+def format_lyrics(generated_text, song_structure):
+    """Format the generated lyrics according to desired structure"""
+    lines = generated_text.split('\n')
+    cleaned_lines = []
+    current_section = "[Verse 1]"
+    verse_count = 0
+    chorus_count = 0
+
+    for line in lines:
+        line = line.strip()
+        if not line or line.startswith('###') or line.startswith('```'):
+            continue
+
+        # Handle section markers
+        if line.lower().startswith('[verse'):
+            if verse_count < song_structure['verses']:
+                verse_count += 1
+                current_section = f"[Verse {verse_count}]"
+                cleaned_lines.append(f"\n{current_section}")
+            continue
+        elif line.lower().startswith('[chorus'):
+            if chorus_count < song_structure['choruses']:
+                chorus_count += 1
+                current_section = f"[Chorus {chorus_count}]"
+                cleaned_lines.append(f"\n{current_section}")
+            continue
+
+        # Add the line if we haven't exceeded our structure limits
+        if (current_section.startswith('[Verse') and verse_count <= song_structure['verses']) or \
+           (current_section.startswith('[Chorus') and chorus_count <= song_structure['choruses']):
+            cleaned_lines.append(line)
+
+    # Add chorus after first verse if not present
+    if len(cleaned_lines) == 5 and chorus_count == 0:  # After 4 lines of verse + section header
+        chorus_count += 1
+        cleaned_lines.append(f"\n[Chorus 1]")
+
+    return "\n".join(cleaned_lines)
+
+def generate_lyrics_with_retry(prompt, song_structure, max_retries=5, initial_wait=2):
     """Generate lyrics using GPT2-XL with retry logic"""
     wait_time = initial_wait
 
@@ -49,7 +127,7 @@ def generate_lyrics_with_retry(prompt, max_retries=5, initial_wait=2):
             json={
                 "inputs": prompt,
                 "parameters": {
-                    "max_new_tokens":
+                    "max_new_tokens": song_structure['tokens'],
                     "temperature": 0.9,
                     "top_p": 0.95,
                     "do_sample": True,
@@ -66,23 +144,7 @@ def generate_lyrics_with_retry(prompt, max_retries=5, initial_wait=2):
             result = response.json()
             if isinstance(result, list) and len(result) > 0:
                 generated_text = result[0].get("generated_text", "")
-
-                lines = generated_text.split('\n')
-                cleaned_lines = []
-                current_section = "[Verse 1]"
-
-                for line in lines:
-                    line = line.strip()
-                    if line and not line.startswith('###') and not line.startswith('```'):
-                        if line.lower().startswith('[verse') or line.lower().startswith('[chorus'):
-                            current_section = line
-                        cleaned_lines.append(line)
-
-                # Add chorus after first verse if not present
-                if len(cleaned_lines) == 4 and current_section == "[Verse 1]":
-                    cleaned_lines.append("\n[Chorus]")
-
-                return "\n".join(cleaned_lines)
+                return format_lyrics(generated_text, song_structure)
             return "Error: No text generated"
         elif response.status_code == 503:
             print(f"Model loading, attempt {attempt + 1}/{max_retries}. Waiting {wait_time} seconds...")
@@ -127,14 +189,18 @@ def classify_and_generate(audio_file):
     if not token:
         return "Error: HF_TOKEN environment variable is not set. Please set your Hugging Face API token."
 
+    # Get audio duration and calculate structure
+    if isinstance(audio_file, tuple):
+        audio_path = audio_file[0]
+    else:
+        audio_path = audio_file
+
+    duration = get_audio_duration(audio_path)
+    song_structure = calculate_song_structure(duration)
+    print(f"Audio duration: {duration:.2f}s, Structure: {song_structure}")
+
     # Create a temporary file to handle the audio data
     with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
-        # If audio_file is a tuple (file path and sampling rate)
-        if isinstance(audio_file, tuple):
-            audio_path = audio_file[0]
-        else:
-            audio_path = audio_file
-
         # Copy the audio file to our temporary file
         shutil.copy2(audio_path, temp_audio.name)
 
@@ -163,8 +229,8 @@ def classify_and_generate(audio_file):
 
         # Generate lyrics based on classification with retry logic
         print("Generating lyrics based on classification...")
-        prompt = create_lyrics_prompt(formatted_results)
-        lyrics = generate_lyrics_with_retry(prompt)
+        prompt = create_lyrics_prompt(formatted_results, song_structure)
+        lyrics = generate_lyrics_with_retry(prompt, song_structure)
 
         # Format and return results
         return format_results(formatted_results, lyrics, prompt)
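As a quick sanity check of the duration-based behavior introduced here, the sketch below exercises the two new helpers on their own. It assumes app.py is importable as a module named app and that a local test.mp3 exists; both names are placeholders for illustration, not part of the change.

# Illustrative only: exercises the helpers added in this change.
from app import get_audio_duration, calculate_song_structure

duration = get_audio_duration("test.mp3")       # returns None if the file can't be read
structure = calculate_song_structure(duration)  # falls back to the default structure on None
print(duration, structure)

# Mapping implemented by calculate_song_structure:
#   duration < 30s   -> {"verses": 1, "choruses": 1, "tokens": 150}
#   duration < 120s  -> {"verses": 2, "choruses": 2, "tokens": 200}
#   otherwise        -> {"verses": 3, "choruses": 3, "tokens": 300}

The resulting structure dict drives both the prompt (verse/chorus counts) and the generation request (max_new_tokens), so shorter clips yield shorter lyrics.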