Spaces:

jacob-c
/

fyp_start_space

Running

fyp_start_space / app.py

7355122 6 months ago

11.7 kB

	import requests
	import gradio as gr
	import os
	import torch
	import json
	import time
	import tempfile
	import shutil
	import librosa
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Pick the torch device: the GPU when CUDA is usable, the CPU otherwise
	device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

	# Hugging Face Inference API endpoints (audio classification, lyrics generation)
	AUDIO_API_URL = "https://api-inference.huggingface.co/models/MIT/ast-finetuned-audioset-10-10-0.4593"
	LYRICS_API_URL = "https://api-inference.huggingface.co/models/gpt2-medium"

	# Authorization header, built once at import time from the HF_TOKEN environment variable
	headers = {"Authorization": "Bearer {}".format(os.environ.get('HF_TOKEN'))}

	def get_audio_duration(audio_path):
	    """Return the length, in seconds, of the audio file at ``audio_path``.

	    The value comes from ``librosa.get_duration``. Any exception raised
	    while measuring is printed and ``None`` is returned instead.
	    """
	    try:
	        return librosa.get_duration(path=audio_path)
	    except Exception as err:
	        print(f"Error getting audio duration: {err}")
	    return None

	def calculate_song_structure(duration):
	    """Choose verse/chorus counts and a token budget for a clip of ``duration`` seconds.

	    Returns a dict with the keys ``"verses"``, ``"choruses"`` and ``"tokens"``:

	    - ``None`` (duration unknown): 2 verses, 1 chorus, 200 tokens
	    - shorter than 30 seconds: 1 verse, 1 chorus, 150 tokens
	    - 30 seconds up to, but not including, 120 seconds: 2 verses, 2 choruses, 200 tokens
	    - 120 seconds or longer: 3 verses, 3 choruses, 300 tokens
	    """
	    if duration is None:
	        return {"verses": 2, "choruses": 1, "tokens": 200}  # Default structure

	    # (exclusive upper bound in seconds, verses, choruses, tokens), shortest first
	    tiers = (
	        (30, 1, 1, 150),
	        (120, 2, 2, 200),
	    )
	    for upper_bound, verses, choruses, tokens in tiers:
	        if duration < upper_bound:
	            return {"verses": verses, "choruses": choruses, "tokens": tokens}

	    # Anything that did not fit a shorter tier is treated as a long clip
	    return {"verses": 3, "choruses": 3, "tokens": 300}

	def create_lyrics_prompt(classification_results, song_structure):
	    """Create the few-shot prompt used for lyrics generation.

	    Args:
	        classification_results: Sequence of dicts that each carry a ``'label'``
	            key. The first label is used as the main style and the next two
	            (when present) as secondary elements. Other keys are ignored.
	        song_structure: Accepted for interface compatibility; the prompt text
	            does not depend on it.

	    Returns:
	        The prompt string: a headline naming the style, one example verse and
	        chorus, and a trailing ``[Verse 1]`` marker for the model to continue.
	        With no secondary labels the "with ... elements" clause is left out,
	        and with no results at all the headline falls back to "Here's a song:".
	    """
	    # Only the labels matter here; the scores are not part of the prompt.
	    labels = [result['label'] for result in classification_results[:3]]

	    if labels:
	        headline = f"Here's a {labels[0]} song"
	        secondary_elements = labels[1:]
	    else:
	        headline = "Here's a song"
	        secondary_elements = []

	    if secondary_elements:
	        headline += f" with {', '.join(secondary_elements)} elements"
	    headline += ":"

	    # Built line by line so the prompt never picks up source-file indentation.
	    prompt_lines = (
	        headline,
	        "",
	        "[Verse 1]",
	        "The melody rings through the air tonight",
	        "Like a gentle whisper in the light",
	        "Every note tells a story so clear",
	        "Creating magic for all to hear",
	        "",
	        "[Chorus]",
	        "Let the rhythm flow and shine",
	        "Feel the music so divine",
	        "Every moment, every sound",
	        "Brings the joy that we have found",
	        "",
	        "Now continue with your own lyrics in this style:",
	        "",
	        "[Verse 1]",
	    )
	    return "\n".join(prompt_lines)

	def _pad_section(section_lines, size=4, filler="..."):
	    """Return exactly ``size`` lines: drop extras, pad a short section with ``filler``."""
	    kept = section_lines[:size]
	    return kept + [filler] * (size - len(kept))


	def format_lyrics(generated_text, song_structure):
	    """Format the generated lyrics according to the desired structure.

	    Args:
	        generated_text: Raw model output. Lines containing ``[verse`` or
	            ``[chorus`` (case-insensitive) are treated as section markers.
	        song_structure: Dict with integer ``'verses'`` and ``'choruses'``
	            entries giving how many of each section the result should have.

	    Returns:
	        A single string. Each accepted section is a renumbered header
	        (``[Verse n]`` / ``[Chorus n]``, preceded by a blank line) followed by
	        exactly four lines; short sections are padded with ``"..."`` and long
	        ones are truncated. Sections beyond the requested counts are dropped
	        together with their lines, and missing sections are appended as
	        placeholders. Text that appears before the first marker is kept as an
	        unlabeled four-line block at the top.
	    """
	    max_verses = song_structure['verses']
	    max_choruses = song_structure['choruses']

	    lines = []
	    section_lines = []
	    verse_count = 0
	    chorus_count = 0
	    header_open = False  # an accepted header has been emitted and awaits its lines
	    keep_lines = True    # False while reading a section beyond the requested count

	    for raw_line in generated_text.split('\n'):
	        line = raw_line.strip()

	        # Skip empty lines and code blocks
	        if not line or line.startswith(('```', '###')):
	            continue

	        lowered = line.lower()
	        is_verse = '[verse' in lowered
	        is_chorus = '[chorus' in lowered

	        if not (is_verse or is_chorus):
	            # Ordinary lyric line: keep it unless it belongs to a surplus section,
	            # otherwise it would be glued onto the previous section without a header.
	            if keep_lines:
	                section_lines.append(line)
	            continue

	        # A marker closes whatever section came before it. A header that
	        # collected no lines is still padded so every section has four lines.
	        if header_open or section_lines:
	            lines.extend(_pad_section(section_lines))
	        section_lines = []
	        header_open = False

	        if is_verse and verse_count < max_verses:
	            verse_count += 1
	            lines.append(f"\n[Verse {verse_count}]")
	            header_open = keep_lines = True
	        elif is_chorus and chorus_count < max_choruses:
	            chorus_count += 1
	            lines.append(f"\n[Chorus {chorus_count}]")
	            header_open = keep_lines = True
	        else:
	            # Requested number of this section type already reached.
	            keep_lines = False

	    # Handle the last section
	    if header_open or section_lines:
	        lines.extend(_pad_section(section_lines))

	    # If we don't have enough sections, add placeholders, alternating verse/chorus
	    while verse_count < max_verses or chorus_count < max_choruses:
	        if verse_count < max_verses:
	            verse_count += 1
	            lines.append(f"\n[Verse {verse_count}]")
	            lines.extend(_pad_section([]))
	        if chorus_count < max_choruses:
	            chorus_count += 1
	            lines.append(f"\n[Chorus {chorus_count}]")
	            lines.extend(_pad_section([]))

	    return "\n".join(lines)

	def generate_lyrics_with_retry(prompt, song_structure, max_retries=5, initial_wait=2):
	    """Generate lyrics using GPT2-Medium with retry logic.

	    Args:
	        prompt: Text sent to the model as ``inputs``.
	        song_structure: Dict whose ``'tokens'`` entry is used as
	            ``max_new_tokens``; it is also passed on to ``format_lyrics``.
	        max_retries: Maximum number of requests to make.
	        initial_wait: Seconds to wait before the first retry; the wait grows
	            by a factor of 1.5 after each wait.

	    Returns:
	        The formatted lyrics on success, otherwise a human-readable error
	        string. Exceptions raised while requesting or parsing are caught and
	        reported in the returned string rather than propagated.
	    """
	    # Without a timeout requests.post can block forever on a stalled connection.
	    request_timeout = 60  # seconds
	    wait_time = initial_wait

	    for attempt in range(max_retries):
	        is_last_attempt = attempt >= max_retries - 1
	        try:
	            response = requests.post(
	                LYRICS_API_URL,
	                headers=headers,
	                json={
	                    "inputs": prompt,
	                    "parameters": {
	                        "max_new_tokens": song_structure['tokens'],
	                        "temperature": 0.7,  # Lower temperature for more focused output
	                        "top_p": 0.85,
	                        "do_sample": True,
	                        "return_full_text": False,
	                        "repetition_penalty": 1.1,  # Reduced repetition penalty
	                        # NOTE(review): presence_penalty / frequency_penalty may not be
	                        # accepted by this text-generation endpoint - confirm against
	                        # the Inference API parameter list.
	                        "presence_penalty": 0.3,
	                        "frequency_penalty": 0.3
	                    }
	                },
	                timeout=request_timeout,
	            )

	            if response.status_code == 200:
	                result = response.json()
	                if isinstance(result, list) and result:
	                    generated_text = result[0].get("generated_text", "")
	                    if not generated_text:
	                        continue

	                    formatted_lyrics = format_lyrics(generated_text, song_structure)

	                    # Verify we have actual content and it looks like lyrics:
	                    # ignore headers and "..." placeholders when counting.
	                    content_lines = [
	                        l for l in formatted_lyrics.split('\n')
	                        if l.strip() and not l.strip().startswith('[') and l.strip() != '...'
	                    ]
	                    looks_wrong = (
	                        len(content_lines) < 4
	                        or any(len(line.split()) > 20 for line in content_lines)
	                    )
	                    if looks_wrong and not is_last_attempt:
	                        print("Generated text doesn't look like lyrics, retrying...")
	                        continue

	                    # On the final attempt the best effort is returned as-is.
	                    return formatted_lyrics
	                # A 200 response with an unexpected payload just moves on to the next attempt.

	            elif response.status_code == 503:
	                if is_last_attempt:
	                    # Nothing left to retry, so do not sleep before giving up.
	                    print(f"Model loading, attempt {attempt + 1}/{max_retries}. No retries left.")
	                else:
	                    print(f"Model loading, attempt {attempt + 1}/{max_retries}. Waiting {wait_time} seconds...")
	                    time.sleep(wait_time)
	                    wait_time *= 1.5
	                continue
	            else:
	                print(f"Error response: {response.text}")
	                if not is_last_attempt:
	                    continue
	                return f"Error generating lyrics: {response.text}"

	        except Exception as e:
	            # Deliberately broad: this function reports failures as text for the UI.
	            print(f"Error on attempt {attempt + 1}: {str(e)}")
	            if not is_last_attempt:
	                time.sleep(wait_time)
	                wait_time *= 1.5
	                continue
	            return f"Error after {max_retries} attempts: {str(e)}"

	    return "Failed to generate lyrics after multiple attempts. Please try again."

	def format_results(classification_results, lyrics, prompt):
	    """Build the text shown to the user: numbered labels, then the lyrics.

	    Each entry of ``classification_results`` contributes one line of the form
	    ``"<rank>. <label>: <score>"`` (ranks start at 1). ``prompt`` is accepted
	    but does not appear in the output.
	    """
	    ranked_rows = "".join(
	        f"{rank}. {entry['label']}: {entry['score']}\n"
	        for rank, entry in enumerate(classification_results, start=1)
	    )
	    classification_text = "Classification Results:\n" + ranked_rows

	    # Classification block, a separator, then the lyrics
	    return f"\n{classification_text}\n\n---Generated Lyrics---\n\n{lyrics}\n"

	def classify_and_generate(audio_file):
	    """
	    Classify the audio and generate matching lyrics.

	    Args:
	        audio_file: Path of the uploaded audio file, a tuple whose first item
	            is that path, or ``None`` when nothing was uploaded.

	    Returns:
	        The formatted classification results and lyrics, or an error message.
	        Exceptions raised while processing are caught and returned as text
	        (including the traceback) instead of propagating.
	    """
	    if audio_file is None:
	        return "Please upload an audio file."

	    try:
	        token = os.environ.get('HF_TOKEN')
	        if not token:
	            return "Error: HF_TOKEN environment variable is not set. Please set your Hugging Face API token."

	        # Get audio duration and calculate structure
	        audio_path = audio_file[0] if isinstance(audio_file, tuple) else audio_file

	        duration = get_audio_duration(audio_path)
	        song_structure = calculate_song_structure(duration)
	        # duration is None when it could not be measured; formatting None with
	        # ':.2f' would raise and abort a request the default structure can serve.
	        duration_text = "unknown" if duration is None else f"{duration:.2f}s"
	        print(f"Audio duration: {duration_text}, Structure: {song_structure}")

	        # Read the upload directly; an intermediate temporary copy adds nothing
	        # and would be left behind if the request below raised.
	        with open(audio_path, "rb") as f:
	            data = f.read()

	        print("Sending request to Audio Classification API...")
	        # The timeout keeps a stalled connection from hanging the UI forever.
	        response = requests.post(AUDIO_API_URL, headers=headers, data=data, timeout=60)

	        if response.status_code == 401:
	            return "Error: Invalid or missing API token. Please check your Hugging Face API token."
	        if response.status_code == 503:
	            return "Error: Model is loading. Please try again in a few seconds."
	        if response.status_code != 200:
	            return f"Error: API returned status code {response.status_code}\nResponse: {response.text}"

	        # Format classification results: scores become percentage strings
	        formatted_results = [
	            {
	                'label': result['label'],
	                'score': f"{result['score']*100:.2f}%"
	            }
	            for result in response.json()
	        ]

	        # Generate lyrics based on classification with retry logic
	        print("Generating lyrics based on classification...")
	        prompt = create_lyrics_prompt(formatted_results, song_structure)
	        lyrics = generate_lyrics_with_retry(prompt, song_structure)

	        # Format and return results
	        return format_results(formatted_results, lyrics, prompt)

	    except Exception as e:
	        # Top-level boundary for the UI callback: report instead of crashing.
	        import traceback
	        error_details = traceback.format_exc()
	        return f"Error processing request: {str(e)}\nDetails:\n{error_details}"

	# Build the Gradio UI: one audio upload in, one results textbox out
	_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
	_results_box = gr.Textbox(
	    label="Results",
	    lines=15,
	    placeholder="Upload an audio file to see classification results and generated lyrics...",
	)
	iface = gr.Interface(
	    fn=classify_and_generate,
	    inputs=_audio_input,
	    outputs=_results_box,
	    title="Music Genre Classifier + Lyric Generator",
	    description="Upload an audio file to classify its genre and generate matching lyrics using AI.",
	    examples=[],
	)

	# Start the server only when this file is executed as a script
	if __name__ == "__main__":
	    iface.launch(server_name="0.0.0.0", server_port=7860)