Spaces:

eyov
/

LyricExtractor

Running

App Files Files Community

LyricExtractor / app.py

eyov

Update: note that the app is not working properly on HF

0067ac9 verified about 2 months ago

raw

history blame contribute delete

4.42 kB

	import os
	import sys
	import logging
	import gradio as gr
	import shutil
	from demucs_handler import DemucsProcessor, check_dependencies, configure_model
	from whisper_handler import WhisperTranscriber
	import tempfile
	import torch
	import torchaudio
	import soundfile as sf
	import librosa
	import numpy as np

	# Configure the root logger once at import time: INFO and above, with each
	# record prefixed by its timestamp and severity.
	logging.basicConfig(
	    format="%(asctime)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)

	def validate_environment():
	    """Check that the audio/ML dependencies import, then log their versions.

	    Logs the PyTorch version, the Torchaudio version and whether CUDA is
	    available. If ``torch``, ``torchaudio`` or ``demucs`` cannot be imported,
	    the import error is logged and the process exits with status 1.
	    """
	    # Only the imports sit inside the ``try``: a failure while logging the
	    # versions must not be misreported as a missing dependency.
	    try:
	        import torch
	        import torchaudio
	        import demucs  # noqa: F401 -- imported only to prove it is installed
	    except ImportError as e:
	        logging.error(f"Environment validation failed: {e}")
	        sys.exit(1)

	    logging.info(f"PyTorch version: {torch.__version__}")
	    logging.info(f"Torchaudio version: {torchaudio.__version__}")
	    logging.info(f"CUDA available: {torch.cuda.is_available()}")

	def create_interface():
	    """Build the Gradio interface for vocal isolation and lyric transcription.

	    Validates the environment, creates one ``DemucsProcessor`` and one
	    ``WhisperTranscriber`` that are captured by the request callback, and
	    wires that callback into a ``gr.Interface``.

	    Returns:
	        The configured ``gr.Interface``. It is not launched here.
	    """
	    validate_environment()
	    processor = DemucsProcessor()
	    transcriber = WhisperTranscriber()

	    def process_audio(audio_file, whisper_model="base", progress=gr.Progress()):
	        """Isolate the vocals of an uploaded track and transcribe them.

	        Args:
	            audio_file: Path of the uploaded audio file, or ``None`` when
	                nothing was uploaded.
	            whisper_model: Value of the "Whisper Model Size" dropdown.
	                NOTE(review): this value is currently unused; the shared
	                ``transcriber`` is called with the vocals path only. Confirm
	                how ``WhisperTranscriber`` selects its model before wiring
	                it through.
	            progress: Gradio progress tracker used to report each stage.

	        Returns:
	            ``((sample_rate, samples), lyrics)`` on success, or
	            ``(None, message)`` when no file was given or a step failed.
	        """
	        if audio_file is None:
	            return None, "Please upload an audio file."

	        try:
	            progress(0, desc="Starting processing")
	            logging.info(f"Processing file: {audio_file}")

	            # Every intermediate file is written inside this directory, which
	            # is deleted with its contents when the ``with`` block exits, so
	            # no separate per-file cleanup is needed.
	            with tempfile.TemporaryDirectory() as temp_dir:
	                temp_audio_path = os.path.join(temp_dir, "input.wav")
	                vocals_output_path = os.path.join(temp_dir, "vocals.wav")

	                # Normalise whatever was uploaded to a 44.1 kHz WAV file.
	                audio, sr = librosa.load(audio_file, sr=44100)
	                sf.write(temp_audio_path, audio, samplerate=sr)

	                progress(0.1, desc="Separating vocals")
	                try:
	                    vocals_path = processor.separate_vocals(temp_audio_path)
	                    # Copy the result next to the input so it is removed
	                    # together with the temporary directory.
	                    shutil.copy2(vocals_path, vocals_output_path)
	                except RuntimeError as e:
	                    logging.exception(f"Vocal separation failed: {str(e)}")
	                    return None, f"Vocal separation failed: {str(e)}"

	                # Read the vocals into memory (native sample rate) before the
	                # temporary directory disappears; Gradio plays them back from
	                # the returned array.
	                vocals_audio, vocals_sr = librosa.load(vocals_output_path, sr=None)

	                progress(0.75, desc="Transcribing")
	                lyrics = transcriber.transcribe(vocals_output_path)
	                progress(1.0, desc="Processing complete")

	                return (vocals_sr, vocals_audio), lyrics

	        except Exception as e:
	            # UI boundary: report the failure in the text box instead of
	            # crashing the request, but keep the traceback in the log.
	            error_message = f"Processing error: {str(e)}"
	            logging.exception(error_message)
	            return None, error_message

	    interface = gr.Interface(
	        fn=process_audio,
	        inputs=[
	            gr.Audio(label="Upload Audio File", type="filepath"),
	            gr.Dropdown(
	                choices=["tiny", "base", "small", "medium", "large-v2"],
	                value="medium",
	                label="Whisper Model Size",
	            ),
	        ],
	        outputs=[
	            gr.Audio(label="Isolated Vocals", type="numpy"),
	            gr.Textbox(label="Transcribed Lyrics", lines=10, max_lines=20),
	        ],
	        title="Audio Lyrics Extractor",
	        description=(
	            "Upload an audio file to extract vocals and transcribe lyrics\n"
	            " Created by Ever Olivares - Looking for Summer 2025 Internship Opportunities\n"
	            " Connect with me: [LinkedIn](https://www.linkedin.com/in/everolivares/)"
	            " Currently not working as intended on HF tested on LightningAI with T4 running largeV2"
	        ),
	        analytics_enabled=False,
	    )
	    return interface

	if __name__ == "__main__":
	    # Script entry point: refuse to start when required dependencies are
	    # missing, otherwise build the Gradio interface and launch it.
	    if not check_dependencies():
	        print("Please install missing dependencies")
	        # Use sys.exit rather than the built-in exit(): the latter is added
	        # by the ``site`` module for interactive use and may not exist.
	        sys.exit(1)
	    interface = create_interface()
	    interface.launch()