# Spaces: Running
import os | |
import sys | |
import logging | |
import gradio as gr | |
import shutil | |
from demucs_handler import DemucsProcessor, check_dependencies, configure_model | |
from whisper_handler import WhisperTranscriber | |
import tempfile | |
import torch | |
import torchaudio | |
import soundfile as sf | |
import librosa | |
import numpy as np | |
# Configure the root logger once at import time: INFO and above, with each
# record prefixed by its timestamp and severity.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def validate_environment():
    """Check that torch, torchaudio and demucs are importable.

    Logs the PyTorch and Torchaudio versions and whether CUDA is available.
    If any of the imports fails, the error is logged and the process exits
    with status 1.
    """
    try:
        import torch
        import torchaudio
        import demucs  # noqa: F401 - imported only to prove it is installed

        torch_version = torch.__version__
        logging.info(f"PyTorch version: {torch_version}")

        torchaudio_version = torchaudio.__version__
        logging.info(f"Torchaudio version: {torchaudio_version}")

        cuda_available = torch.cuda.is_available()
        logging.info(f"CUDA available: {cuda_available}")
    except ImportError as import_error:
        logging.error(f"Environment validation failed: {import_error}")
        sys.exit(1)
def create_interface():
    """Build the Gradio interface for vocal isolation and lyric transcription.

    Runs ``validate_environment()`` first, then creates one
    ``DemucsProcessor`` and one ``WhisperTranscriber`` that are shared by
    every request through the nested ``process_audio`` callback.

    Returns:
        The configured ``gr.Interface``. It is not launched here; the caller
        does that.
    """
    validate_environment()
    processor = DemucsProcessor()
    transcriber = WhisperTranscriber()

    def process_audio(audio_file, whisper_model="base", progress=gr.Progress()):
        """Isolate the vocals of an uploaded file and transcribe them.

        Args:
            audio_file: Path of the uploaded audio file, or ``None`` when
                nothing was uploaded.
            whisper_model: Value selected in the "Whisper Model Size"
                dropdown.
                NOTE(review): this value is never forwarded to the
                transcriber, so the dropdown currently has no effect, and
                its default ("base") differs from the dropdown default
                ("medium"). Confirm the WhisperTranscriber API before
                wiring it through.
            progress: Gradio progress tracker used to report each stage.

        Returns:
            ``((sample_rate, samples), lyrics)`` on success, or
            ``(None, message)`` when no file was given or a step failed.
        """
        if audio_file is None:
            return None, "Please upload an audio file."

        try:
            progress(0, desc="Starting processing")
            logging.info(f"Processing file: {audio_file}")

            # Every intermediate file is created inside this directory, which
            # the context manager deletes on exit (including on early return
            # or exception), so no separate per-file cleanup is needed.
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_audio_path = os.path.join(temp_dir, "input.wav")
                vocals_output_path = os.path.join(temp_dir, "vocals.wav")

                # Convert the upload to a 44.1 kHz WAV before separation.
                audio, sr = librosa.load(audio_file, sr=44100)
                sf.write(temp_audio_path, audio, samplerate=sr)

                progress(0.1, desc="Separating vocals")
                try:
                    vocals_path = processor.separate_vocals(temp_audio_path)
                    # Copy the separated vocals to a known path in temp_dir.
                    shutil.copy2(vocals_path, vocals_output_path)
                except RuntimeError as e:
                    # logging.exception keeps the traceback in the log.
                    logging.exception(f"Vocal separation failed: {e}")
                    return None, f"Vocal separation failed: {e}"

                progress(0.75, desc="Transcribing")
                lyrics = transcriber.transcribe(vocals_output_path)

                # Read the vocals into memory before temp_dir disappears;
                # sr=None keeps the file's own sample rate.
                vocals_audio, vocals_sr = librosa.load(vocals_output_path, sr=None)

                progress(1.0, desc="Processing complete")
                # Return the audio data tuple and lyrics
                return (vocals_sr, vocals_audio), lyrics
        except Exception as e:
            # Boundary handler for the UI callback: report the failure in the
            # lyrics textbox instead of letting the exception propagate.
            error_message = f"Processing error: {e}"
            logging.exception(error_message)
            return None, error_message

    interface = gr.Interface(
        fn=process_audio,
        inputs=[
            gr.Audio(label="Upload Audio File", type="filepath"),
            gr.Dropdown(
                choices=["tiny", "base", "small", "medium", "large-v2"],
                value="medium",
                label="Whisper Model Size",
            ),
        ],
        outputs=[
            gr.Audio(label="Isolated Vocals", type="numpy"),
            gr.Textbox(label="Transcribed Lyrics", lines=10, max_lines=20),
        ],
        title="Audio Lyrics Extractor",
        description=(
            "Upload an audio file to extract vocals and transcribe lyrics\n"
            " Created by Ever Olivares - Looking for Summer 2025 Internship Opportunities\n"
            " Connect with me: [LinkedIn](https://www.linkedin.com/in/everolivares/)"
            " Currently not working as intended on HF tested on LightningAI with T4 running largeV2"
        ),
        analytics_enabled=False,
    )
    return interface
if __name__ == "__main__":
    # Refuse to start the app when the dependency check fails.
    if not check_dependencies():
        print("Please install missing dependencies")
        # Use sys.exit rather than the exit() helper: exit() is injected by
        # the site module for interactive use and is not guaranteed to exist
        # (for example under `python -S`).
        sys.exit(1)

    interface = create_interface()
    interface.launch()