Spaces:
Running
Running
import gradio as gr | |
import os | |
import tempfile | |
from pathlib import Path | |
from typing import List, Tuple, Optional | |
from concurrent.futures import ThreadPoolExecutor | |
import logging | |
import soundfile as sf | |
import numpy as np | |
import shutil | |
from validators import AudioValidator | |
from demucs_handler import DemucsProcessor | |
from basic_pitch_handler import BasicPitchConverter | |
# Module-level logger for this file.
logger = logging.getLogger(__name__)

# Suppress TF logging, both via the environment variable and via the
# Python-side 'tensorflow' logger.
# NOTE(review): this runs after the handler imports above; if one of them
# imports TensorFlow, the environment variable may be set too late to take
# effect -- confirm, and move it above the imports if so.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# Persistent directory under which every processing run writes its outputs;
# created at import time (parents included) if it does not exist yet.
OUTPUT_DIR = Path("/tmp/audio_processor")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Names of the stems in the order they are indexed along the second axis of
# the separated-sources array (see ``sources[0, stem_index]`` below).
_STEM_ORDER = ("drums", "bass", "other", "vocals")


def process_single_audio(audio_path: str, stem_type: str, convert_midi: bool) -> Tuple[Tuple[int, np.ndarray], Optional[str]]:
    """Separate one stem from an audio file and optionally convert it to MIDI.

    Args:
        audio_path: Path of the audio file to process.
        stem_type: Which stem to extract: "drums", "bass", "other" or "vocals".
        convert_midi: When true, also write a MIDI version of the stem.

    Returns:
        ``((sample_rate, samples), midi_path)``. ``samples`` is a mono
        ``int16`` array read back from the saved stem file; ``midi_path`` is
        the path of the MIDI file as a string, or ``None`` when
        ``convert_midi`` is false.

    Raises:
        ValueError: If ``stem_type`` is not one of the known stem names.
        Exception: Any error from separation, file I/O or MIDI conversion is
            printed and then re-raised unchanged.
    """
    try:
        # Reject an unknown stem before doing any expensive work, with a
        # message that names the valid choices.
        if stem_type not in _STEM_ORDER:
            raise ValueError(
                f"Unknown stem type {stem_type!r}; "
                f"expected one of: {', '.join(_STEM_ORDER)}"
            )
        stem_index = _STEM_ORDER.index(stem_type)

        # Create a unique subdirectory for this processing run. mkdtemp
        # guarantees a fresh directory, whereas hash(audio_path) is salted per
        # process, may be negative, and can collide between different inputs.
        process_dir = Path(tempfile.mkdtemp(dir=OUTPUT_DIR))

        processor = DemucsProcessor()
        print(f"Starting processing of file: {audio_path}")

        # Process stems
        sources, sample_rate = processor.separate_stems(audio_path)
        print(f"Separated sources shape: {sources.shape}")
        print(f"Stem type requested: {stem_type}")

        # Pick the requested stem out of the first item of the result.
        selected_stem = sources[0, stem_index]

        # Save stem.
        # NOTE(review): assumes save_stem writes "<stem_type>.wav" inside the
        # given directory, since that is the path read back below -- confirm
        # against DemucsProcessor.save_stem.
        stem_path = process_dir / f"{stem_type}.wav"
        processor.save_stem(selected_stem, stem_type, str(process_dir), sample_rate)
        print(f"Saved stem to: {stem_path}")

        # Load the saved audio file for Gradio
        audio_data, sr = sf.read(str(stem_path))
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)  # Convert to mono if stereo

        # Convert to int16. Clip first: any sample outside [-1, 1] would
        # otherwise wrap around when cast and come out as loud distortion.
        # (Presumably sf.read returns floats in that range for PCM files --
        # the clip is a guard for files where it does not.)
        audio_data = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)

        # Convert to MIDI if requested; the converter is only constructed
        # when it is actually needed.
        midi_output: Optional[str] = None
        if convert_midi:
            midi_path = process_dir / f"{stem_type}.mid"
            BasicPitchConverter().convert_to_midi(str(stem_path), str(midi_path))
            print(f"Saved MIDI to: {midi_path}")
            midi_output = str(midi_path)

        return (sr, audio_data), midi_output
    except Exception as e:
        print(f"Error in process_single_audio: {str(e)}")
        raise
def create_interface():
    """Build the Gradio interface for stem separation and MIDI conversion.

    The interface takes a multi-file upload, a stem selector and a
    "Convert to MIDI" checkbox, and outputs the separated stem as audio plus
    an optional MIDI file. Only the first uploaded file is processed.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """

    def process_audio(
        audio_files: Optional[List[str]],
        stem_type: str,
        convert_midi: bool = True,
        progress=gr.Progress()
    ) -> Tuple[Tuple[int, np.ndarray], Optional[str]]:
        """Gradio callback: process the first uploaded file.

        Any failure is reported to the user as a ``gr.Error`` carrying the
        original exception's message.
        """
        try:
            # Covers both an empty list and None (nothing uploaded), so the
            # user sees a clear message rather than a TypeError from len().
            if not audio_files:
                raise ValueError("No audio files provided")

            print(f"Starting processing of {len(audio_files)} files")
            print(f"Selected stem type: {stem_type}")

            # Process single file for now: take the first upload.
            first = audio_files[0]
            if isinstance(first, (str, os.PathLike)):
                audio_path = os.fspath(first)
            else:
                # NOTE(review): some Gradio versions appear to pass temp-file
                # wrappers whose ``name`` is the path -- confirm for the
                # pinned Gradio version.
                audio_path = getattr(first, "name", first)

            print(f"Processing file: {audio_path}")
            return process_single_audio(audio_path, stem_type, convert_midi)
        except Exception as e:
            print(f"Error in audio processing: {str(e)}")
            # Chain the cause so the original traceback stays in the logs.
            raise gr.Error(str(e)) from e

    interface = gr.Interface(
        fn=process_audio,
        inputs=[
            gr.File(
                file_count="multiple",
                file_types=AudioValidator.SUPPORTED_FORMATS,
                label="Upload Audio Files"
            ),
            gr.Dropdown(
                choices=["vocals", "drums", "bass", "other"],
                label="Select Stem",
                value="vocals"
            ),
            gr.Checkbox(label="Convert to MIDI", value=True)
        ],
        outputs=[
            gr.Audio(label="Separated Stems", type="numpy"),
            gr.File(label="MIDI Files")
        ],
        title="Audio Stem Separator & MIDI Converter",
        description="Upload audio files to separate stems and convert to MIDI",
        cache_examples=True,
        allow_flagging="never"
    )
    return interface
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it with the options below.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "auth": None,
        "ssl_keyfile": None,
        "ssl_certfile": None,
    }
    interface = create_interface()
    interface.launch(**launch_options)