import os
import tempfile

import librosa
import numpy as np
import torch
import torchaudio
from demucs.apply import apply_model
from demucs.pretrained import get_model


class DemucsProcessor:
    """Load a pretrained Demucs model and extract the vocal stem of audio files."""

    def __init__(self, model_name="htdemucs"):
        """Load the pretrained model and move it to CUDA when available, else CPU.

        Args:
            model_name: Name passed to ``demucs.pretrained.get_model``.
        """
        self.model_name = model_name
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        self.model = get_model(model_name)
        self.model.to(self.device)
        # Stem names exposed by the model; indexed later to pick out 'vocals'.
        self.sources = self.model.sources
        print(f"Model loaded successfully on {self.device}")
        print(f"Available sources: {self.sources}")

    def load_audio(self, file_path):
        """Read an audio file as a ``(channels, samples)`` tensor.

        torchaudio is tried first; if it raises, librosa is used as a
        fallback (which resamples to 44100 Hz). Mono input is duplicated
        to two channels in both paths.

        Args:
            file_path: Path of the audio file to read.

        Returns:
            Tuple ``(waveform, sample_rate)``.

        Raises:
            RuntimeError: If both torchaudio and librosa fail to load the file.
        """
        try:
            waveform, sample_rate = torchaudio.load(file_path)
        except Exception as torchaudio_err:
            print(f"Error loading with torchaudio: {torchaudio_err}")
            try:
                # Fallback to librosa
                audio, sr = librosa.load(file_path, sr=44100, mono=False)
                if audio.ndim == 1:
                    audio = np.vstack([audio, audio])
                return torch.from_numpy(audio), sr
            except Exception as librosa_err:
                # Chain the cause so the original traceback is not lost.
                raise RuntimeError(
                    f"Failed to load audio: {str(librosa_err)}"
                ) from librosa_err

        print(f"Audio loaded - Shape: {waveform.shape}, Sample rate: {sample_rate}")

        # Handle mono input
        if waveform.dim() == 1:
            waveform = waveform.unsqueeze(0)
        if waveform.shape[0] == 1:
            waveform = waveform.repeat(2, 1)

        return waveform, sample_rate

    def separate_vocals(self, audio_path):
        """Separate the vocal stem of ``audio_path`` and write it to a WAV file.

        Args:
            audio_path: Path of the input audio file.

        Returns:
            Path of a temporary ``.wav`` file holding the vocals. The file is
            created with ``delete=False``; the caller is responsible for
            removing it.

        Raises:
            RuntimeError: If loading, separation or saving fails.
        """
        try:
            # Load audio
            waveform, sample_rate = self.load_audio(audio_path)
            print(f"Audio loaded - Shape: {waveform.shape}, Sample rate: {sample_rate}")

            # The librosa fallback already resamples to 44100 Hz; do the same
            # for the torchaudio path so the model always sees audio at its
            # own rate. Falls back to the file's rate if the model exposes no
            # ``samplerate`` attribute.
            target_rate = getattr(self.model, "samplerate", sample_rate)
            if sample_rate != target_rate:
                waveform = torchaudio.functional.resample(
                    waveform, sample_rate, target_rate
                )
                sample_rate = target_rate

            # Move to the model's device and add the batch dimension.
            batch = waveform.to(self.device).unsqueeze(0)

            # apply_model runs with its library defaults (no explicit
            # segment/split arguments are passed here).
            with torch.no_grad():
                sources = apply_model(self.model, batch)

            # Get vocals
            vocals_idx = self.sources.index('vocals')
            vocals = sources[:, vocals_idx]

            # Reserve a temp path and close the handle BEFORE writing: saving
            # to a path that is still open fails on Windows.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
                out_path = tmp.name

            try:
                torchaudio.save(
                    out_path,
                    vocals.squeeze(0).cpu(),
                    sample_rate,
                    format='wav'
                )
            except Exception:
                # Do not leak the delete=False temp file when saving fails.
                if os.path.exists(out_path):
                    os.remove(out_path)
                raise

            return out_path

        except Exception as e:
            raise RuntimeError(f"Separation failed: {str(e)}") from e


def configure_model():
    """Return a dict of default processing settings.

    ``segment_size`` is 16 when CUDA is available and 4 otherwise; the other
    values are fixed. Nothing in this file consumes the dict.
    """
    return {
        "segment_size": 16 if torch.cuda.is_available() else 4,  # Increased from 8
        "overlap": 0.1,
        "sample_rate": 44100,
        "channels": 2
    }


def check_dependencies():
    """Check that the required packages import and a NumPy->torch round trip works.

    Returns:
        True if every import succeeds, False if an ImportError is raised
        (the missing dependency is printed).
    """
    try:
        import torch
        import torchaudio  # noqa: F401 - imported only to verify availability
        import librosa  # noqa: F401
        import demucs  # noqa: F401
        from demucs.pretrained import get_model  # noqa: F401

        # Smoke test: one second of random samples converted to a tensor.
        torch.from_numpy(np.random.random(44100))

        print("All required packages are installed correctly")
        return True
    except ImportError as e:
        print(f"Missing dependency: {str(e)}")
        return False