import spaces  # Import spaces before anything CUDA-related
import gradio as gr
import torchaudio
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
import os
import uuid
import torch
import re

# Use the GPU if available; otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the models once at startup
def load_model(model_name):
    """Load the specified pretrained model."""
    return MusicGen.get_pretrained(model_name)

# Initialize the models
models = {
    'Large': load_model('nateraw/musicgen-songstarter-v0.2'),
    'Small': load_model('facebook/musicgen-small')
}

def get_model(model_choice):
    """Return the model matching the user's choice."""
    return models.get(model_choice, models['Large'])  # Default: 'Large'

@spaces.GPU(duration=120)  # Hugging Face Spaces ZeroGPU decorator; allow up to 120 s of GPU time
def generate_music(description, melody_audio, duration, model_choice):
    model = get_model(model_choice)  # Get the selected model
    description = clean_text(description)
    # set_generation_params expects the duration in seconds (not milliseconds)
    model.set_generation_params(duration=int(duration))
    try:
        with torch.no_grad():
            if description:
                description = [description]
                if melody_audio:
                    melody, sr = torchaudio.load(melody_audio, normalize=True)
                    melody = melody.to(device)
                    # melody[None] adds the batch dimension the model expects
                    wav = model.generate_with_chroma(description, melody[None], sr)
                else:
                    wav = model.generate(description)
            else:
                wav = model.generate_unconditional(1)
        # audio_write appends the format suffix itself, so pass a bare stem
        stem = str(uuid.uuid4())
        path = audio_write(stem, wav[0].cpu().to(torch.float32), model.sample_rate,
                           strategy="loudness", loudness_compressor=True)
        if not os.path.exists(path):
            raise ValueError(f'Failed to save audio to {path}')
        return str(path)
    except Exception as e:
        # Surface the failure in the UI instead of returning an error string
        # to an Audio output, which would break the component
        raise gr.Error(str(e))

def clean_text(text):
    """Strip URLs and unsupported characters, keeping ',' and '#' used in prompts (e.g. 'G# minor, 140 bpm')."""
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'[^a-zA-Z0-9\s,#]', '', text)
    return text

# Define the Gradio interface elements
description = gr.Textbox(label="Description", placeholder="Acoustic, guitar, melody, trap, D minor, 90 bpm")
melody_audio = gr.Audio(label="Melody Audio (optional)", type="filepath")
duration = gr.Number(label="Duration (seconds)", value=10, precision=0)
model_choice = gr.Radio(choices=['Small', 'Large'], label="Model Choice", value='Large')
output_path = gr.Audio(label="Generated Music", type="filepath")

# Build and launch the Gradio interface
gr.Interface(
    fn=generate_music,
    inputs=[description, melody_audio, duration, model_choice],
    outputs=output_path,
    title="MusicGen Demo",
    description="Generate music using the MusicGen model. Choose between the Small or Large model.",
    examples=[
        ["trap, synthesizer, music starters, dark, G# minor, 140 bpm", "./assets/kalhonaho.mp3", 20, 'Large'],
        ["upbeat, electronic, synthesizer, dance, 120 bpm", None, 30, 'Small']
    ]
).launch()