import os

import gradio as gr
import torch
import torchaudio

from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline

# Load the morphing pipeline once at startup. float32 keeps CPU fallback usable;
# fall back to CPU when CUDA is unavailable instead of crashing on .to("cuda").
pipeline = AudioLDM2MorphPipeline.from_pretrained(
    "cvssp/audioldm2-large", torch_dtype=torch.float32
)
pipeline.to("cuda" if torch.cuda.is_available() else "cpu")


def morph_audio(audio_file1, audio_file2, prompt1, prompt2,
                negative_prompt1="Low quality", negative_prompt2="Low quality"):
    """Morph between two audio files guided by text prompts.

    Args:
        audio_file1: Path to the first source audio file.
        audio_file2: Path to the second source audio file.
        prompt1: Text prompt describing audio file 1.
        prompt2: Text prompt describing audio file 2.
        negative_prompt1: Negative prompt for audio file 1.
        negative_prompt2: Negative prompt for audio file 2.

    Returns:
        Sorted list of paths to the generated ``.wav`` morphing frames.
    """
    save_lora_dir = "output"
    os.makedirs(save_lora_dir, exist_ok=True)

    # Generation length follows the first input clip. Clamp to >= 1 second:
    # int() truncation would request a 0-second output for sub-second clips.
    waveform, sample_rate = torchaudio.load(audio_file1)
    duration = max(1, int(waveform.shape[1] / sample_rate))

    _ = pipeline(
        audio_file=audio_file1,
        audio_file2=audio_file2,
        audio_length_in_s=duration,
        time_pooling=2,
        freq_pooling=2,
        prompt_1=prompt1,
        prompt_2=prompt2,
        negative_prompt_1=negative_prompt1,
        negative_prompt_2=negative_prompt2,
        save_lora_dir=save_lora_dir,
        use_adain=True,
        use_reschedule=True,
        num_inference_steps=50,
        lamd=0.6,
        output_path=save_lora_dir,
        num_frames=5,
        fix_lora=None,
        use_lora=True,
        lora_steps=50,
        noisy_latent_with_lora=True,
        morphing_with_lora=True,
        use_morph_prompt=True,
        guidance_scale=7.5,
    )

    # Collect the generated frames. Sort so the morphing sequence is returned
    # in a deterministic order (os.listdir order is arbitrary).
    return sorted(
        os.path.join(save_lora_dir, file)
        for file in os.listdir(save_lora_dir)
        if file.endswith(".wav")
    )


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("### Audio Morphing Demo with AudioLDM2")
    with gr.Row():
        audio_file1 = gr.Audio(label="Upload Audio File 1", type="filepath")
        audio_file2 = gr.Audio(label="Upload Audio File 2", type="filepath")
    with gr.Row():
        prompt1 = gr.Textbox(label="Prompt for Audio File 1")
        prompt2 = gr.Textbox(label="Prompt for Audio File 2")

    # morph_audio returns a list of file paths; a single gr.Audio component
    # cannot display multiple files, so expose them through gr.Files instead.
    output_audios = gr.Files(label="Generated Morphing Audios", interactive=False)

    morph_button = gr.Button("Generate Morphing Audio")
    morph_button.click(
        morph_audio,
        inputs=[audio_file1, audio_file2, prompt1, prompt2],
        outputs=[output_audios],
    )

if __name__ == "__main__":
    demo.launch()