import os
import gc
import torch
import shutil
import atexit
import torchaudio
import numpy as np
import gradio as gr

from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline

# Expose only physical GPU 6 to this process; it is addressed as cuda:0 below.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

# Initialize AudioLDM2 Pipeline
dtype = torch.float32
pipeline = AudioLDM2MorphPipeline.from_pretrained("cvssp/audioldm2-large", torch_dtype=dtype)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    torch.cuda.set_device(0)
pipeline.to(device)
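# Optional (assumption, not part of the original app): on memory-constrained
# GPUs the pipeline can be loaded in half precision instead, which roughly
# halves VRAM use at some cost in audio fidelity. A minimal sketch using the
# same diffusers-style `from_pretrained` call:
#
#   dtype = torch.float16
#   pipeline = AudioLDM2MorphPipeline.from_pretrained(
#       "cvssp/audioldm2-large", torch_dtype=dtype
#   ).to(device)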
def morph_audio(audio_file1, audio_file2, num_inference_steps, prompt1='', prompt2='',
                negative_prompt1="Low quality", negative_prompt2="Low quality"):
    """Morph between two sounds and return the fixed set of intermediate frames."""
    save_lora_dir = "output"
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
    os.makedirs(save_lora_dir, exist_ok=True)

    # Load both audio files and compute their durations in seconds.
    waveform1, sample_rate1 = torchaudio.load(audio_file1)
    duration1 = waveform1.shape[1] / sample_rate1
    waveform2, sample_rate2 = torchaudio.load(audio_file2)
    duration2 = waveform2.shape[1] / sample_rate2

    # Use the shorter of the two durations, truncated to whole seconds.
    duration = int(min(duration1, duration2))

    # Perform morphing using the pipeline.
    _ = pipeline(
        dtype=dtype,
        audio_file=audio_file1,
        audio_file2=audio_file2,
        audio_length_in_s=duration,
        time_pooling=2,
        freq_pooling=2,
        prompt_1=prompt1,
        prompt_2=prompt2,
        negative_prompt_1=negative_prompt1,
        negative_prompt_2=negative_prompt2,
        save_lora_dir=save_lora_dir,
        use_adain=True,
        use_reschedule=False,
        num_inference_steps=num_inference_steps,
        lamd=0.6,
        output_path=save_lora_dir,
        num_frames=5,
        fix_lora=None,
        use_lora=True,
        lora_steps=2,
        noisy_latent_with_lora=True,
        morphing_with_lora=True,
        use_morph_prompt=True,
        guidance_scale=7.5,
    )

    # Collect the generated .wav files, sorted numerically by frame index.
    output_paths = sorted(
        [os.path.join(save_lora_dir, file) for file in os.listdir(save_lora_dir) if file.endswith(".wav")],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0]),
    )

    # Release tensors and cached GPU memory before returning.
    del waveform1, waveform2, _
    torch.cuda.empty_cache()
    gc.collect()
    return output_paths
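# Example (hypothetical paths): given two local recordings,
#   morph_audio("cat.wav", "dog.wav", num_inference_steps=50)
# would return the five generated frames in order, e.g.
#   ["output/0.wav", "output/1.wav", ..., "output/4.wav"]
# The numeric filenames are an assumption inferred from the sort key above.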
def morph_audio_with_morphing_factor(audio_file1, audio_file2, alpha, num_inference_steps,
                                     prompt1='', prompt2='', negative_prompt1="Low quality",
                                     negative_prompt2="Low quality"):
    """Morph between two sounds at a single interpolation point alpha in [0, 1]."""
    save_lora_dir = "output"
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
    os.makedirs(save_lora_dir, exist_ok=True)

    # Load both audio files and compute their durations in seconds.
    waveform1, sample_rate1 = torchaudio.load(audio_file1)
    duration1 = waveform1.shape[1] / sample_rate1
    waveform2, sample_rate2 = torchaudio.load(audio_file2)
    duration2 = waveform2.shape[1] / sample_rate2

    # Use the shorter of the two durations, truncated to whole seconds.
    duration = int(min(duration1, duration2))

    try:
        # Perform morphing using the pipeline.
        _ = pipeline(
            dtype=dtype,
            morphing_factor=alpha,
            audio_file=audio_file1,
            audio_file2=audio_file2,
            audio_length_in_s=duration,
            time_pooling=2,
            freq_pooling=2,
            prompt_1=prompt1,
            prompt_2=prompt2,
            negative_prompt_1=negative_prompt1,
            negative_prompt_2=negative_prompt2,
            save_lora_dir=save_lora_dir,
            use_adain=True,
            use_reschedule=False,
            num_inference_steps=num_inference_steps,
            lamd=0.6,
            output_path=save_lora_dir,
            num_frames=5,
            fix_lora=None,
            use_lora=True,
            lora_steps=2,
            noisy_latent_with_lora=True,
            morphing_with_lora=True,
            use_morph_prompt=True,
            guidance_scale=7.5,
        )
        output_path = os.path.join(save_lora_dir, 'interpolated.wav')
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            print("CUDA out of memory. Releasing unused memory...")
            torch.cuda.empty_cache()
            gc.collect()
        raise

    # Release tensors and cached GPU memory before returning the result.
    del waveform1, waveform2, _
    torch.cuda.empty_cache()
    gc.collect()
    return output_path
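# Example (hypothetical paths): a 50/50 morph of two recordings,
#   morph_audio_with_morphing_factor("cat.wav", "dog.wav", alpha=0.5, num_inference_steps=50)
# returns a single path, which this app expects to be "output/interpolated.wav".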
def cleanup_output_dir():
    """Remove the shared output directory when the process exits."""
    save_lora_dir = "output"
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
        print(f"Cleaned up directory: {save_lora_dir}")

atexit.register(cleanup_output_dir)
# Gradio interface function (thin wrapper, kept for the commented-out
# gr.Interface variant below; the Blocks app calls the morph functions directly).
def interface(audio1, audio2, alpha, num_inference_steps):
    return morph_audio_with_morphing_factor(audio1, audio2, alpha, num_inference_steps)
# Alternative single-page UI using gr.Interface (superseded by the Blocks app below):
# demo = gr.Interface(
#     fn=interface,
#     inputs=[
#         gr.Audio(label="Upload Audio File 1", type="filepath"),
#         gr.Audio(label="Upload Audio File 2", type="filepath"),
#         gr.Slider(0, 1, step=0.01, label="Interpolation Alpha"),
#         gr.Slider(10, 50, step=1, label="Inference Steps"),
#         # gr.Textbox(label="Prompt for Audio File 1"),
#         # gr.Textbox(label="Prompt for Audio File 2"),
#     ],
#     outputs=gr.Audio(label="Interpolated Audio"),
# )
with gr.Blocks() as demo:
    with gr.Tab("Sound Morphing with fixed frames"):
        gr.Markdown("### Upload two audio files for morphing")
        with gr.Row():
            audio1 = gr.Audio(label="Upload Audio File 1", type="filepath")
            audio2 = gr.Audio(label="Upload Audio File 2", type="filepath")
        num_inference_steps = gr.Slider(10, 50, step=1, label="Inference Steps", value=50)
        outputs = [
            gr.Audio(label="Morphing audio 1"),
            gr.Audio(label="Morphing audio 2"),
            gr.Audio(label="Morphing audio 3"),
            gr.Audio(label="Morphing audio 4"),
            gr.Audio(label="Morphing audio 5"),
        ]
        submit_btn1 = gr.Button("Submit")
        submit_btn1.click(morph_audio, inputs=[audio1, audio2, num_inference_steps], outputs=outputs)

    with gr.Tab("Sound Morphing with a specified morphing factor"):
        gr.Markdown("### Upload two audio files for morphing")
        with gr.Row():
            audio1 = gr.Audio(label="Upload Audio File 1", type="filepath")
            audio2 = gr.Audio(label="Upload Audio File 2", type="filepath")
        alpha = gr.Slider(0, 1, step=0.01, label="Interpolation Alpha")
        num_inference_steps = gr.Slider(10, 50, step=1, label="Inference Steps", value=50)
        outputs = gr.Audio(label="Interpolated Audio")
        submit_btn2 = gr.Button("Submit")
        submit_btn2.click(morph_audio_with_morphing_factor, inputs=[audio1, audio2, alpha, num_inference_steps], outputs=outputs)
if __name__ == "__main__":
    demo.launch(share=True)
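# Note (assumption, not from the original app): since each request runs a long
# GPU inference job, enabling Gradio's request queue before launching can keep
# concurrent users from overlapping on the same GPU, e.g.
#   demo.queue().launch(share=True)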