# Hugging Face Space source file (page status when captured: "Runtime error").
# File size: 2,998 bytes.
# NOTE: the per-line commit hashes and the 1-93 line-number gutter that the
# page showed here were display residue, not part of the program.
import os
import torch
import torchaudio
import numpy as np
import gradio as gr
from huggingface_hub import hf_hub_download
# Fetch both AudioMAE checkpoint files from the Hub into the working directory.
_AUDIOMAE_REPO = "DennisHung/Pre-trained_AudioMAE_weights"
for _checkpoint_name in ("pretrained.pth", "pytorch_model.bin"):
    # After the loop, model_path holds the path of the last file fetched
    # ("pytorch_model.bin").
    model_path = hf_hub_download(
        repo_id=_AUDIOMAE_REPO,
        filename=_checkpoint_name,
        local_dir="./",
        local_dir_use_symlinks=False,
    )

# NOTE(review): this import sits after the downloads, presumably because the
# pipeline module expects the checkpoint files on disk - confirm before
# moving it to the top of the file.
from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline

# Build the AudioLDM2 morphing pipeline in float32 and place it on the GPU
# when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipeline = AudioLDM2MorphPipeline.from_pretrained(
    "cvssp/audioldm2-large",
    torch_dtype=torch.float32,
)
pipeline.to(device)
# Audio morphing function
def morph_audio(audio_file1, audio_file2, prompt1, prompt2, negative_prompt1="Low quality", negative_prompt2="Low quality"):
    """Run the morphing pipeline on two audio files and list the resulting WAVs.

    The module-level ``pipeline`` is called with ``output_path="output"``;
    afterwards every ``.wav`` file found in that directory is returned.

    Args:
        audio_file1: Path to the first audio file. It is also loaded here to
            derive the target length passed as ``audio_length_in_s``.
        audio_file2: Path to the second audio file.
        prompt1: Text forwarded to the pipeline as ``prompt_1``.
        prompt2: Text forwarded to the pipeline as ``prompt_2``.
        negative_prompt1: Text forwarded as ``negative_prompt_1``.
        negative_prompt2: Text forwarded as ``negative_prompt_2``.

    Returns:
        Paths of all ``.wav`` files in the output directory, sorted by name
        so the order is deterministic.
    """
    save_lora_dir = "output"
    os.makedirs(save_lora_dir, exist_ok=True)

    # Length of the first clip in whole seconds (fractional part dropped).
    # NOTE(review): a clip shorter than one second yields 0 here - confirm
    # the pipeline accepts that, or reject such inputs upstream.
    waveform, sample_rate = torchaudio.load(audio_file1)
    duration = int(waveform.shape[1] / sample_rate)

    # The pipeline's return value is not used; results are picked up from
    # the output directory below.
    pipeline(
        audio_file=audio_file1,
        audio_file2=audio_file2,
        audio_length_in_s=duration,
        time_pooling=2,
        freq_pooling=2,
        prompt_1=prompt1,
        prompt_2=prompt2,
        negative_prompt_1=negative_prompt1,
        negative_prompt_2=negative_prompt2,
        save_lora_dir=save_lora_dir,
        use_adain=True,
        use_reschedule=False,
        num_inference_steps=50,
        lamd=0.6,
        output_path=save_lora_dir,
        num_frames=5,
        fix_lora=None,
        use_lora=True,
        lora_steps=50,
        noisy_latent_with_lora=True,
        morphing_with_lora=True,
        use_morph_prompt=True,
        guidance_scale=7.5,
    )

    # os.listdir() returns entries in arbitrary order; sort so the caller
    # receives the files in a stable, name-based sequence.
    # NOTE(review): this picks up every .wav in the directory, including any
    # left over from earlier calls - verify the pipeline overwrites its files.
    return sorted(
        os.path.join(save_lora_dir, entry)
        for entry in os.listdir(save_lora_dir)
        if entry.endswith(".wav")
    )
# Gradio interface function
def interface(audio1, audio2, prompt1, prompt2):
    """Gradio callback: forward the four UI inputs to ``morph_audio``.

    The negative prompts are not exposed here, so ``morph_audio`` uses its
    own defaults for them. Returns whatever ``morph_audio`` returns.
    """
    return morph_audio(audio1, audio2, prompt1, prompt2)
# Gradio Interface
# Web UI: two audio inputs (passed to the callback as file paths), one text
# prompt per audio file, and five audio players for the results.
demo = gr.Interface(
    fn=interface,
    inputs=(
        [gr.Audio(label=f"Upload Audio File {n}", type="filepath") for n in (1, 2)]
        + [gr.Textbox(label=f"Prompt for Audio File {n}") for n in (1, 2)]
    ),
    outputs=[gr.Audio(label=f"Morphing audio {n}") for n in range(1, 6)],
)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()