Spaces:
Paused
Paused
File size: 1,449 Bytes
2de6199 e09662f e58ab9f e09662f 2de6199 955b728 2de6199 f101f29 2de6199 fac1cff 2de6199 c222126 2de6199 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import gradio as gr
import torch
from diffusers import AudioLDM2Pipeline
# Select compute backend once at import time: fp16 on CUDA for speed,
# fp32 on CPU so duplicated Spaces without a GPU still run.
_has_cuda = torch.cuda.is_available()
device = "cuda" if _has_cuda else "cpu"
torch_dtype = torch.float16 if _has_cuda else torch.float32
# Load the AudioLDM2 text-to-audio diffusion pipeline and move it to the
# selected device with the matching precision.
pipe = AudioLDM2Pipeline.from_pretrained(
    "cvssp/audioldm2", torch_dtype=torch_dtype
).to(device)
# pipe.unet = torch.compile(pipe.unet)  # optional speed-up, left disabled
# Shared RNG so generations are reproducible for a fixed seed.
generator = torch.Generator(device)
def text2audio(text, negative_prompt="Low quality.", duration=10, guidance_scale=3.5, random_seed=45, n_candidates=3):
    """Generate up to three candidate audio clips for *text* with AudioLDM2.

    Args:
        text: Prompt describing the desired sound. Required.
        negative_prompt: Qualities to steer away from.
        duration: Target clip length in seconds.
        guidance_scale: Classifier-free guidance strength.
        random_seed: Seed for the shared generator (reproducibility).
        n_candidates: Number of waveforms to generate (falsy -> 1).

    Returns:
        A list of exactly 3 entries for the 3 audio output components:
        (16000, waveform) tuples, padded with None when fewer than 3
        candidates were generated.

    Raises:
        gr.Error: If *text* is missing or empty.
    """
    # `not text` also rejects the empty string, not just None.
    if not text:
        raise gr.Error("Please provide a text input.")
    # NOTE: previously these parameters were overwritten by hard-coded
    # "test values", silently ignoring caller input; they are now honest
    # defaults on the signature instead.
    waveforms = pipe(
        text,
        audio_length_in_s=duration,
        guidance_scale=guidance_scale,
        num_inference_steps=20,
        negative_prompt=negative_prompt,
        num_waveforms_per_prompt=n_candidates if n_candidates else 1,
        # manual_seed() returns the generator, so this stays reproducible
        # per call without re-creating it.
        generator=generator.manual_seed(int(random_seed)),
    )["audios"]
    # Return (sample_rate, data) tuples directly consumable by gr.Audio
    # outputs (gr.make_waveform is deprecated and produced video files).
    # Pad with None so indexing never fails when n_candidates < 3.
    outputs = [(16000, w) for w in waveforms[:3]]
    outputs += [None] * (3 - len(outputs))
    return outputs
# One input component per text2audio parameter; the original single "text"
# input could not satisfy the function's six-argument signature and raised
# a TypeError on every submission.
gradio_interface = gr.Interface(
    fn=text2audio,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Negative prompt", value="Low quality."),
        gr.Slider(minimum=2.5, maximum=10, value=10, step=2.5, label="Duration (seconds)"),
        gr.Slider(minimum=0, maximum=7, value=3.5, step=0.5, label="Guidance scale"),
        gr.Number(value=45, label="Seed", precision=0),
        gr.Slider(minimum=1, maximum=3, value=3, step=1, label="Number of candidates"),
    ],
    outputs=["audio", "audio", "audio"],
)
gradio_interface.launch()
|