# Hugging Face Space (status when captured: Runtime error)
import streamlit as st | |
import torch | |
from diffusers import AudioLDM2Pipeline | |
# Make the Space compatible with CPU duplicates: half precision on GPU,
# full precision on CPU.
if torch.cuda.is_available():
    device = "cuda"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

repo_id = "cvssp/audioldm2"


@st.cache_resource
def _load_pipeline(model_id, target_device, _dtype):
    """Load the diffusers pipeline once and reuse it across script reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, the model weights would be reloaded each
    time a slider moves or the button is pressed.

    Args:
        model_id: Hub repository id passed to ``from_pretrained``.
        target_device: Device string the pipeline is moved to.
        _dtype: Torch dtype for the weights. The leading underscore tells
            Streamlit not to hash this argument; it is fully determined by
            ``target_device`` here, so the cache key stays correct.

    Returns:
        The loaded pipeline, already moved to ``target_device``.
    """
    return AudioLDM2Pipeline.from_pretrained(model_id, torch_dtype=_dtype).to(target_device)


# load the diffusers pipeline (cached; shared by every rerun of this script)
pipe = _load_pipeline(repo_id, device, torch_dtype)

# set the generator for reproducibility (re-seeded on every generation call)
generator = torch.Generator(device)
def text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_candidates):
    """Generate audio for a text prompt and render it in the Streamlit page.

    Args:
        text: Prompt describing the audio to generate.
        negative_prompt: Prompt describing what the audio should avoid.
        duration: Requested audio length in seconds.
        guidance_scale: Guidance scale forwarded to the pipeline.
        random_seed: Seed for the shared generator; converted with ``int``.
        n_candidates: Number of waveforms to generate for the prompt;
            converted with ``int``, falling back to 1 when falsy.

    Returns:
        None. On success the first returned waveform is shown with
        ``st.audio``; on a blank prompt an error is shown instead and the
        pipeline is not run.
    """
    # st.text_input yields "" (never None) for an empty box, so a plain
    # `is None` check lets blank prompts through to the pipeline.
    if text is None or not str(text).strip():
        st.error("Please provide a text input.")
        return

    waveforms = pipe(
        text,
        audio_length_in_s=duration,
        guidance_scale=guidance_scale,
        num_inference_steps=200,
        negative_prompt=negative_prompt,
        num_waveforms_per_prompt=int(n_candidates) if n_candidates else 1,
        # Re-seed the shared generator so the same seed reproduces the same audio.
        generator=generator.manual_seed(int(random_seed)),
    )["audios"]

    # Only the first waveform is played, even when several were generated.
    st.audio(waveforms[0], format="audio/wav", sample_rate=16000)
# Streamlit UI: page header, prompt inputs, generation settings, submit button.
st.title("AudioLDM 2: A General Framework for Audio, Music, and Speech Generation")
st.markdown(
    "[Paper](https://arxiv.org/abs/2308.05734) [Project Page](https://audioldm.github.io/audioldm2) [Diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/audioldm2)"
)
st.markdown("This is the demo for AudioLDM 2, powered by 🧨 Diffusers. For faster inference without waiting in queue, you may duplicate the space and upgrade to a GPU in the settings.")

st.markdown("### Input")
text = st.text_input("Input text", "The vibrant beat of Brazilian samba drums")
negative_prompt = st.text_input("Negative prompt", "Low quality")

st.markdown("### Configuration")
duration = st.slider("Duration (seconds)", 5.0, 15.0, 10.0, step=2.5)
guidance_scale = st.slider("Guidance scale", 0.0, 7.0, 3.5, step=0.5)
# The waveform count and seed are whole numbers; integer widgets avoid
# offering fractional values (e.g. a seed of 45.37) that would be silently
# truncated by the int() conversions in text2audio.
n_candidates = st.slider("Number waveforms to generate", 1, 5, 3, step=1)
random_seed = st.number_input("Seed", 1, 100, 45)

# Generation only runs on the rerun triggered by pressing the button.
if st.button("Submit"):
    text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_candidates)