import streamlit as st
import torch
from diffusers import AudioLDM2Pipeline
# make Space compatible with CPU duplicates
if torch.cuda.is_available():
    device = "cuda"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32
# load the diffusers pipeline
repo_id = "cvssp/audioldm2"
pipe = AudioLDM2Pipeline.from_pretrained(repo_id, torch_dtype=torch_dtype).to(device)
# set the generator for reproducibility
generator = torch.Generator(device)
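# the generator is re-seeded with the user-provided seed on each run,
# so the same inputs reproduce the same audio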
def text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_candidates):
    if not text:
        st.error("Please provide a text input.")
        return

    # generate candidate waveforms with the AudioLDM 2 pipeline
    waveforms = pipe(
        text,
        audio_length_in_s=duration,
        guidance_scale=guidance_scale,
        num_inference_steps=200,
        negative_prompt=negative_prompt,
        num_waveforms_per_prompt=int(n_candidates) if n_candidates else 1,
        generator=generator.manual_seed(int(random_seed)),
    )["audios"]

    # play the top-ranked candidate; AudioLDM 2 outputs 16 kHz audio
    st.audio(waveforms[0], format="audio/wav", sample_rate=16000)
# Streamlit UI
st.title("AudioLDM 2: A General Framework for Audio, Music, and Speech Generation")
st.markdown(
"[Paper](https://arxiv.org/abs/2308.05734) [Project Page](https://audioldm.github.io/audioldm2) [Diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/audioldm2)"
)
st.markdown("This is the demo for AudioLDM 2, powered by 🧨 Diffusers. For faster inference without waiting in queue, you may duplicate the space and upgrade to a GPU in the settings.")
st.markdown("### Input")
text = st.text_input("Input text", "The vibrant beat of Brazilian samba drums")
negative_prompt = st.text_input("Negative prompt", "Low quality")
st.markdown("### Configuration")
duration = st.slider("Duration (seconds)", 5.0, 15.0, 10.0, step=2.5)
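# higher guidance scale trades sample diversity for closer adherence to the text prompt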
guidance_scale = st.slider("Guidance scale", 0.0, 7.0, 3.5, step=0.5)
n_candidates = st.slider("Number of waveforms to generate", 1, 5, 3, step=1)
random_seed = st.number_input("Seed", 1.0, 100.0, 45.0)
if st.button("Submit"):
text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_candidates)