DanLeBossDeESGI committed on
Commit
0c6352e
·
1 Parent(s): 0767df8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -48
app.py CHANGED
@@ -1,49 +1,90 @@
1
- import argparse
2
- from concurrent.futures import ProcessPoolExecutor
3
- import os
4
- from pathlib import Path
5
- import subprocess as sp
6
- from tempfile import NamedTemporaryFile
7
- import time
8
- import typing as tp
9
- import warnings
10
-
11
  import torch
12
- import gradio as gr
13
-
14
- from audiocraft.data.audio_utils import convert_audio
15
- from audiocraft.data.audio import audio_write
16
- from audiocraft.models import MusicGen
17
-
18
- def ui_batched(launch_kwargs):
19
- with gr.Blocks() as demo:
20
- gr.Markdown(
21
- """
22
- # MusicGen
23
- This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft),
24
- a simple and controllable model for music generation
25
- presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
26
- <br/>
27
- <a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true"
28
- style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
29
- <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;"
30
- src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
31
- for longer sequences, more control and no queue.</p>
32
- """
33
- )
34
- with gr.Row():
35
- with gr.Column():
36
- with gr.Row():
37
- text = gr.Text(label="Describe your music", lines=2, interactive=True)
38
- with gr.Column():
39
- radio = gr.Radio(["file", "mic"], value="file",
40
- label="Condition on a melody (optional) File or Mic")
41
- melody = gr.Audio(source="upload", type="numpy", label="File",
42
- interactive=True, elem_id="melody-input")
43
- with gr.Row():
44
- submit = gr.Button("Generate")
45
- with gr.Column():
46
- output = gr.Video(label="Generated Music")
47
- submit.click(predict_batched, inputs=[text, melody],
48
- outputs=[output], batch=True, max_batch_size=MAX_BATCH_SIZE)
49
- radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
 
 
 
 
 
 
 
 
 
2
  import torch
3
+ from diffusers import AudioLDM2Pipeline
4
+
5
# make Space compatible with CPU duplicates:
# half precision is only selected when a CUDA device is present.
if torch.cuda.is_available():
    device = "cuda"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

# load the diffusers pipeline
repo_id = "cvssp/audioldm2"


@st.cache_resource
def _load_pipeline(model_id, target_device, _dtype):
    """Load the AudioLDM2 pipeline once and move it to ``target_device``.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the pipeline as a resource keeps the weights from
    being reloaded on each rerun.

    Args:
        model_id: Hub repository id passed to ``from_pretrained``.
        target_device: Device string the pipeline is moved to.
        _dtype: Torch dtype for the weights. The leading underscore tells
            Streamlit not to hash this argument for the cache key.

    Returns:
        The loaded pipeline, placed on ``target_device``.
    """
    return AudioLDM2Pipeline.from_pretrained(model_id, torch_dtype=_dtype).to(target_device)


pipe = _load_pipeline(repo_id, device, torch_dtype)

# set the generator for reproducibility (it is re-seeded on every request)
generator = torch.Generator(device)
19
+
20
+
21
def text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_candidates):
    """Generate audio for a text prompt and render a player in the page.

    Args:
        text: Prompt describing the desired audio.
        negative_prompt: Prompt describing what the audio should avoid.
        duration: Requested audio length in seconds.
        guidance_scale: Guidance scale forwarded to the pipeline.
        random_seed: Seed applied to the shared generator; converted to int.
        n_candidates: Number of waveforms to generate per prompt; a falsy
            value falls back to 1.

    Returns:
        None. On success the first returned waveform is shown with
        ``st.audio``; on blank input an error message is shown instead.
    """
    # st.text_input yields "" (never None) for an empty box, so a plain
    # ``is None`` test would let blank prompts through to the pipeline.
    if text is None or not text.strip():
        st.error("Please provide a text input.")
        return

    # NOTE(review): 16 kHz is the rate used in the diffusers AudioLDM2
    # examples when writing the output to disk - confirm if the checkpoint
    # is ever swapped for one with a different vocoder rate.
    sample_rate = 16000

    waveforms = pipe(
        text,
        audio_length_in_s=duration,
        guidance_scale=guidance_scale,
        num_inference_steps=200,
        negative_prompt=negative_prompt,
        num_waveforms_per_prompt=n_candidates if n_candidates else 1,
        # Re-seed the shared generator so identical inputs reproduce.
        generator=generator.manual_seed(int(random_seed)),
    )["audios"]

    # Only the first candidate is played. A raw array carries no sampling
    # rate, so it has to be passed explicitly for Streamlit to encode it.
    st.audio(waveforms[0], format="audio/wav", sample_rate=sample_rate)
37
+
38
+
39
# Streamlit UI
st.title("AudioLDM 2: A General Framework for Audio, Music, and Speech Generation")

st.markdown(
    "[Paper](https://arxiv.org/abs/2308.05734) [Project Page](https://audioldm.github.io/audioldm2) [Diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/audioldm2)"
)

st.markdown("This is the demo for AudioLDM 2, powered by 🧨 Diffusers. For faster inference without waiting in queue, you may duplicate the space and upgrade to a GPU in the settings.")

st.markdown("### Input")
text = st.text_input("Input text", "The vibrant beat of Brazilian samba drums")
negative_prompt = st.text_input("Negative prompt", "Low quality")

st.markdown("### Configuration")
# st.slider requires min, max, value and step to share one numeric type;
# mixing int bounds with a float step raises a StreamlitAPIException.
duration = st.slider("Duration (seconds)", 5.0, 15.0, 10.0, step=2.5)
guidance_scale = st.slider("Guidance scale", 0.0, 7.0, 3.5, step=0.5)
n_candidates = st.slider("Number waveforms to generate", 1, 5, 3, step=1)
random_seed = st.number_input("Seed", 1, 100, 45)

submitted = st.button("Submit")

st.markdown("### Output")
# Generate after the heading so the audio player is rendered beneath it.
if submitted:
    text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_candidates)
st.markdown("Share your generations with the community by clicking the share icon at the top right of the generated audio!")

st.markdown("Model by [Haohe Liu](https://twitter.com/LiuHaohe). Code and demo by 🤗 Hugging Face.")

st.markdown("### Examples")
# Each row: prompt, negative prompt, duration (s), guidance scale, seed,
# number of waveforms - the positional argument order of text2audio.
examples = [
    ["A hammer is hitting a wooden surface.", "Low quality.", 10, 3.5, 45, 3],
    ["A cat is meowing for attention.", "Low quality.", 10, 3.5, 45, 3],
    ["An excited crowd cheering at a sports game.", "Low quality.", 10, 3.5, 45, 3],
    ["Birds singing sweetly in a blooming garden.", "Low quality.", 10, 3.5, 45, 3],
    ["A modern synthesizer creating futuristic soundscapes.", "Low quality.", 10, 3.5, 45, 3],
    ["The vibrant beat of Brazilian samba drums.", "Low quality.", 10, 3.5, 45, 3],
]

# enumerate gives every button a unique key by position, without the
# repeated list scan of examples.index().
for position, example in enumerate(examples):
    st.markdown("---")
    st.markdown(f"**Input**: {example[0]}")
    st.markdown(f"**Negative Prompt**: {example[1]}")
    st.markdown(f"**Duration**: {example[2]} seconds")
    st.markdown(f"**Guidance Scale**: {example[3]}")
    st.markdown(f"**Seed**: {example[4]}")
    st.markdown(f"**Number Waveforms**: {example[5]}")
    if st.button("Generate", key=f"example_{position}"):
        text2audio(*example)

st.markdown("### Additional Information")
st.markdown(
    "We build the model with data from [AudioSet](http://research.google.com/audioset/), [Freesound](https://freesound.org/), and [BBC Sound Effect library](https://sound-effects.bbcrewind.co.uk/). We share this demo based on the [UK copyright exception](https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/375954/Research.pdf) of data for academic research."
)