File size: 3,211 Bytes
3883c60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import gradio

import webui.modules.implementations.audioldm2 as aldm2
from webui.modules import util


def generate(prompt, negative, duration, steps, cfg, seed, progress=gradio.Progress()):
    """Run AudioLDM2 for the UI and shape the result for the three output widgets.

    Args:
        prompt: Text describing the audio to generate.
        negative: Text describing what to avoid.
        duration: Requested duration, forwarded to the backend.
        steps: Number of steps; also used as the total for the progress bar.
        cfg: CFG scale, forwarded to the backend.
        seed: Seed, forwarded to the backend.
        progress: Gradio progress tracker (injected by Gradio when used as an
            event handler).

    Returns:
        A 3-tuple ``(audio, waveform_video, message)``. When the backend
        returns a string, that string is passed through as the message and
        the first two items are ``None``.
    """

    def report_progress(step, _, _2):
        # Only the step index is used; the other two callback arguments are ignored.
        return progress((step, steps), desc='Generating...')

    # NOTE: the backend is called positionally with steps *before* duration.
    result = aldm2.generate(prompt, negative, steps, duration, cfg, seed,
                            callback=report_progress)

    # A plain string from the backend is treated as a status/error message.
    if isinstance(result, str):
        return None, None, result

    audio = result[1]
    waveform = util.make_waveform(audio)
    used_seed = result[0]
    return audio, waveform, f'Successfully generated audio with seed: {used_seed}.'


def audioldm2_tab():
    """Build the AudioLDM2 tab: model controls, generation inputs and outputs.

    Lays out, in order:
      * a row with the model dropdown (choices from ``aldm2.models``), load and
        unload buttons, and the "Generate" button;
      * a row with the input column (prompt, negative prompt, duration, seed and
        an accordion holding the CFG scale and step count) and the output column
        (audio, waveform video, result text).

    The load/unload buttons call ``aldm2.create_model`` / ``aldm2.delete_model``;
    the "Generate" button is wired to ``generate``.

    Presumably this must be called inside an active ``gradio.Blocks`` context,
    like any function that instantiates components - confirm against the caller.
    """
    # Slider defaults are named once so the help text below cannot drift away
    # from the value the slider actually starts at.
    default_cfg = 3.5
    default_steps = 50

    with gradio.Row():
        with gradio.Row():
            selected = gradio.Dropdown(aldm2.models, value='cvssp/audioldm2',
                                       label='Model')
            with gradio.Column(elem_classes='smallsplit'):
                # Rocket loads the selected model, bomb unloads the current one.
                load_button = gradio.Button('🚀', variant='tool secondary')
                unload_button = gradio.Button('💣', variant='tool primary')

            def load(model):
                """Load the chosen model; the dropdown itself is left unchanged."""
                aldm2.create_model(model)
                return gradio.update()

            def unload():
                """Unload the current model; the dropdown itself is left unchanged."""
                aldm2.delete_model()
                return gradio.update()

            # The dropdown is listed as an output only so the handlers have a
            # component to return a (no-op) update to.
            load_button.click(fn=load, inputs=selected, outputs=selected, show_progress=True)
            unload_button.click(fn=unload, outputs=selected, show_progress=True)
        with gradio.Row():
            gen_button = gradio.Button('Generate', variant='primary')

    with gradio.Row():
        with gradio.Column():
            prompt = gradio.TextArea(label='Prompt', info='Put the audio you want here.',
                                     placeholder='The sound of a hammer hitting a wooden surface')
            neg_prompt = gradio.TextArea(label='Negative prompt', info='Put things to avoid generating here.',
                                         placeholder='low bitrate, low quality, bad quality')
            duration = gradio.Number(5, label='Duration (s)', info='Duration for the generation in seconds.')
            seed = gradio.Number(-1, label='Seed',
                                 info='Default: -1 (random). Set the seed for generation, random seed is used when a negative number is given.',
                                 precision=0)
            with gradio.Accordion('➕ Extra options', open=False):
                cfg = gradio.Slider(1, 20, default_cfg, step=0.01, label='CFG scale',
                                    info=f'Default: {default_cfg}. How much should the prompt affect the audio on every step?')
                steps = gradio.Slider(1, 300, default_steps, step=1, label='Steps',
                                      info=f'Default: {default_steps}. How many diffusion steps should be performed?')
        with gradio.Column():
            with gradio.Row():
                audio_out = gradio.Audio(label='Generated audio', interactive=False)
            with gradio.Row():
                video_out = gradio.Video(label='Waveform video', interactive=False)
            with gradio.Row():
                text_out = gradio.Textbox(label='Result')

    # Input order must match the positional parameters of `generate`.
    gen_button.click(fn=generate, inputs=[prompt, neg_prompt, duration, steps, cfg, seed],
                     outputs=[audio_out, video_out, text_out])