File size: 983 Bytes
5d9292a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import tempfile

import gradio
from scipy.io.wavfile import write as write_wav
from transformers import AutoProcessor, BarkModel


# Checkpoint identifier used for both the processor and the model, kept in one
# place so the two can never point at different checkpoints.
_BARK_CHECKPOINT = "suno/bark-small"

# Loaded once at import time and reused by every generate_speech() call.
processor = AutoProcessor.from_pretrained(_BARK_CHECKPOINT)
model = BarkModel.from_pretrained(_BARK_CHECKPOINT)

def generate_speech(text, voice_preset="v2/en_speaker_6"):
    """Synthesize ``text`` with the module-level Bark model and save it as WAV.

    Args:
        text: The prompt to turn into speech.
        voice_preset: Speaker preset name forwarded to the processor.

    Returns:
        Path of the WAV file written for this call. Each call gets its own
        temporary file, so overlapping requests cannot overwrite one
        another's audio. The file is not deleted automatically; the caller
        owns its cleanup.
    """
    inputs = processor(text, voice_preset=voice_preset)

    # generate() returns a tensor; bring it to host memory and drop the
    # singleton dimensions so the WAV writer receives a plain sample array.
    waveform = model.generate(**inputs).cpu().numpy().squeeze()

    # The sample rate is taken from the model's generation config.
    sample_rate = model.generation_config.sample_rate

    # A fixed output name would be shared by every request served by the web
    # app; a unique temp file keeps concurrent generations isolated and does
    # not require the working directory to be writable.
    with tempfile.NamedTemporaryFile(
        prefix="bark_generation_", suffix=".wav", delete=False
    ) as wav_file:
        write_wav(wav_file, sample_rate, waveform)
    return wav_file.name

# Example (manual smoke test): generate_speech("Hello uh ... [clears throat], my dog is cute [laughter]")

# Web UI: a single text box in, an audio player out. Only `text` is exposed,
# so generate_speech always runs with its default voice preset.
iface = gradio.Interface(
    fn=generate_speech,
    inputs='text',
    outputs='audio',
    title='Text to Speech',
)

# share=True requests a public share link in addition to the local server.
iface.launch(share=True)