File size: 1,145 Bytes
c4a731d
1b0d41c
 
 
 
c4a731d
1b0d41c
 
c4a731d
1b0d41c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4a731d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import gradio as gr
from transformers import BarkModel, AutoProcessor
import torch
from scipy.io.wavfile import write as write_wav
import os

'''
This app runs a text-to-speech transformer (Suno Bark) behind a Gradio web interface.

'''
### Inference runs on CPU, so the model is moved to the CPU device. ###
device = "cpu"
# No torch_dtype is passed, so from_pretrained uses its default precision
# (the weights are NOT converted to fp16 here).
# Model and processor are loaded from the same checkpoint so the tokenizer
# and speaker embeddings come from the repository that holds the weights.
MODEL_ID = "suno/bark-small"
model = BarkModel.from_pretrained(MODEL_ID).to(device)
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Default Bark voice preset
voice_preset = "v2/en_speaker_3"

def generate_audio(text, preset, output_file_name="bark_generation"):
    """Synthesize speech for ``text`` with Bark and save it as a WAV file.

    Args:
        text: Prompt to be spoken.
        preset: Bark voice preset name such as ``"v2/en_speaker_3"``. A falsy
            value (``None`` or ``""``) falls back to the module-level
            ``voice_preset``.
        output_file_name: Output file name without the ``.wav`` extension. A
            blank value falls back to ``"bark_generation"``.

    Returns:
        The name of the WAV file that was written.
    """
    # Use the voice chosen by the caller; only fall back to the module
    # default when none was given.
    speaker = preset or voice_preset
    # A blank name would otherwise produce a file called just ".wav".
    base_name = (output_file_name or "").strip() or "bark_generation"
    # NOTE(review): base_name is used as a path as-is; if it comes from an
    # untrusted form field, consider restricting it to a bare file name.
    file_name = base_name + ".wav"

    inputs = processor(text, voice_preset=speaker)
    audio_array = model.generate(**inputs)
    # Convert the generated tensor to a NumPy array without size-1 dimensions
    audio_array = audio_array.cpu().numpy().squeeze()
    sample_rate = model.generation_config.sample_rate
    write_wav(file_name, sample_rate, audio_array)
    return file_name

# Voice presets offered in the drop-down
# NOTE(review): speakers 7 and 8 are not listed — confirm this is intentional.
presets = [
    "v2/en_speaker_0",
    "v2/en_speaker_1",
    "v2/en_speaker_2",
    "v2/en_speaker_3",
    "v2/en_speaker_4",
    "v2/en_speaker_5",
    "v2/en_speaker_6",
    "v2/en_speaker_9",
]

# Gradio interface: the three inputs map positionally onto
# generate_audio(text, preset, output_file_name). The drop-down and the file
# name get explicit defaults so the callback is not handed an unselected
# preset or an empty file name when the user leaves them untouched.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(label="Text"),
        gr.Dropdown(choices=presets, value=voice_preset, label="Voice preset"),
        gr.Textbox(value="bark_generation", label="Output file name"),
    ],
    outputs="audio",
)
iface.launch()