Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import BarkModel, AutoProcessor | |
import torch | |
from scipy.io.wavfile import write as write_wav | |
import os | |
''' | |
This app runs a text to voice transformer | |
''' | |
# Inference is pinned to the CPU. No torch_dtype is passed when loading, so
# no half-precision (fp16) conversion is requested here.
device = "cpu"

# Bark speaker preset referenced by generate_audio.
voice_preset = "v2/en_speaker_3"

# The processor is taken from the "suno/bark" checkpoint, while the weights
# come from the smaller "suno/bark-small" checkpoint.
processor = AutoProcessor.from_pretrained("suno/bark")
model = BarkModel.from_pretrained("suno/bark-small")
model = model.to(device)
def generate_audio(text, preset, output_file_name="bark_generation"):
    """Synthesize speech for ``text`` with Bark and save it as a WAV file.

    Args:
        text: The prompt to be spoken.
        preset: Bark voice preset identifier (e.g. ``"v2/en_speaker_3"``).
            When empty or ``None`` the module-level ``voice_preset`` is used.
        output_file_name: Name of the output file without the ``.wav``
            extension. Directory components are discarded and a blank value
            falls back to ``"bark_generation"``.

    Returns:
        The name of the WAV file that was written, relative to the current
        working directory.
    """
    # The name arrives from a free-text UI field: keep only the final path
    # component so it cannot point outside the working directory, and fall
    # back to the default when the field was left blank (the UI sends "").
    base_name = os.path.basename((output_file_name or "").strip())
    file_name = (base_name or "bark_generation") + ".wav"

    # Use the preset selected by the caller; the module-level default only
    # applies when nothing was selected.
    inputs = processor(text, voice_preset=preset or voice_preset)
    audio_array = model.generate(**inputs)
    audio_array = audio_array.cpu().numpy().squeeze()

    sample_rate = model.generation_config.sample_rate
    write_wav(file_name, sample_rate, audio_array)
    return file_name
# Voice presets offered in the drop-down.
# NOTE(review): speakers 7 and 8 are not listed - confirm this is intentional.
presets = [f"v2/en_speaker_{speaker}" for speaker in (0, 1, 2, 3, 4, 5, 6, 9)]
# Gradio interface: prompt text, voice preset and output file name go in,
# the generated audio file comes out.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(label="Text"),
        # Pre-select a voice so the callback never receives None.
        gr.Dropdown(choices=presets, value=voice_preset, label="Voice preset"),
        # Pre-fill the file name; an untouched, empty Textbox would send "".
        gr.Textbox(value="bark_generation", label="Output file name"),
    ],
    outputs="audio",
)
iface.launch()