Spaces:
Running
Running
File size: 1,921 Bytes
2707e70 f7f6e43 2707e70 ba00a43 f7f6e43 ba00a43 f5c319c f7f6e43 aa1b2f5 467f7e2 f7f6e43 467f7e2 f7f6e43 2707e70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import gradio as gr
from transformers import BarkModel, AutoProcessor
import torch
from scipy.io.wavfile import write as write_wav
import os
# Use the GPU when one is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    # GPU path: load in fp16 and let accelerate offload idle sub-models to the
    # CPU so that peak GPU memory stays low.
    model = BarkModel.from_pretrained(
        "suno/bark-small", torch_dtype=torch.float16
    ).to(device)
    model.enable_cpu_offload()
else:
    # CPU path: half precision and CPU offload are GPU-only optimizations, so
    # load the model in its default precision and keep it on the CPU.
    model = BarkModel.from_pretrained("suno/bark-small").to(device)

processor = AutoProcessor.from_pretrained("suno/bark")
voice_preset = "v2/en_speaker_3"
# generate audio
# def generate_audio(text, preset, output_file_name="bark_generation"):
# file_name = output_file_name + ".wav"
# inputs = processor(text, voice_preset=preset)
# audio_array = model.generate(**inputs)
# audio_array = audio_array.cpu().numpy().squeeze()
# sample_rate = model.generation_config.sample_rate
# write_wav(file_name, sample_rate, audio_array)
# return file_name
def generate_audio(text, preset, output_file_name="bark_generation"):
    """Generate speech for ``text`` with Bark and save it as a WAV file.

    Args:
        text: Prompt to synthesize.
        preset: Voice preset handed to the processor
            (for example ``"v2/en_speaker_3"``).
        output_file_name: Base name of the output file, without the ``.wav``
            extension. A blank value falls back to ``"bark_generation"``.

    Returns:
        The name of the ``.wav`` file that was written.
    """
    # A text box left blank in the UI submits "" rather than omitting the
    # argument, which would otherwise produce a file literally named ".wav".
    if not output_file_name or not output_file_name.strip():
        output_file_name = "bark_generation"
    file_name = output_file_name + ".wav"

    inputs = processor(text, voice_preset=preset)
    # Ensure the inputs are on the right device
    for k, v in inputs.items():
        if isinstance(v, torch.Tensor):
            inputs[k] = v.to(device)

    audio_array = model.generate(**inputs)
    # The model may be loaded in half precision, and scipy's WAV writer does
    # not accept float16 data, so upcast to float32 before converting to numpy.
    audio_array = audio_array.cpu().float().numpy().squeeze()
    sample_rate = model.generation_config.sample_rate
    write_wav(file_name, sample_rate, audio_array)
    return file_name
# Bark voice presets offered in the dropdown
presets = [
    "v2/en_speaker_0",
    "v2/en_speaker_1",
    "v2/en_speaker_2",
    "v2/en_speaker_3",
    "v2/en_speaker_4",
    "v2/en_speaker_5",
    "v2/en_speaker_6",
]

# Gradio interface: the three inputs map positionally onto
# generate_audio(text, preset, output_file_name); the returned file is
# rendered by the audio output component.
iface = gr.Interface(
    fn=generate_audio,
    inputs=["text", gr.components.Dropdown(choices=presets), "text"],
    outputs="audio",
)
iface.launch()