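# Spaces: Sleeping
# (The lines above the imports appear to be page-status residue captured with
# the source; kept as a comment so the file parses as Python.)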
# Gradio demo: type text, get synthesized speech back.
#
# Uses a VITS text-to-speech checkpoint from the MMS-TTS family. The previous
# revision imported classes that `transformers` does not provide and pointed at
# a speech-recognition checkpoint, so it could not start.

import gradio as gr
import torch
from transformers import AutoTokenizer, VitsModel

# Load the TTS model and its tokenizer once at start-up so every request
# reuses the same weights instead of reloading them.
model_name = "facebook/mms-tts-eng"
tts_model = VitsModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def text_to_speech(text):
    """Synthesize speech for ``text``.

    Args:
        text: The sentence(s) to speak, as entered in the Gradio textbox.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, where ``waveform`` is a 1-D
        NumPy array of audio samples, or ``None`` when ``text`` is empty or
        only whitespace (Gradio then renders an empty audio player).
    """
    # Guard clause: there is nothing to synthesize for blank input.
    if not text or not text.strip():
        return None

    inputs = tokenizer(text, return_tensors="pt")

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        waveform = tts_model(**inputs).waveform

    # The model output is batched; the interface sends one text at a time, so
    # take the first (only) item and hand Gradio a plain NumPy array.
    return tts_model.config.sampling_rate, waveform[0].cpu().numpy()


iface = gr.Interface(fn=text_to_speech, inputs="text", outputs="audio")
# Launched at import time, as in the original script (this is how the hosting
# environment starts the app).
iface.launch()