text-to-speech / app.py
vishal2002's picture
Create app.py
1d6c4be verified
raw
history blame
644 Bytes
# Gradio demo: type a sentence, hear it spoken by a Hugging Face TTS model.

import gradio as gr
from transformers import pipeline

# Load the TTS model once at start-up so every request reuses it.
# The checkpoint must be a text-to-speech model; the previously configured
# "facebook/wav2vec2-base-960h" is a speech-recognition (audio -> text) model
# and cannot synthesise audio.
model_name = "facebook/mms-tts-eng"
tts_model = pipeline("text-to-speech", model=model_name)


def text_to_speech(text: str):
    """Synthesise speech for ``text``.

    Args:
        text: The text typed into the Gradio text box.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, which is the format Gradio's
        ``"audio"`` output accepts, or ``None`` when ``text`` is empty or
        only whitespace (nothing to synthesise).
    """
    # Guard against an empty submission rather than feeding the model an
    # empty token sequence.
    if not text or not text.strip():
        return None

    # The text-to-speech pipeline tokenizes internally and returns a dict
    # with the generated waveform and the rate it was produced at.
    result = tts_model(text)

    # The waveform may carry a leading singleton (batch/channel) axis;
    # squeeze it so Gradio receives a plain 1-D array of samples.
    waveform = result["audio"].squeeze()
    return result["sampling_rate"], waveform


iface = gr.Interface(fn=text_to_speech, inputs="text", outputs="audio")
iface.launch()