"""Gradio demo: Vietnamese text-to-speech backed by a FastSpeech 2 checkpoint."""

from typing import Optional, Tuple

import gradio as gr
import numpy as np
from transformers import AutoModelForSpeechSeq2Seq, AutoTokenizer, pipeline

# Hub identifier shared by the tokenizer and the model so they cannot drift apart.
MODEL_ID = "MarcNg/fastspeech2-vi-infore"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# NOTE(review): AutoModelForSpeechSeq2Seq is normally used for speech-to-text
# models; a "text-to-speech" pipeline presumably wants a text-to-waveform /
# text-to-spectrogram class. Left unchanged because the checkpoint's
# architecture cannot be verified from this file -- confirm against the model card.
model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_ID)
tts_pipeline = pipeline("text-to-speech", model=model, tokenizer=tokenizer)


def text_to_speech(text: str) -> Optional[Tuple[int, np.ndarray]]:
    """Synthesize speech for *text* in the format Gradio's audio output expects.

    Args:
        text: Text typed into the Gradio textbox.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, or ``None`` when *text* is empty
        or only whitespace so that the audio widget is simply left blank.
    """
    if not text or not text.strip():
        return None

    output = tts_pipeline(text)
    # The pipeline result is a dict holding the waveform under "audio" and its
    # rate under "sampling_rate". The waveform is already array-like (calling
    # ``.numpy()`` on it fails), and it may carry a leading batch axis, which
    # is dropped here because Gradio wants (samples,) or (samples, channels).
    waveform = np.squeeze(np.asarray(output["audio"]))
    return output["sampling_rate"], waveform


iface = gr.Interface(
    fn=text_to_speech,
    inputs="text",
    outputs="audio",
    title="Vietnamese Text-to-Speech",
    description="Enter Vietnamese text to convert to speech using FastSpeech 2 model.",
)

# Only start the web server when run as a script, so importing this module
# (e.g. to reuse ``text_to_speech``) does not block on a running server.
if __name__ == "__main__":
    iface.launch()