# Urdu-TTS / app.py
# Last commit: "Update app.py" by Mudassir-75 (d4af064, verified)
import gradio as gr
import torch
from transformers import VitsModel, AutoTokenizer
# 1. Load the tokenizer and the model once, at import time, so that every
#    request handled by the app reuses the same objects.
# The checkpoint is intended for Urdu written in its traditional (Nastaliq) script.
model_name = "facebook/mms-tts-urd"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = VitsModel.from_pretrained(model_name)
# 2. Define the inference function
def generate_urdu_speech(urdu_text):
    """Synthesize speech for ``urdu_text`` with the module-level model.

    Args:
        urdu_text: Text to speak, as received from the Gradio textbox. May be
            ``None``, empty, or whitespace-only.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``sample_rate`` comes from
        ``model.config.sampling_rate`` and ``waveform`` is the model output
        squeezed and converted to a NumPy array. Returns ``None`` when there
        is nothing to synthesize, so a blank submission does not reach the
        model (a Gradio Audio output given ``None`` is expected to stay
        empty -- see the Gradio Audio documentation).
    """
    # Blank submissions never reach the tokenizer or the model.
    if urdu_text is None or not urdu_text.strip():
        return None

    # Tokenize the input text into PyTorch tensors.
    inputs = tokenizer(urdu_text, return_tensors="pt")

    # Nothing to synthesize if tokenization produced no tokens at all
    # (presumably possible when no character of the text is in the model's
    # vocabulary -- verify against the tokenizer).
    if inputs["input_ids"].numel() == 0:
        return None

    # Inference only: gradients are not needed.
    with torch.no_grad():
        output = model(**inputs).waveform

    # Drop size-1 dimensions and convert the tensor to a NumPy array on the CPU.
    waveform = output.squeeze().cpu().numpy()
    sample_rate = model.config.sampling_rate

    # Gradio's Audio component expects (sample_rate, audio_data).
    return (sample_rate, waveform)
# 3. Build the Gradio interface
with gr.Blocks() as demo:
    # Page heading and short usage instructions.
    gr.Markdown("""
# Urdu TTS Demo (Nastaliq Script)
Enter text in Urdu (Nastaliq) script, and this demo will synthesize speech using the Facebook MMS TTS model for Urdu.
""")

    # Text input for Urdu (Nastaliq). Urdu is written right-to-left, so the
    # textbox is rendered RTL to keep typed text and punctuation in order.
    text_input = gr.Textbox(
        label="Enter Urdu text",
        placeholder="مثال کے طور پر...",
        lines=3,
        rtl=True,
    )

    # Audio output; type="numpy" matches the (sample_rate, ndarray) tuple
    # returned by generate_urdu_speech.
    audio_output = gr.Audio(label="Generated Urdu Speech", type="numpy")

    # Generate button
    generate_button = gr.Button("Generate Speech")

    # Wire up the button: clicking it sends the textbox content to the
    # inference function and shows the result in the audio player.
    generate_button.click(
        fn=generate_urdu_speech,
        inputs=text_input,
        outputs=audio_output,
    )
# 4. Start the Gradio app only when this file is run as a script;
#    importing the module builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()