"""Gradio demo: Urdu (Nastaliq script) text-to-speech using Facebook's MMS TTS model."""

import gradio as gr
import torch
from transformers import VitsModel, AutoTokenizer

# 1. Load the model (Nastaliq-based) and tokenizer once at startup.
# This checkpoint is intended for Urdu text in its traditional (Nastaliq) script.
model_name = "facebook/mms-tts-urd"
model = VitsModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Inference-only usage: disable dropout/training-mode layers for deterministic output.
model.eval()


# 2. Define the inference function
def generate_urdu_speech(urdu_text):
    """Synthesize speech for the given Urdu text.

    Args:
        urdu_text: Input text, expected in Urdu (Nastaliq) script.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is a 1-D
        NumPy array — the format Gradio's Audio component expects for
        ``type="numpy"``.

    Raises:
        gr.Error: If the input is empty or whitespace-only (an empty token
            sequence would otherwise crash the model with an opaque error).
    """
    if not urdu_text or not urdu_text.strip():
        raise gr.Error("Please enter some Urdu text to synthesize.")

    # Tokenize the input text
    inputs = tokenizer(urdu_text, return_tensors="pt")

    # Perform inference; no_grad avoids building the autograd graph.
    with torch.no_grad():
        output = model(**inputs).waveform

    # Convert the (1, num_samples) PyTorch tensor to a 1-D NumPy array.
    waveform = output.squeeze().cpu().numpy()
    sample_rate = model.config.sampling_rate

    # Gradio's Audio component expects (sample_rate, audio_data).
    return (sample_rate, waveform)


# 3. Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""
    # Urdu TTS Demo (Nastaliq Script)
    Enter text in Urdu (Nastaliq) script, and this demo will synthesize speech
    using the Facebook MMS TTS model for Urdu.
    """)

    # Text input for Urdu (Nastaliq)
    text_input = gr.Textbox(
        label="Enter Urdu text",
        placeholder="مثال کے طور پر...",
        lines=3,
    )

    # Audio output
    audio_output = gr.Audio(label="Generated Urdu Speech", type="numpy")

    # Generate button
    generate_button = gr.Button("Generate Speech")

    # Wire up the button to the function
    generate_button.click(
        fn=generate_urdu_speech,
        inputs=text_input,
        outputs=audio_output,
    )

# 4. Launch the Gradio app
if __name__ == "__main__":
    demo.launch()