|
import gradio as gr |
|
import torch |
|
from transformers import VitsModel, AutoTokenizer |
|
|
|
|
|
|
|
# Checkpoint identifier passed to both ``from_pretrained`` calls below so the
# model and tokenizer always come from the same source.
# NOTE(review): confirm this checkpoint id resolves on the Hugging Face Hub.
model_name = "facebook/mms-tts-urd"

# Loaded once at import time and shared by every synthesis request.
model = VitsModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
|
|
def generate_urdu_speech(urdu_text):
    """Synthesize speech for Urdu text using the module-level model.

    Args:
        urdu_text: Text to speak. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to synthesize".

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``sample_rate`` is read
        from ``model.config.sampling_rate`` and ``waveform`` is the model
        output converted to a NumPy array, or ``None`` when there is no
        text to synthesize.
    """
    # Guard before touching the tokenizer/model: a click on an empty textbox
    # would otherwise push blank input through inference. Returning None
    # leaves the audio output empty instead of surfacing an error.
    if urdu_text is None or not urdu_text.strip():
        return None

    inputs = tokenizer(urdu_text, return_tensors="pt")

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        output = model(**inputs).waveform

    # Remove singleton dimensions and convert to a host-side NumPy array.
    waveform = output.squeeze().cpu().numpy()
    sample_rate = model.config.sampling_rate

    return (sample_rate, waveform)
|
|
|
|
|
# Build the single-page UI: heading, text box, audio player, trigger button.
with gr.Blocks() as demo:
    gr.Markdown("""
    # Urdu TTS Demo (Nastaliq Script)

    Enter text in Urdu (Nastaliq) script, and this demo will synthesize speech using the Facebook MMS TTS model for Urdu.
    """)

    # Multi-line box for the text to be spoken.
    text_input = gr.Textbox(label="Enter Urdu text", placeholder="مثال کے طور پر...", lines=3)

    # type="numpy" matches the (sample_rate, waveform) tuple the handler returns.
    audio_output = gr.Audio(label="Generated Urdu Speech", type="numpy")

    generate_button = gr.Button("Generate Speech")

    # Wire the button: textbox contents -> synthesis function -> audio player.
    generate_button.click(generate_urdu_speech, text_input, audio_output)
|
|
|
|
|
# Start the Gradio server only when executed as a script, so importing this
# module does not launch the app.
if __name__ == "__main__":
    demo.launch()
|
|