File size: 1,688 Bytes
b729e27 d4af064 b729e27 d4af064 9a27c21 d4af064 b729e27 d4af064 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import gradio as gr
import torch
from transformers import VitsModel, AutoTokenizer
# 1. Load the tokenizer and the VITS model once, at import time, so every
#    request handled by the app reuses the same objects.
# The checkpoint is intended for Urdu text in its traditional (Nastaliq) script.
# NOTE(review): MMS Urdu checkpoints appear to be published per script on the
# Hub (e.g. with a "-script_arabic" suffix) -- confirm this repo id resolves.
model_name = "facebook/mms-tts-urd"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = VitsModel.from_pretrained(model_name)
# 2. Define the inference function
def generate_urdu_speech(urdu_text):
    """Synthesize speech for ``urdu_text`` with the module-level model.

    Args:
        urdu_text: The text to speak. ``None``, an empty string, or a
            whitespace-only string means there is nothing to synthesize.

    Returns:
        ``(sample_rate, waveform)`` where ``sample_rate`` is
        ``model.config.sampling_rate`` and ``waveform`` is the model's
        output waveform converted to a NumPy array, or ``None`` when there
        is no text to synthesize.
    """
    # The button can be clicked with an empty textbox; skip inference in
    # that case instead of feeding the model an input with no characters.
    # Returning None leaves the Gradio audio output empty.
    if urdu_text is None or not urdu_text.strip():
        return None

    # Tokenize the input text into PyTorch tensors.
    inputs = tokenizer(urdu_text, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(**inputs).waveform

    # Drop size-1 dimensions and convert the tensor to a NumPy array.
    waveform = output.squeeze().cpu().numpy()
    sample_rate = model.config.sampling_rate

    # Gradio's Audio component expects (sample_rate, audio_data).
    return (sample_rate, waveform)
# 3. Build the Gradio interface
with gr.Blocks() as demo:
    # Page header. The literal is kept flush-left so the Markdown text
    # carries no leading indentation.
    gr.Markdown("""
# Urdu TTS Demo (Nastaliq Script)
Enter text in Urdu (Nastaliq) script, and this demo will synthesize speech using the Facebook MMS TTS model for Urdu.
""")

    # Three-line textbox where the user types the Urdu text.
    text_input = gr.Textbox(label="Enter Urdu text", placeholder="مثال کے طور پر...", lines=3)

    # Player for the synthesized audio; the handler returns NumPy data.
    audio_output = gr.Audio(type="numpy", label="Generated Urdu Speech")

    # Clicking the button runs the synthesis function on the textbox
    # contents and sends its result to the audio player.
    generate_button = gr.Button("Generate Speech")
    generate_button.click(generate_urdu_speech, inputs=text_input, outputs=audio_output)

# 4. Launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|