Spaces:

ganga4364
/

mms-tts-bod

Running

ganga4364 committed on Oct 8, 2024

Commit

fdb2ada

verified ·

1 Parent(s): 90e01e6

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import tempfile
+from io import BytesIO
+
+import gradio as gr
+import scipy.io.wavfile
+import torch
+from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
+from transformers import AutoTokenizer, VitsModel
+# Load the processor and model for TTS
+model_id = "ganga4364/mms-tts-bod-finetune-sherab"
+processor = AutoProcessor.from_pretrained(model_id)
+model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
+# Function to generate audio from input text and save it to a file
+def generate_audio(input_text):
+    # Process the text input
+    inputs = processor(text=input_text, return_tensors="pt")
+    # Generate the speech using the model
+    with torch.no_grad():
+        outputs = model.generate(**inputs)
+    # Decode the output to obtain the waveform
+    audio_waveform = processor.decode(outputs[0], output_type="np")
+    # Save the audio to a file (e.g., 'output.wav')
+    file_path = "output.wav"
+    scipy.io.wavfile.write(file_path, rate=16000, data=audio_waveform)  # Assuming 16kHz sample rate
+    # Return the file path so Gradio can return the audio file
+    return file_path
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=generate_audio,
+    inputs="text",
+    outputs="audio",  # Output should be the audio file
+    title="Text-to-Speech Tibetan Model",
+    description="Enter text to generate speech with a fine-tuned Tibetan voice and return the audio."
+)
+# Launch the Gradio app
+iface.launch()