File size: 1,177 Bytes
fdb2ada
e6c6652
bfb8ce2
 
72ea965
e6c6652
 
fdb2ada
bfb8ce2
e6c6652
 
bfb8ce2
fdb2ada
e6c6652
 
 
 
 
 
 
aa1eb45
e6c6652
 
fdb2ada
 
 
e6c6652
 
 
 
 
fdb2ada
 
d6262cc
e6c6652
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Hugging Face Hub checkpoint for Tibetan (bod) MMS-TTS; swap in another
# fine-tuned checkpoint here if necessary.
model_id = "ganga4364/mms-tts-bod-female"

# Build the text-to-speech pipeline once at import time so that every call to
# the inference function reuses it. Pass device=0 to run on a GPU.
synthesiser = pipeline(task="text-to-speech", model=model_id)


# Function to perform TTS inference and save audio to a file
def generate_audio(input_text):
    """Synthesise speech for ``input_text`` and return the path of a WAV file.

    Args:
        input_text: Text to synthesise; passed unchanged to the module-level
            ``synthesiser`` pipeline.

    Returns:
        Path to a newly created ``.wav`` file holding the generated audio.

    Raises:
        gr.Error: If ``input_text`` is empty or contains only whitespace.
    """
    if not input_text or not input_text.strip():
        raise gr.Error("Please enter some text to synthesise.")

    speech = synthesiser(input_text)

    # The pipeline may hand back the waveform with a leading batch axis
    # (1, n_samples) or already one-dimensional; only strip the axis when it
    # is actually there.
    waveform = np.asarray(speech["audio"])
    if waveform.ndim > 1:
        waveform = waveform[0]

    # Use a unique file per call: a single fixed file name would let
    # concurrent requests overwrite each other's audio. The file is kept
    # (delete=False) because the caller still needs to read it after return.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        file_path = tmp.name
    scipy.io.wavfile.write(file_path, rate=speech["sampling_rate"], data=waveform)

    return file_path

# Wire generate_audio into a simple text-in / audio-out web UI.
iface = gr.Interface(
    title="Tibetan Text-to-Speech (MMS-TTS)",
    description="Enter Tibetan text and generate speech using MMS-TTS.",
    fn=generate_audio,
    inputs="text",  # text input holding the sentence to synthesise
    outputs="audio",  # audio output fed by the value generate_audio returns
)

# Start serving the app.
iface.launch()