Spaces:

ganga4364
/

mms-tts-bod

Sleeping

File size: 1,177 Bytes

fdb2ada
e6c6652
bfb8ce2
 
72ea965
e6c6652
 
fdb2ada
bfb8ce2
e6c6652
 
bfb8ce2
fdb2ada
e6c6652
 
 
 
 
 
 
aa1eb45
e6c6652
 
fdb2ada
 
 
e6c6652
 
 
 
 
fdb2ada
 
d6262cc
e6c6652

import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Identifier of the MMS-TTS checkpoint for Tibetan (bod).
# Point this at your own fine-tuned checkpoint if necessary.
model_id = "ganga4364/mms-tts-bod-female"


# Build the text-to-speech pipeline around that checkpoint.
# Pass device=0 as well if you want inference to run on a GPU.
synthesiser = pipeline(task="text-to-speech", model=model_id)


# Function to perform TTS inference and save audio to a file
def generate_audio(input_text):
    """Synthesise speech for ``input_text`` and return the path of a WAV file.

    Each call writes to its own temporary ``.wav`` file, so overlapping
    requests can no longer overwrite one another's output (the previous
    implementation always wrote to the same fixed file name).

    Args:
        input_text: Text handed unchanged to the module-level
            ``synthesiser`` pipeline.

    Returns:
        Path of the WAV file holding the generated audio. The file is not
        deleted here; it must stay on disk for the caller to read.
    """
    speech = synthesiser(input_text)

    # Multi-dimensional output: keep the first row, as before. Output that
    # is already 1-D is used as-is instead of being reduced to one sample.
    audio = np.asarray(speech["audio"])
    if audio.ndim > 1:
        audio = audio[0]

    # delete=False because the file has to outlive this function; the handle
    # is closed before scipy reopens the path for writing.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        file_path = wav_file.name

    scipy.io.wavfile.write(file_path, rate=speech["sampling_rate"], data=audio)

    # Return the path to the audio file
    return file_path

# Wire generate_audio into a Gradio UI: one text input, one audio output.
iface = gr.Interface(
    generate_audio,
    "text",  # input component: the text to synthesise
    "audio",  # output component: the audio file returned by generate_audio
    title="Tibetan Text-to-Speech (MMS-TTS)",
    description="Enter Tibetan text and generate speech using MMS-TTS.",
)

# Start serving the interface
iface.launch()