Spaces:
Sleeping
Sleeping
File size: 1,562 Bytes
786f6ac 5de81a9 786f6ac d16456b 5de81a9 786f6ac 5de81a9 786f6ac 5de81a9 786f6ac 5de81a9 786f6ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import re

import gradio as gr
import numpy as np
import scipy.io.wavfile
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Hugging Face model id of the Tibetan MMS-TTS checkpoint used for synthesis.
model_id = "openpecha/mms-tts-sherab"
# Build the text-to-speech pipeline once at module load; generate_audio reuses it.
# Pass device=0 as well to run inference on a GPU.
synthesiser = pipeline(task="text-to-speech", model=model_id)
def replace_numbers_with_convert(sentence, wylie=True):
    """Replace every number in *sentence* with the output of ``convert``.

    A number is a run of decimal digits with an optional fractional part
    (for example ``12`` or ``3.14``). Each match is passed to ``convert`` as a
    string, together with *wylie*, and the returned text is substituted in
    place of the number.

    Args:
        sentence: Text that may contain numbers.
        wylie: Forwarded unchanged as the second argument of ``convert``.

    Returns:
        The sentence with each number replaced. A sentence that contains no
        number is returned unchanged and ``convert`` is never called.
    """
    # NOTE(review): ``convert`` is not defined or imported in the visible part
    # of this file -- confirm where it comes from and import it.
    number_pattern = r'\d+(\.\d+)?'

    def substitute(match):
        # group() is the whole matched number, including any decimal part.
        return convert(match.group(), wylie)

    return re.sub(number_pattern, substitute, sentence)
def num2letter(sentence):
    """Rewrite the numbers in *sentence*, accepting Tibetan or ASCII digits.

    Tibetan digits are first mapped to their ASCII counterparts (by position
    in the digit string, so the first maps to 0 and the last to 9). The
    normalised sentence is then handed to ``replace_numbers_with_convert``
    with ``wylie=False``, and its result is returned.
    """
    # A single translation table maps all ten digits in one pass.
    to_ascii_digits = str.maketrans("༠༡༢༣༤༥༦༧༨༩", "0123456789")
    normalised = sentence.translate(to_ascii_digits)
    return replace_numbers_with_convert(normalised, wylie=False)
# Run TTS inference on the input text and return denoised audio for Gradio.
def generate_audio(input_text):
    """Synthesise speech for *input_text* and return it for a Gradio audio output.

    Args:
        input_text: Text to speak. Numbers in it are rewritten by
            ``num2letter`` before synthesis.

    Returns:
        A ``(sampling_rate, audio)`` tuple: the sampling rate reported by the
        pipeline followed by the noise-reduced waveform. This is the order
        Gradio's audio output component expects.
    """
    # preprocess: normalise the caller's text; the result is what gets spoken.
    text = num2letter(input_text)
    # Perform TTS inference
    speech = synthesiser(text)
    sampling_rate = speech["sampling_rate"]
    # assumes the pipeline may return audio with a leading axis of size 1,
    # e.g. (1, n_samples) -- TODO confirm; squeeze leaves 1-D audio untouched.
    waveform = np.squeeze(speech["audio"])
    # postprocess
    # NOTE(review): ``noisereduce`` is not imported in the visible part of this
    # file -- confirm it is imported before this function is called.
    audio = noisereduce.reduce_noise(y=waveform, sr=sampling_rate)
    return sampling_rate, audio
# Wire generate_audio into a text-in / audio-out Gradio interface.
iface = gr.Interface(
    title="Tibetan Text-to-Speech (MMS-TTS) Sherab",
    description="Enter Tibetan text and generate speech using MMS-TTS.",
    fn=generate_audio,
    inputs="text",  # text input holding the sentence to speak
    outputs="audio",  # audio output for the synthesised speech
)
# Start serving the interface.
iface.launch()
|