File size: 2,494 Bytes
681fa4c 9d84e74 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
from tts_infer.tts import TextToMel, MelToWav
from tts_infer.transliterate import XlitEngine
from tts_infer.num_to_word_on_sent import normalize_nums
import re
import numpy as np
from scipy.io.wavfile import write
from mosestokenizer import *
from indicnlp.tokenize import sentence_tokenize
import gradio as gr
# Language codes that split_sentences routes to the Indic NLP sentence splitter.
INDIC = ["as", "bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"]


def split_sentences(paragraph, language):
    """Split *paragraph* into sentences using a language-appropriate splitter.

    Args:
        paragraph: Text to split.
        language: Language code. ``"en"`` uses the Moses sentence splitter;
            any code listed in ``INDIC`` uses the Indic NLP sentence splitter.

    Returns:
        The sentences produced by the selected splitter.

    Raises:
        ValueError: If *language* is neither ``"en"`` nor listed in ``INDIC``.
    """
    if language == "en":
        # The splitter is a context manager; leaving the block releases it.
        with MosesSentenceSplitter(language) as splitter:
            return splitter([paragraph])
    if language in INDIC:
        return sentence_tokenize.sentence_split(paragraph, lang=language)
    # Fail loudly instead of falling through and returning None, which would
    # only surface later as a confusing TypeError when the caller iterates.
    raise ValueError(
        f"unsupported language {language!r}; expected 'en' or one of {INDIC}"
    )
# Both models below are created with this device setting.
device = "cpu"

# Text -> mel model, loaded from the Odia "glow" model directory.
text_to_mel = TextToMel(
    glow_model_dir="vakyansh-tts/tts_infer/odia/glow",
    device=device,
)

# Mel -> waveform model, loaded from the Odia "hifi" model directory.
mel_to_wav = MelToWav(
    hifi_model_dir="vakyansh-tts/tts_infer/odia/hifi",
    device=device,
)
def run_tts(text, lang):
    """Synthesize speech for *text* with the module-level models.

    The text is turned into a mel representation by ``text_to_mel`` and then
    into a waveform by ``mel_to_wav``. As a side effect the waveform is also
    written to ``temp.wav`` in the current working directory.

    Args:
        text: Text to synthesize.
        lang: Accepted for interface compatibility; not used by this function.

    Returns:
        A ``(sample_rate, audio)`` tuple as produced by
        ``mel_to_wav.generate_wav``.
    """
    spectrogram = text_to_mel.generate_mel(text)
    waveform, sample_rate = mel_to_wav.generate_wav(spectrogram)
    # for saving wav file, if needed
    write(filename="temp.wav", rate=sample_rate, data=waveform)
    return sample_rate, waveform
def run_tts_paragraph(text, lang):
    """Synthesize a multi-sentence text sentence by sentence and join the audio.

    The text is split with ``split_sentences``, each sentence is synthesized
    with ``run_tts``, and the resulting waveforms are concatenated in order.
    As a side effect the joined waveform is written to ``temp_long.wav`` in
    the current working directory.

    Args:
        text: Paragraph to synthesize.
        lang: Language code used for sentence splitting and forwarded to
            ``run_tts``. Values the splitter does not know fall back to
            Hindi splitting rules.

    Returns:
        A ``(sample_rate, audio)`` tuple, where ``sample_rate`` is the rate
        reported for the last synthesized sentence and ``audio`` is the
        concatenation of all sentence waveforms.

    Raises:
        ValueError: If no sentences are found in *text*.
    """
    # Split with the caller's language rather than always using Hindi rules.
    # Unknown values (e.g. a UI label instead of a language code) keep the
    # Hindi splitter so such callers continue to work.
    language = lang if lang == "en" or lang in INDIC else "hi"
    sentences = split_sentences(text, language=language)
    if not sentences:
        # Without this guard np.concatenate fails with an opaque message and
        # the sample rate would never have been assigned.
        raise ValueError("no sentences found in input text; nothing to synthesize")

    audio_list = []
    for sent in sentences:
        sr, audio = run_tts(sent, lang)
        audio_list.append(audio)

    concatenated_audio = np.concatenate(audio_list)
    write(filename='temp_long.wav', rate=sr, data=concatenated_audio)
    return (sr, concatenated_audio)
# Synthesize one sample sentence at start-up (presumably a warm-up / smoke
# test -- confirm). The result is not needed, so it is not bound to a name;
# run_tts still writes temp.wav as a side effect.
run_tts("ଆମେ ଦୁଖିତ, ଆପଣଙ୍କର ଚିନ୍ତାଧାରାକୁ ସମାଧାନ କରିବାରେ ଅସମର୍ଥ, ଆମେ ଆପଣଙ୍କ ସହ ଯୋଗାଯୋଗ କରିବାକୁ ୱାର୍କସପ୍ଦ ଦଳକୁ କହିବୁ, ତୁମର ଦିନ ଶୁଭମୟ ହଉ.", "or")

# Choices offered by the dropdowns below.
options = ["Odia"]
newOptions = ["Male", "Female"]

language = gr.Dropdown(options, label="Select language")
# NOTE(review): this dropdown is created but never passed to the Interface,
# and run_tts takes no voice argument -- wire it up or remove it.
gender = gr.Dropdown(newOptions, label="Select Voice")

# Named text_input rather than input so the builtin input() is not shadowed.
# NOTE(review): the label reads like an output caption although this is the
# box the user types into -- confirm the intended wording.
text_input = gr.Textbox(
    label="Input from model will appear here:",
    lines=5,
)
output = gr.Audio(label="Output from model will appear here:", type="filepath")

# run_tts receives (text, selected language) and its return value is shown
# in the audio component.
gr.Interface(
    run_tts,
    inputs=[text_input, language],
    outputs=output,
    streaming=True,
    interactive=True,
    analytics_enabled=False,
    show_tips=False,
    enable_queue=True,
).launch(inline=False)