File size: 2,494 Bytes
681fa4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d84e74
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from tts_infer.tts import TextToMel, MelToWav
from tts_infer.transliterate import XlitEngine
from tts_infer.num_to_word_on_sent import normalize_nums

import re
import numpy as np
from scipy.io.wavfile import write

from mosestokenizer import *
from indicnlp.tokenize import sentence_tokenize
import gradio as gr


INDIC = ["as", "bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"]

def split_sentences(paragraph, language):
    if language == "en":
        with MosesSentenceSplitter(language) as splitter:
            return splitter([paragraph])
    elif language in INDIC:
        return sentence_tokenize.sentence_split(paragraph, lang=language)


device='cpu'
text_to_mel = TextToMel(glow_model_dir='vakyansh-tts/tts_infer/odia/glow', device=device)
mel_to_wav = MelToWav(hifi_model_dir='vakyansh-tts/tts_infer/odia/hifi', device=device)


def run_tts(text, lang):
    final_text = text
    mel = text_to_mel.generate_mel(final_text)
    audio, sr = mel_to_wav.generate_wav(mel)
    write(filename='temp.wav', rate=sr, data=audio) # for saving wav file, if needed
    return (sr, audio)

def run_tts_paragraph(text, lang):
    audio_list = []
    split_sentences_list = split_sentences(text, language='hi')

    for sent in split_sentences_list:
        sr, audio = run_tts(sent, lang)
        audio_list.append(audio)

    concatenated_audio = np.concatenate([i for i in audio_list])
    write(filename='temp_long.wav', rate=sr, data=concatenated_audio)
    return (sr, concatenated_audio)



_, audio = run_tts("ଆମେ ଦୁଖିତ, ଆପଣଙ୍କର ଚିନ୍ତାଧାରାକୁ ସମାଧାନ କରିବାରେ ଅସମର୍ଥ, ଆମେ ଆପଣଙ୍କ ସହ ଯୋଗାଯୋଗ କରିବାକୁ ୱାର୍କସପ୍ଦ ଦଳକୁ କହିବୁ, ତୁମର ଦିନ ଶୁଭମୟ ହଉ.", "or")


options = ["Odia"]

newOptions = ["Male","Female"]
language = gr.Dropdown(options,label="Select language") 
gender =  gr.Dropdown(newOptions,label="Select Voice")
input = gr.Textbox(
            label="Input from model will appear here:",
            lines=5
        )
output = gr.Audio(label="Output from model will appear here:", type="filepath") 

gr.Interface(run_tts, inputs = [input,language],  outputs=output,
             streaming=True, interactive=True,
             analytics_enabled=False, show_tips=False, enable_queue=True).launch(inline=False);