BishanSingh246
committed on
Commit
•
681fa4c
1
Parent(s):
9d84e74
Add application file
Browse files- .vscode/settings.json +4 -4
- app.py +65 -65
.vscode/settings.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
{
|
2 |
-
"python.analysis.extraPaths": [
|
3 |
-
"./vakyansh-tts"
|
4 |
-
]
|
5 |
}
|
|
|
1 |
+
{
|
2 |
+
"python.analysis.extraPaths": [
|
3 |
+
"./vakyansh-tts"
|
4 |
+
]
|
5 |
}
|
app.py
CHANGED
@@ -1,66 +1,66 @@
|
|
1 |
-
from tts_infer.tts import TextToMel, MelToWav
|
2 |
-
from tts_infer.transliterate import XlitEngine
|
3 |
-
from tts_infer.num_to_word_on_sent import normalize_nums
|
4 |
-
|
5 |
-
import re
|
6 |
-
import numpy as np
|
7 |
-
from scipy.io.wavfile import write
|
8 |
-
|
9 |
-
from mosestokenizer import *
|
10 |
-
from indicnlp.tokenize import sentence_tokenize
|
11 |
-
import gradio as gr
|
12 |
-
|
13 |
-
|
14 |
-
INDIC = ["as", "bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"]
|
15 |
-
|
16 |
-
def split_sentences(paragraph, language):
|
17 |
-
if language == "en":
|
18 |
-
with MosesSentenceSplitter(language) as splitter:
|
19 |
-
return splitter([paragraph])
|
20 |
-
elif language in INDIC:
|
21 |
-
return sentence_tokenize.sentence_split(paragraph, lang=language)
|
22 |
-
|
23 |
-
|
24 |
-
device='cpu'
|
25 |
-
text_to_mel = TextToMel(glow_model_dir='vakyansh-tts/tts_infer/odia/glow', device=device)
|
26 |
-
mel_to_wav = MelToWav(hifi_model_dir='vakyansh-tts/tts_infer/odia/hifi', device=device)
|
27 |
-
|
28 |
-
|
29 |
-
def run_tts(text, lang):
|
30 |
-
final_text = text
|
31 |
-
mel = text_to_mel.generate_mel(final_text)
|
32 |
-
audio, sr = mel_to_wav.generate_wav(mel)
|
33 |
-
write(filename='temp.wav', rate=sr, data=audio) # for saving wav file, if needed
|
34 |
-
return (sr, audio)
|
35 |
-
|
36 |
-
def run_tts_paragraph(text, lang):
|
37 |
-
audio_list = []
|
38 |
-
split_sentences_list = split_sentences(text, language='hi')
|
39 |
-
|
40 |
-
for sent in split_sentences_list:
|
41 |
-
sr, audio = run_tts(sent, lang)
|
42 |
-
audio_list.append(audio)
|
43 |
-
|
44 |
-
concatenated_audio = np.concatenate([i for i in audio_list])
|
45 |
-
write(filename='temp_long.wav', rate=sr, data=concatenated_audio)
|
46 |
-
return (sr, concatenated_audio)
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
_, audio = run_tts("ଆମେ ଦୁଖିତ, ଆପଣଙ୍କର ଚିନ୍ତାଧାରାକୁ ସମାଧାନ କରିବାରେ ଅସମର୍ଥ, ଆମେ ଆପଣଙ୍କ ସହ ଯୋଗାଯୋଗ କରିବାକୁ ୱାର୍କସପ୍ଦ ଦଳକୁ କହିବୁ, ତୁମର ଦିନ ଶୁଭମୟ ହଉ.", "or")
|
51 |
-
|
52 |
-
|
53 |
-
options = ["Odia"]
|
54 |
-
|
55 |
-
newOptions = ["Male","Female"]
|
56 |
-
language = gr.Dropdown(options,label="Select language")
|
57 |
-
gender = gr.Dropdown(newOptions,label="Select Voice")
|
58 |
-
input = gr.Textbox(
|
59 |
-
label="Input from model will appear here:",
|
60 |
-
lines=5
|
61 |
-
)
|
62 |
-
output = gr.Audio(label="Output from model will appear here:", type="filepath")
|
63 |
-
|
64 |
-
gr.Interface(run_tts, inputs = [input,language], outputs=output,
|
65 |
-
streaming=True, interactive=True,
|
66 |
analytics_enabled=False, show_tips=False, enable_queue=True).launch(inline=False);
|
|
|
1 |
+
from tts_infer.tts import TextToMel, MelToWav
|
2 |
+
from tts_infer.transliterate import XlitEngine
|
3 |
+
from tts_infer.num_to_word_on_sent import normalize_nums
|
4 |
+
|
5 |
+
import re
|
6 |
+
import numpy as np
|
7 |
+
from scipy.io.wavfile import write
|
8 |
+
|
9 |
+
from mosestokenizer import *
|
10 |
+
from indicnlp.tokenize import sentence_tokenize
|
11 |
+
import gradio as gr
|
12 |
+
|
13 |
+
|
14 |
+
INDIC = ["as", "bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"]
|
15 |
+
|
16 |
+
def split_sentences(paragraph, language):
    """Break *paragraph* into sentences using a language-appropriate splitter.

    Args:
        paragraph: Text to split.
        language: Language code. "en" is routed to ``MosesSentenceSplitter``;
            any code listed in ``INDIC`` is routed to
            ``sentence_tokenize.sentence_split``.

    Returns:
        Whatever the selected splitter returns, or ``None`` when *language*
        is neither "en" nor in ``INDIC``.
    """
    if language != "en":
        if language in INDIC:
            return sentence_tokenize.sentence_split(paragraph, lang=language)
        # Unsupported language code: no splitter is available.
        return None

    # The Moses splitter takes a list of paragraphs; the context manager
    # releases it once the call returns.
    with MosesSentenceSplitter(language) as moses:
        return moses([paragraph])
|
22 |
+
|
23 |
+
|
24 |
+
# Both models are constructed once at import time and shared by every
# synthesis call below. The model directories are relative paths, so they are
# resolved against the process's working directory.
device='cpu'
# Text -> mel-spectrogram model, loaded from the odia/glow directory.
text_to_mel = TextToMel(glow_model_dir='vakyansh-tts/tts_infer/odia/glow', device=device)
# Mel-spectrogram -> waveform model, loaded from the odia/hifi directory.
mel_to_wav = MelToWav(hifi_model_dir='vakyansh-tts/tts_infer/odia/hifi', device=device)
|
27 |
+
|
28 |
+
|
29 |
+
def run_tts(text, lang):
    """Synthesize *text* to audio with the module-level models.

    Args:
        text: Text handed to ``text_to_mel.generate_mel`` unchanged.
        lang: Accepted for interface compatibility; not used by this function.

    Returns:
        ``(sr, audio)`` as produced by ``mel_to_wav.generate_wav``, with the
        sample rate first.

    Side effects:
        Overwrites ``temp.wav`` in the working directory on every call.
    """
    spectrogram = text_to_mel.generate_mel(text)
    audio, sr = mel_to_wav.generate_wav(spectrogram)
    # Keep a copy of the latest result on disk, if needed.
    write(filename='temp.wav', rate=sr, data=audio)
    return sr, audio
|
35 |
+
|
36 |
+
def run_tts_paragraph(text, lang):
    """Synthesize a multi-sentence text and return it as a single waveform.

    The text is split into sentences, each sentence is synthesized with
    ``run_tts``, and the resulting audio arrays are joined in order. The
    joined audio is also written to ``temp_long.wav``.

    Args:
        text: Paragraph to synthesize.
        lang: Language code, forwarded to ``run_tts``. It also selects the
            sentence splitter when it is "en" or one of ``INDIC``; any other
            value falls back to 'hi', the splitter that was previously
            hard-coded, so existing callers keep working.

    Returns:
        ``(sr, concatenated_audio)`` where ``sr`` is the sample rate reported
        for the last synthesized sentence.

    Raises:
        ValueError: If splitting *text* yields no sentences.
    """
    # Honor the caller's language instead of always splitting as Hindi, but
    # only when split_sentences actually supports that code.
    split_lang = lang if lang == "en" or lang in INDIC else 'hi'
    sentences = split_sentences(text, language=split_lang)

    # Fail with a clear message rather than letting np.concatenate raise on
    # an empty list (and leaving `sr` unbound).
    if not sentences:
        raise ValueError("run_tts_paragraph: no sentences to synthesize")

    audio_list = []
    sr = None
    for sent in sentences:
        sr, audio = run_tts(sent, lang)
        audio_list.append(audio)

    concatenated_audio = np.concatenate(audio_list)
    write(filename='temp_long.wav', rate=sr, data=concatenated_audio)
    return (sr, concatenated_audio)
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
# Runs one synthesis at import time, before the UI is built. This also writes
# temp.wav as a side effect of run_tts. NOTE(review): `audio` is not read again
# in the visible code; presumably this is a warm-up / smoke test, confirm.
_, audio = run_tts("ଆମେ ଦୁଖିତ, ଆପଣଙ୍କର ଚିନ୍ତାଧାରାକୁ ସମାଧାନ କରିବାରେ ଅସମର୍ଥ, ଆମେ ଆପଣଙ୍କ ସହ ଯୋଗାଯୋଗ କରିବାକୁ ୱାର୍କସପ୍ଦ ଦଳକୁ କହିବୁ, ତୁମର ଦିନ ଶୁଭମୟ ହଉ.", "or")
|
51 |
+
|
52 |
+
|
53 |
+
# --- Gradio UI -------------------------------------------------------------
# Choices offered by the two dropdowns.
options = ["Odia"]
newOptions = ["Male", "Female"]

language = gr.Dropdown(options, label="Select language")
# NOTE(review): `gender` is created but not passed to the Interface below.
gender = gr.Dropdown(newOptions, label="Select Voice")

# NOTE(review): `input` shadows the builtin; the name is kept because it is
# module-level and may be referenced elsewhere.
input = gr.Textbox(
    label="Input from model will appear here:",
    lines=5,
)
output = gr.Audio(
    label="Output from model will appear here:",
    type="filepath",
)

# The text box and language dropdown are passed to run_tts as (text, lang).
gr.Interface(
    run_tts,
    inputs=[input, language],
    outputs=output,
    streaming=True,
    interactive=True,
    analytics_enabled=False,
    show_tips=False,
    enable_queue=True,
).launch(inline=False)
|