BishanSingh246 committed
Commit 681fa4c
Parent: 9d84e74

Add application file

Files changed (2)
  1. .vscode/settings.json +4 -4
  2. app.py +65 -65
.vscode/settings.json CHANGED
@@ -1,5 +1,5 @@
-{
-    "python.analysis.extraPaths": [
-        "./vakyansh-tts"
-    ]
+{
+    "python.analysis.extraPaths": [
+        "./vakyansh-tts"
+    ]
 }
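
Note (not part of the commit): `python.analysis.extraPaths` only tells Pylance where to look when resolving the `tts_infer` imports in the editor; it does not change Python's import path at runtime. A minimal sketch of the runtime equivalent, assuming the `tts_infer` package sits directly inside the vendored `./vakyansh-tts` checkout (an assumption inferred from the model paths used in `app.py` below):

# Runtime counterpart of the editor-only "python.analysis.extraPaths" setting.
# Assumption: tts_infer lives inside the vendored ./vakyansh-tts directory.
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parent / "vakyansh-tts"))

from tts_infer.tts import TextToMel, MelToWav  # now importable at runtime as well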
app.py CHANGED
@@ -1,66 +1,66 @@
-from tts_infer.tts import TextToMel, MelToWav
-from tts_infer.transliterate import XlitEngine
-from tts_infer.num_to_word_on_sent import normalize_nums
-
-import re
-import numpy as np
-from scipy.io.wavfile import write
-
-from mosestokenizer import *
-from indicnlp.tokenize import sentence_tokenize
-import gradio as gr
-
-
-INDIC = ["as", "bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"]
-
-def split_sentences(paragraph, language):
-    if language == "en":
-        with MosesSentenceSplitter(language) as splitter:
-            return splitter([paragraph])
-    elif language in INDIC:
-        return sentence_tokenize.sentence_split(paragraph, lang=language)
-
-
-device='cpu'
-text_to_mel = TextToMel(glow_model_dir='vakyansh-tts/tts_infer/odia/glow', device=device)
-mel_to_wav = MelToWav(hifi_model_dir='vakyansh-tts/tts_infer/odia/hifi', device=device)
-
-
-def run_tts(text, lang):
-    final_text = text
-    mel = text_to_mel.generate_mel(final_text)
-    audio, sr = mel_to_wav.generate_wav(mel)
-    write(filename='temp.wav', rate=sr, data=audio) # for saving wav file, if needed
-    return (sr, audio)
-
-def run_tts_paragraph(text, lang):
-    audio_list = []
-    split_sentences_list = split_sentences(text, language='hi')
-
-    for sent in split_sentences_list:
-        sr, audio = run_tts(sent, lang)
-        audio_list.append(audio)
-
-    concatenated_audio = np.concatenate([i for i in audio_list])
-    write(filename='temp_long.wav', rate=sr, data=concatenated_audio)
-    return (sr, concatenated_audio)
-
-
-
-_, audio = run_tts("ଆମେ ଦୁଖିତ, ଆପଣଙ୍କର ଚିନ୍ତାଧାରାକୁ ସମାଧାନ କରିବାରେ ଅସମର୍ଥ, ଆମେ ଆପଣଙ୍କ ସହ ଯୋଗାଯୋଗ କରିବାକୁ ୱାର୍କସପ୍ଦ ଦଳକୁ କହିବୁ, ତୁମର ଦିନ ଶୁଭମୟ ହଉ.", "or")
-
-
-options = ["Odia"]
-
-newOptions = ["Male","Female"]
-language = gr.Dropdown(options,label="Select language")
-gender = gr.Dropdown(newOptions,label="Select Voice")
-input = gr.Textbox(
-    label="Input from model will appear here:",
-    lines=5
-)
-output = gr.Audio(label="Output from model will appear here:", type="filepath")
-
-gr.Interface(run_tts, inputs = [input,language], outputs=output,
-             streaming=True, interactive=True,
+from tts_infer.tts import TextToMel, MelToWav
+from tts_infer.transliterate import XlitEngine
+from tts_infer.num_to_word_on_sent import normalize_nums
+
+import re
+import numpy as np
+from scipy.io.wavfile import write
+
+from mosestokenizer import *
+from indicnlp.tokenize import sentence_tokenize
+import gradio as gr
+
+
+INDIC = ["as", "bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"]
+
+def split_sentences(paragraph, language):
+    if language == "en":
+        with MosesSentenceSplitter(language) as splitter:
+            return splitter([paragraph])
+    elif language in INDIC:
+        return sentence_tokenize.sentence_split(paragraph, lang=language)
+
+
+device='cpu'
+text_to_mel = TextToMel(glow_model_dir='vakyansh-tts/tts_infer/odia/glow', device=device)
+mel_to_wav = MelToWav(hifi_model_dir='vakyansh-tts/tts_infer/odia/hifi', device=device)
+
+
+def run_tts(text, lang):
+    final_text = text
+    mel = text_to_mel.generate_mel(final_text)
+    audio, sr = mel_to_wav.generate_wav(mel)
+    write(filename='temp.wav', rate=sr, data=audio) # for saving wav file, if needed
+    return (sr, audio)
+
+def run_tts_paragraph(text, lang):
+    audio_list = []
+    split_sentences_list = split_sentences(text, language='hi')
+
+    for sent in split_sentences_list:
+        sr, audio = run_tts(sent, lang)
+        audio_list.append(audio)
+
+    concatenated_audio = np.concatenate([i for i in audio_list])
+    write(filename='temp_long.wav', rate=sr, data=concatenated_audio)
+    return (sr, concatenated_audio)
+
+
+
+_, audio = run_tts("ଆମେ ଦୁଖିତ, ଆପଣଙ୍କର ଚିନ୍ତାଧାରାକୁ ସମାଧାନ କରିବାରେ ଅସମର୍ଥ, ଆମେ ଆପଣଙ୍କ ସହ ଯୋଗାଯୋଗ କରିବାକୁ ୱାର୍କସପ୍ଦ ଦଳକୁ କହିବୁ, ତୁମର ଦିନ ଶୁଭମୟ ହଉ.", "or")
+
+
+options = ["Odia"]
+
+newOptions = ["Male","Female"]
+language = gr.Dropdown(options,label="Select language")
+gender = gr.Dropdown(newOptions,label="Select Voice")
+input = gr.Textbox(
+    label="Input from model will appear here:",
+    lines=5
+)
+output = gr.Audio(label="Output from model will appear here:", type="filepath")
+
+gr.Interface(run_tts, inputs = [input,language], outputs=output,
+             streaming=True, interactive=True,
              analytics_enabled=False, show_tips=False, enable_queue=True).launch(inline=False);
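
Note (not part of the commit): the committed `app.py` defines `run_tts_paragraph` but only wires the single-sentence `run_tts` into the interface, and the `gender` dropdown is never passed to it. A minimal sketch of how the paragraph-level helper could be hooked up instead, assuming it runs in the same module as the file above (so `run_tts_paragraph`, `gr`, and `options` are in scope and the models are already loaded) and a Gradio version compatible with the one this commit targets:

# Sketch only: serve run_tts_paragraph instead of run_tts.
language = gr.Dropdown(options, label="Select language")
text_input = gr.Textbox(label="Enter Odia text (one or more sentences)", lines=5)
audio_output = gr.Audio(label="Synthesised speech")  # receives a (sample_rate, numpy array) tuple

gr.Interface(
    fn=run_tts_paragraph,  # splits the paragraph into sentences and concatenates the audio
    inputs=[text_input, language],
    outputs=audio_output,
    analytics_enabled=False,
).launch(inline=False)

Returning the `(sample_rate, array)` tuple lets Gradio render the audio directly, without depending on the `temp_long.wav` file that `run_tts_paragraph` also writes to disk.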