Change phonemizer to proprietary one
Browse files- .gitignore +11 -1
- app.py +27 -6
- packages.txt +0 -3
.gitignore
CHANGED
@@ -157,4 +157,14 @@ cython_debug/
|
|
157 |
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
-
#.idea/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
161 |
+
|
162 |
+
deletion_token.txt
|
163 |
+
|
164 |
+
.vscode
|
165 |
+
|
166 |
+
en_us
|
167 |
+
input.txt
|
168 |
+
input.xml
|
169 |
+
phn.zip
|
170 |
+
xml_nlp
|
app.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
from espnet2.bin.tts_inference import Text2Speech
|
2 |
import torch
|
3 |
from parallel_wavegan.utils import download_pretrained_model, load_model
|
4 |
-
from phonemizer import phonemize
|
5 |
-
from phonemizer.separator import Separator
|
6 |
import gradio as gr
|
|
|
|
|
|
|
7 |
|
8 |
-
s = Separator(word=None, phone=" ")
|
9 |
config_path = "config.yaml"
|
10 |
model_path = "model.pth"
|
11 |
|
@@ -14,6 +14,13 @@ vocoder_tag = "ljspeech_parallel_wavegan.v3"
|
|
14 |
vocoder = load_model(download_pretrained_model(vocoder_tag)).to("cpu").eval()
|
15 |
vocoder.remove_weight_norm()
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
global_styles = {
|
18 |
"Style 1": torch.load("style1.pt"),
|
19 |
"Style 2": torch.load("style2.pt"),
|
@@ -24,6 +31,22 @@ global_styles = {
|
|
24 |
}
|
25 |
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
def inference(text, global_style, alpha, prev_fg_inds, input_fg_inds):
|
28 |
with torch.no_grad():
|
29 |
text2speech = Text2Speech(
|
@@ -44,9 +67,7 @@ def inference(text, global_style, alpha, prev_fg_inds, input_fg_inds):
|
|
44 |
|
45 |
style_emb = torch.flatten(global_styles[global_style])
|
46 |
|
47 |
-
phoneme_string = phonemize(
|
48 |
-
text, language="mb-us1", backend="espeak-mbrola", separator=s
|
49 |
-
)
|
50 |
phonemes = phoneme_string.split(" ")
|
51 |
|
52 |
max_edit_index = -1
|
|
|
1 |
from espnet2.bin.tts_inference import Text2Speech
|
2 |
import torch
|
3 |
from parallel_wavegan.utils import download_pretrained_model, load_model
|
|
|
|
|
4 |
import gradio as gr
|
5 |
+
import os
|
6 |
+
import subprocess
|
7 |
+
from zipfile import ZipFile
|
8 |
|
|
|
9 |
config_path = "config.yaml"
|
10 |
model_path = "model.pth"
|
11 |
|
|
|
14 |
vocoder = load_model(download_pretrained_model(vocoder_tag)).to("cpu").eval()
|
15 |
vocoder.remove_weight_norm()
|
16 |
|
17 |
+
# Download the phonemizer bundle named by the PHN_URL environment variable
# and unpack it into the current working directory.
url = os.environ.get("PHN_URL")
if not url:
    # Without this guard, None would be passed into the wget argv and fail
    # with an unhelpful TypeError from subprocess.
    raise RuntimeError(
        "PHN_URL environment variable must be set to the phonemizer archive URL"
    )
# NOTE(review): assumes the download is saved as "phn.zip" (wget names the
# file after the URL's last path component) -- confirm against PHN_URL.
# check_call raises if wget exits non-zero instead of continuing silently.
subprocess.check_call(["wget", url, "-q"])

with ZipFile("phn.zip", "r") as zip_ref:
    zip_ref.extractall()
# phonemize() executes ./xml_nlp, so the extracted file must be executable.
subprocess.check_call(["chmod", "+x", "xml_nlp"])
|
23 |
+
|
24 |
global_styles = {
|
25 |
"Style 1": torch.load("style1.pt"),
|
26 |
"Style 2": torch.load("style2.pt"),
|
|
|
31 |
}
|
32 |
|
33 |
|
34 |
+
def phonemize(text: str) -> str:
    """Return the phoneme string produced by the ``xml_nlp`` tool for *text*.

    The text is written to ``input.txt`` in the current working directory,
    ``./xml_nlp`` is run, and its output is read back from ``input.xml``.
    For each output line, the part after the last ``[`` is kept, with the
    line's final character (before the newline) dropped, and the pieces are
    concatenated in order.

    Args:
        text: The text to phonemize.

    Returns:
        The concatenated per-line phoneme fragments; an empty string if the
        tool wrote nothing to ``input.xml``.
    """
    with open("input.txt", "w") as f:
        f.write(text)

    # Create (or empty) the output file before the tool runs.
    with open("input.xml", "w"):
        pass

    # The meaning of the positional arguments is defined by xml_nlp itself;
    # its exit status is not checked here.
    subprocess.call(["./xml_nlp", "input", "180", "en_us/enu.ini", "en_us"])

    with open("input.xml", "r") as f:
        # Strip the newline explicitly before dropping the closing character,
        # so a final line without a trailing newline does not lose an extra
        # character. NOTE(review): the dropped character is presumably "]".
        return "".join(
            line.split("[")[-1].rstrip("\n")[:-1] for line in f
        )
|
48 |
+
|
49 |
+
|
50 |
def inference(text, global_style, alpha, prev_fg_inds, input_fg_inds):
|
51 |
with torch.no_grad():
|
52 |
text2speech = Text2Speech(
|
|
|
67 |
|
68 |
style_emb = torch.flatten(global_styles[global_style])
|
69 |
|
70 |
+
phoneme_string = phonemize(text)
|
|
|
|
|
71 |
phonemes = phoneme_string.split(" ")
|
72 |
|
73 |
max_edit_index = -1
|
packages.txt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
mbrola
|
2 |
-
mbrola-us1
|
3 |
-
espeak-ng
|
|
|
|
|
|
|
|