Spaces:
Sleeping
Sleeping
waysolong
committed on
Commit
·
f5b630a
1
Parent(s):
2619420
fix bug
Browse files
app.py
CHANGED
@@ -22,11 +22,9 @@
|
|
22 |
import logging
|
23 |
import os
|
24 |
import time
|
25 |
-
import uuid
|
26 |
|
27 |
import gradio as gr
|
28 |
-
import
|
29 |
-
|
30 |
from model import get_pretrained_model, language_to_models
|
31 |
|
32 |
title = "# Text-to-speech (TTS)"
|
@@ -76,9 +74,16 @@ def process(language: str, repo_id: str, text: str, sid: str, speed: float):
|
|
76 |
logging.info(f"Input text: {text}. sid: {sid}, speed: {speed}")
|
77 |
sid = int(sid)
|
78 |
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
start = time.time()
|
81 |
-
dst_file,duration = get_pretrained_model(text)
|
82 |
end = time.time()
|
83 |
|
84 |
|
@@ -189,18 +194,9 @@ with demo:
|
|
189 |
gr.Markdown(description)
|
190 |
|
191 |
|
192 |
-
def download_espeak_ng_data():
|
193 |
-
os.system(
|
194 |
-
"""
|
195 |
-
cd /tmp
|
196 |
-
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
|
197 |
-
tar xf espeak-ng-data.tar.bz2
|
198 |
-
"""
|
199 |
-
)
|
200 |
-
|
201 |
|
202 |
if __name__ == "__main__":
|
203 |
-
|
204 |
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
|
205 |
|
206 |
logging.basicConfig(format=formatter, level=logging.INFO)
|
|
|
22 |
import logging
|
23 |
import os
|
24 |
import time
|
|
|
25 |
|
26 |
import gradio as gr
|
27 |
+
import yaml
|
|
|
28 |
from model import get_pretrained_model, language_to_models
|
29 |
|
30 |
title = "# Text-to-speech (TTS)"
|
|
|
74 |
logging.info(f"Input text: {text}. sid: {sid}, speed: {speed}")
|
75 |
sid = int(sid)
|
76 |
|
77 |
+
config = "examples/biaobei/config.yaml"
|
78 |
+
checkpoint = "checkpoints\checkpoint_140000.pth.tar"
|
79 |
+
if os.path.exists(config):
|
80 |
+
print("file cunzai ")
|
81 |
+
else:
|
82 |
+
print("12")
|
83 |
+
with open(config) as f:
|
84 |
+
config = yaml.safe_load(f)
|
85 |
start = time.time()
|
86 |
+
dst_file, duration = get_pretrained_model(text,config,checkpoint)
|
87 |
end = time.time()
|
88 |
|
89 |
|
|
|
194 |
gr.Markdown(description)
|
195 |
|
196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
if __name__ == "__main__":
|
199 |
+
|
200 |
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
|
201 |
|
202 |
logging.basicConfig(format=formatter, level=logging.INFO)
|
model.py
CHANGED
@@ -1,12 +1,20 @@
|
|
1 |
-
from functools import lru_cache
|
2 |
import torch,json,os
|
3 |
-
|
4 |
from scipy.io import wavfile
|
5 |
from mtts.text import TextProcessor
|
6 |
from mtts.models.fs2_model import FastSpeech2
|
7 |
import numpy as np
|
8 |
with open("dict_han_pinyin.json","r",encoding="utf-8") as f:
|
|
|
9 |
data_dict = json.load(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def normalize(wav):
|
11 |
assert wav.dtype == np.float32
|
12 |
eps = 1e-6
|
@@ -23,17 +31,15 @@ def to_int16(wav):
|
|
23 |
wav = wav = wav * 32767
|
24 |
wav = np.clamp(wav, -32767, 32768)
|
25 |
return wav.astype('int16')
|
|
|
26 |
def __build_vocoder(config):
|
27 |
vocoder_name = config['vocoder']['type']
|
28 |
VocoderClass = eval(vocoder_name)
|
29 |
model = VocoderClass(config=config['vocoder'][vocoder_name])
|
30 |
return model
|
31 |
-
|
32 |
-
def get_pretrained_model(line):
|
33 |
-
|
34 |
-
checkpoint = "checkpoints\checkpoint_140000.pth.tar"
|
35 |
-
with open(config) as f:
|
36 |
-
config = yaml.safe_load(f)
|
37 |
|
38 |
sr = config['fbank']['sample_rate']
|
39 |
vocoder = __build_vocoder(config)
|
@@ -44,7 +50,6 @@ def get_pretrained_model(line):
|
|
44 |
if 'model' in sd.keys():
|
45 |
sd = sd['model']
|
46 |
model.load_state_dict(sd)
|
47 |
-
del sd # to save mem
|
48 |
model = model.to("cpu")
|
49 |
torch.set_grad_enabled(False)
|
50 |
|
@@ -75,7 +80,3 @@ def get_pretrained_model(line):
|
|
75 |
#np.save(dst_file+'.npy',mel_postnet.cpu().numpy())
|
76 |
wavfile.write(dst_file, sr, wav)
|
77 |
return dst_file,2.0
|
78 |
-
chinese_models = {
|
79 |
-
"csukuangfj/vits-piper-zh_CN-huayan-medium": 1}
|
80 |
-
language_to_models = {
|
81 |
-
"Chinese (Mandarin, 普通话)": list(chinese_models.keys())}
|
|
|
|
|
1 |
import torch,json,os
|
2 |
+
|
3 |
from scipy.io import wavfile
|
4 |
from mtts.text import TextProcessor
|
5 |
from mtts.models.fs2_model import FastSpeech2
|
6 |
import numpy as np
|
7 |
with open("dict_han_pinyin.json","r",encoding="utf-8") as f:
|
8 |
+
print("loading")
|
9 |
data_dict = json.load(f)
|
10 |
+
|
11 |
+
|
12 |
+
chinese_models = {
|
13 |
+
"csukuangfj/vits-piper-zh_CN-huayan-medium": 1}
|
14 |
+
language_to_models = {
|
15 |
+
"Chinese (Mandarin, 普通话)": list(chinese_models.keys())}
|
16 |
+
|
17 |
+
|
18 |
def normalize(wav):
|
19 |
assert wav.dtype == np.float32
|
20 |
eps = 1e-6
|
|
|
31 |
wav = wav = wav * 32767
|
32 |
wav = np.clamp(wav, -32767, 32768)
|
33 |
return wav.astype('int16')
|
34 |
+
|
35 |
def __build_vocoder(config):
|
36 |
vocoder_name = config['vocoder']['type']
|
37 |
VocoderClass = eval(vocoder_name)
|
38 |
model = VocoderClass(config=config['vocoder'][vocoder_name])
|
39 |
return model
|
40 |
+
|
41 |
+
def get_pretrained_model(line,config,checkpoint):
|
42 |
+
|
|
|
|
|
|
|
43 |
|
44 |
sr = config['fbank']['sample_rate']
|
45 |
vocoder = __build_vocoder(config)
|
|
|
50 |
if 'model' in sd.keys():
|
51 |
sd = sd['model']
|
52 |
model.load_state_dict(sd)
|
|
|
53 |
model = model.to("cpu")
|
54 |
torch.set_grad_enabled(False)
|
55 |
|
|
|
80 |
#np.save(dst_file+'.npy',mel_postnet.cpu().numpy())
|
81 |
wavfile.write(dst_file, sr, wav)
|
82 |
return dst_file,2.0
|
|
|
|
|
|
|
|