waysolong committed 77efc8b (parent: a4d821b): "opt time"

This commit moves model setup (config parsing, vocoder construction, TextProcessor, FastSpeech2, and checkpoint loading) out of the per-request path and into module-level startup code in app.py, so the checkpoint is read once at launch instead of on every synthesis request.
app.py
CHANGED
@@ -20,13 +20,14 @@
 # https://gradio.app/docs/#dropdown
 
 import logging
-import os
+import os,torch
 import time
 
 import gradio as gr
 import yaml
 from model import get_pretrained_model, language_to_models
-
+from mtts.text import TextProcessor
+from mtts.models.fs2_model import FastSpeech2
 title = "# Text-to-speech (TTS)"
 
 description = """
@@ -74,16 +75,9 @@ def process(language: str, repo_id: str, text: str, sid: str, speed: float):
     logging.info(f"Input text: {text}. sid: {sid}, speed: {speed}")
     sid = int(sid)
 
-
-    checkpoint = "checkpoints/checkpoint_140000.pth.tar"
-    if os.path.exists(config):
-        print("config file exists")
-    else:
-        print("12")
-    with open(config) as f:
-        config = yaml.safe_load(f)
+
     start = time.time()
-    dst_file, duration = get_pretrained_model(text,config,checkpoint)
+    dst_file, duration = get_pretrained_model(model,text,config,text_processor,vocoder)
     end = time.time()
 
 
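The block deleted above re-read the YAML config on every call to process() and handed the checkpoint path to get_pretrained_model, which rebuilt the model and re-ran torch.load per call (see model.py below); after this commit the per-request path only runs inference against objects built once at startup (next hunk). A minimal, self-contained sketch of that pattern, not this app's code:

import time

def expensive_load():
    # Stand-in for torch.load(...) plus load_state_dict(...) on a large checkpoint.
    time.sleep(2.0)
    return object()

# Before: pay the load cost inside every request.
def process_per_request(text):
    model = expensive_load()
    return f"tts({text})"

# After: pay it once at import time and reuse the result on every request.
MODEL = expensive_load()

def process_cached(text):
    return f"tts({text})"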
@@ -102,10 +96,30 @@ def process(language: str, repo_id: str, text: str, sid: str, speed: float):
 
     return dst_file, build_html_output(info)
 
-
+def __build_vocoder(config):
+    vocoder_name = config['vocoder']['type']
+    VocoderClass = eval(vocoder_name)
+    model = VocoderClass(config=config['vocoder'][vocoder_name])
+    return model
 demo = gr.Blocks(css=css)
-
-
+config = "examples/biaobei/config.yaml"
+checkpoint = "checkpoints/checkpoint_140000.pth.tar"
+if os.path.exists(config):
+    print("config file exists")
+else:
+    print("12")
+with open(config) as f:
+    config = yaml.safe_load(f)
+vocoder = __build_vocoder(config)
+text_processor = TextProcessor(config)
+model = FastSpeech2(config)
+if checkpoint != '':
+    print("loading model")
+    sd = torch.load(checkpoint, map_location="cpu")
+    if 'model' in sd.keys():
+        sd = sd['model']
+    model.load_state_dict(sd)
+model = model.to("cpu")
 with demo:
     gr.Markdown(title)
     language_choices = list(language_to_models.keys())
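Two details of the moved startup block are worth flagging: the old model.py disabled autograd with torch.set_grad_enabled(False) after loading, which the new app.py block does not re-add, and __build_vocoder resolves a class name taken from the YAML file via eval(). Below is a hedged sketch of the same block with getattr replacing eval and inference mode restored; the paths and class names follow the diff, everything else is an assumption, not the repo's code:

import os

import torch
import yaml

import mtts.models.vocoder as vocoder_module
from mtts.models.fs2_model import FastSpeech2
from mtts.text import TextProcessor

CONFIG_PATH = "examples/biaobei/config.yaml"
CHECKPOINT = "checkpoints/checkpoint_140000.pth.tar"

def build_vocoder(config):
    name = config['vocoder']['type']
    # Same lookup that eval(name) performed via the star import, but
    # without executing arbitrary strings from the config file.
    VocoderClass = getattr(vocoder_module, name)
    return VocoderClass(config=config['vocoder'][name])

if not os.path.exists(CONFIG_PATH):
    raise FileNotFoundError(CONFIG_PATH)

with open(CONFIG_PATH) as f:
    config = yaml.safe_load(f)

vocoder = build_vocoder(config)
text_processor = TextProcessor(config)
model = FastSpeech2(config)

if CHECKPOINT:
    sd = torch.load(CHECKPOINT, map_location="cpu")
    sd = sd.get('model', sd)  # some checkpoints nest the weights under 'model'
    model.load_state_dict(sd)

model = model.to("cpu").eval()
torch.set_grad_enabled(False)  # present in the old model.py, dropped by the move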
model.py
CHANGED
@@ -1,8 +1,7 @@
 import torch,json,os
 from mtts.models.vocoder import *
 from scipy.io import wavfile
-
-from mtts.models.fs2_model import FastSpeech2
+
 import numpy as np
 with open("dict_han_pinyin.json","r",encoding="utf-8") as f:
     print("loading")
@@ -32,27 +31,10 @@ def to_int16(wav):
     wav = np.clamp(wav, -32767, 32768)
     return wav.astype('int16')
 
-def __build_vocoder(config):
-    vocoder_name = config['vocoder']['type']
-    VocoderClass = eval(vocoder_name)
-    model = VocoderClass(config=config['vocoder'][vocoder_name])
-    return model
 
-def get_pretrained_model(line,config,checkpoint):
-
 
+def get_pretrained_model(model,line,config,text_processor,vocoder):
     sr = config['fbank']['sample_rate']
-    vocoder = __build_vocoder(config)
-    text_processor = TextProcessor(config)
-    model = FastSpeech2(config)
-    if checkpoint != '':
-        sd = torch.load(checkpoint, map_location="cpu")
-        if 'model' in sd.keys():
-            sd = sd['model']
-        model.load_state_dict(sd)
-        model = model.to("cpu")
-        torch.set_grad_enabled(False)
-
     pinyin = ""
     hanzi = ""
     for i in line:
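One pre-existing bug is visible in the unchanged context lines above: NumPy has no np.clamp, so to_int16 would raise AttributeError the first time it runs. A corrected sketch using np.clip, with the bounds tightened to the actual int16 range so the cast cannot wrap:

import numpy as np

def to_int16(wav):
    # np.clip is NumPy's clamp; int16 spans -32768..32767, so clipping to
    # exactly that range keeps astype('int16') from overflowing.
    wav = np.clip(wav, -32768, 32767)
    return wav.astype('int16')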