waysolong committed on
Commit
77efc8b
·
1 Parent(s): a4d821b
Files changed (2) hide show
  1. app.py +28 -14
  2. model.py +2 -20
app.py CHANGED
@@ -20,13 +20,14 @@
20
  # https://gradio.app/docs/#dropdown
21
 
22
  import logging
23
- import os
24
  import time
25
 
26
  import gradio as gr
27
  import yaml
28
  from model import get_pretrained_model, language_to_models
29
-
 
30
  title = "# Text-to-speech (TTS)"
31
 
32
  description = """
@@ -74,16 +75,9 @@ def process(language: str, repo_id: str, text: str, sid: str, speed: float):
74
  logging.info(f"Input text: {text}. sid: {sid}, speed: {speed}")
75
  sid = int(sid)
76
 
77
- config = "examples/biaobei/config.yaml"
78
- checkpoint = "checkpoints/checkpoint_140000.pth.tar"
79
- if os.path.exists(config):
80
- print("file cunzai ")
81
- else:
82
- print("12")
83
- with open(config) as f:
84
- config = yaml.safe_load(f)
85
  start = time.time()
86
- dst_file, duration = get_pretrained_model(text,config,checkpoint)
87
  end = time.time()
88
 
89
 
@@ -102,10 +96,30 @@ def process(language: str, repo_id: str, text: str, sid: str, speed: float):
102
 
103
  return dst_file, build_html_output(info)
104
 
105
-
 
 
 
 
106
  demo = gr.Blocks(css=css)
107
-
108
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  with demo:
110
  gr.Markdown(title)
111
  language_choices = list(language_to_models.keys())
 
20
  # https://gradio.app/docs/#dropdown
21
 
22
  import logging
23
+ import os,torch
24
  import time
25
 
26
  import gradio as gr
27
  import yaml
28
  from model import get_pretrained_model, language_to_models
29
+ from mtts.text import TextProcessor
30
+ from mtts.models.fs2_model import FastSpeech2
31
  title = "# Text-to-speech (TTS)"
32
 
33
  description = """
 
75
  logging.info(f"Input text: {text}. sid: {sid}, speed: {speed}")
76
  sid = int(sid)
77
 
78
+
 
 
 
 
 
 
 
79
  start = time.time()
80
+ dst_file, duration = get_pretrained_model(model,text,config,text_processor,vocoder)
81
  end = time.time()
82
 
83
 
 
96
 
97
  return dst_file, build_html_output(info)
98
 
99
+ def __build_vocoder(config):
100
+ vocoder_name = config['vocoder']['type']
101
+ VocoderClass = eval(vocoder_name)
102
+ model = VocoderClass(config=config['vocoder'][vocoder_name])
103
+ return model
104
  demo = gr.Blocks(css=css)
105
+ config = "examples/biaobei/config.yaml"
106
+ checkpoint = "checkpoints/checkpoint_140000.pth.tar"
107
+ if os.path.exists(config):
108
+ print("file cunzai ")
109
+ else:
110
+ print("12")
111
+ with open(config) as f:
112
+ config = yaml.safe_load(f)
113
+ vocoder = __build_vocoder(config)
114
+ text_processor = TextProcessor(config)
115
+ model = FastSpeech2(config)
116
+ if checkpoint != '':
117
+ print("loading model")
118
+ sd = torch.load(checkpoint, map_location="cpu")
119
+ if 'model' in sd.keys():
120
+ sd = sd['model']
121
+ model.load_state_dict(sd)
122
+ model = model.to("cpu")
123
  with demo:
124
  gr.Markdown(title)
125
  language_choices = list(language_to_models.keys())
model.py CHANGED
@@ -1,8 +1,7 @@
1
  import torch,json,os
2
  from mtts.models.vocoder import *
3
  from scipy.io import wavfile
4
- from mtts.text import TextProcessor
5
- from mtts.models.fs2_model import FastSpeech2
6
  import numpy as np
7
  with open("dict_han_pinyin.json","r",encoding="utf-8") as f:
8
  print("loading")
@@ -32,27 +31,10 @@ def to_int16(wav):
32
  wav = np.clamp(wav, -32767, 32768)
33
  return wav.astype('int16')
34
 
35
- def __build_vocoder(config):
36
- vocoder_name = config['vocoder']['type']
37
- VocoderClass = eval(vocoder_name)
38
- model = VocoderClass(config=config['vocoder'][vocoder_name])
39
- return model
40
 
41
- def get_pretrained_model(line,config,checkpoint):
42
-
43
 
 
44
  sr = config['fbank']['sample_rate']
45
- vocoder = __build_vocoder(config)
46
- text_processor = TextProcessor(config)
47
- model = FastSpeech2(config)
48
- if checkpoint != '':
49
- sd = torch.load(checkpoint, map_location="cpu")
50
- if 'model' in sd.keys():
51
- sd = sd['model']
52
- model.load_state_dict(sd)
53
- model = model.to("cpu")
54
- torch.set_grad_enabled(False)
55
-
56
  pinyin = ""
57
  hanzi = ""
58
  for i in line:
 
1
  import torch,json,os
2
  from mtts.models.vocoder import *
3
  from scipy.io import wavfile
4
+
 
5
  import numpy as np
6
  with open("dict_han_pinyin.json","r",encoding="utf-8") as f:
7
  print("loading")
 
31
  wav = np.clamp(wav, -32767, 32768)
32
  return wav.astype('int16')
33
 
 
 
 
 
 
34
 
 
 
35
 
36
+ def get_pretrained_model(model,line,config,text_processor,vocoder):
37
  sr = config['fbank']['sample_rate']
 
 
 
 
 
 
 
 
 
 
 
38
  pinyin = ""
39
  hanzi = ""
40
  for i in line: