RUSH-miaomi committed on
Commit
6e5173a
·
1 Parent(s): b4a5b14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -21
app.py CHANGED
@@ -1,16 +1,16 @@
 
 
1
  import sys, os
2
-
3
- if sys.platform == "darwin":
4
- os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
5
-
6
  import logging
7
-
8
  logging.getLogger("numba").setLevel(logging.WARNING)
9
  logging.getLogger("markdown_it").setLevel(logging.WARNING)
10
  logging.getLogger("urllib3").setLevel(logging.WARNING)
11
  logging.getLogger("matplotlib").setLevel(logging.WARNING)
12
 
13
- logging.basicConfig(level=logging.INFO, format="| %(name)s | %(levelname)s | %(message)s")
 
 
14
 
15
  logger = logging.getLogger(__name__)
16
 
@@ -24,7 +24,7 @@ from text import cleaned_text_to_sequence, get_bert
24
  from text.cleaner import clean_text
25
  import gradio as gr
26
  import webbrowser
27
-
28
 
29
  net_g = None
30
 
@@ -69,6 +69,7 @@ def get_text(text, language_str, hps):
69
  language = torch.LongTensor(language)
70
  return bert, ja_bert, phone, tone, language
71
 
 
72
  def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid, language):
73
  global net_g
74
  bert, ja_bert, phones, tones, lang_ids = get_text(text, language, hps)
@@ -103,6 +104,7 @@ def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid, langua
103
  torch.cuda.empty_cache()
104
  return audio
105
 
 
106
  def generate_audio(slices, sdp_ratio, noise_scale, noise_scale_w, length_scale, speaker, language):
107
  audio_list = []
108
  silence = np.zeros(hps.data.sampling_rate // 2)
@@ -121,6 +123,7 @@ def generate_audio(slices, sdp_ratio, noise_scale, noise_scale_w, length_scale,
121
  audio_list.append(silence) # 将静音添加到列表中
122
  return audio_list
123
 
 
124
  def tts_fn(text: str, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, language):
125
  audio_list = []
126
  if language == "mix":
@@ -147,7 +150,7 @@ def tts_fn(text: str, speaker, sdp_ratio, noise_scale, noise_scale_w, length_sca
147
  if __name__ == "__main__":
148
  parser = argparse.ArgumentParser()
149
  parser.add_argument(
150
- "-m", "--model", default="./logs/maolei/G_4800.pth", help="path of your model"
151
  )
152
  parser.add_argument(
153
  "-c",
@@ -194,20 +197,53 @@ if __name__ == "__main__":
194
  with gr.Blocks() as app:
195
  with gr.Row():
196
  with gr.Column():
197
-
198
- text = gr.TextArea(label="Text", placeholder="Input Text Here",
199
- value="猫雷最强!")
200
- speaker = gr.Dropdown(choices=speakers, value=speakers[0], label='Speaker')
201
- sdp_ratio = gr.Slider(minimum=0.1, maximum=1, value=0.2, step=0.01, label='SDP/DP混合比')
202
- noise_scale = gr.Slider(minimum=0.1, maximum=1, value=0.5, step=0.01, label='感情调节')
203
- noise_scale_w = gr.Slider(minimum=0.1, maximum=1, value=0.9, step=0.01, label='音素长度')
204
- length_scale = gr.Slider(minimum=0.1, maximum=2, value=1, step=0.01, label='生成长度')
205
- language = gr.Dropdown(choices=languages, value=languages[0], label="选择语言(该模型mix有问题先别选)" )
206
- btn = gr.Button("点击生成", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  with gr.Column():
208
- text_output = gr.Textbox(label="Message")
209
- audio_output = gr.Audio(label="Output Audio")
210
-
 
 
 
 
211
  btn.click(
212
  tts_fn,
213
  inputs=[
@@ -221,6 +257,7 @@ if __name__ == "__main__":
221
  ],
222
  outputs=[text_output, audio_output],
223
  )
 
224
 
225
 
226
 
 
1
+ # flake8: noqa: E402
2
+ import re
3
  import sys, os
 
 
 
 
4
  import logging
5
+ import re_matching
6
  logging.getLogger("numba").setLevel(logging.WARNING)
7
  logging.getLogger("markdown_it").setLevel(logging.WARNING)
8
  logging.getLogger("urllib3").setLevel(logging.WARNING)
9
  logging.getLogger("matplotlib").setLevel(logging.WARNING)
10
 
11
+ logging.basicConfig(
12
+ level=logging.INFO, format="| %(name)s | %(levelname)s | %(message)s"
13
+ )
14
 
15
  logger = logging.getLogger(__name__)
16
 
 
24
  from text.cleaner import clean_text
25
  import gradio as gr
26
  import webbrowser
27
+ import numpy as np
28
 
29
  net_g = None
30
 
 
69
  language = torch.LongTensor(language)
70
  return bert, ja_bert, phone, tone, language
71
 
72
+
73
  def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid, language):
74
  global net_g
75
  bert, ja_bert, phones, tones, lang_ids = get_text(text, language, hps)
 
104
  torch.cuda.empty_cache()
105
  return audio
106
 
107
+
108
  def generate_audio(slices, sdp_ratio, noise_scale, noise_scale_w, length_scale, speaker, language):
109
  audio_list = []
110
  silence = np.zeros(hps.data.sampling_rate // 2)
 
123
  audio_list.append(silence) # 将静音添加到列表中
124
  return audio_list
125
 
126
+
127
  def tts_fn(text: str, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, language):
128
  audio_list = []
129
  if language == "mix":
 
150
  if __name__ == "__main__":
151
  parser = argparse.ArgumentParser()
152
  parser.add_argument(
153
+ "-m", "--model", default="./logs/as/G_8000.pth", help="path of your model"
154
  )
155
  parser.add_argument(
156
  "-c",
 
197
  with gr.Blocks() as app:
198
  with gr.Row():
199
  with gr.Column():
200
+ gr.Markdown(value="""
201
+ bert-vits-v1.1.1整合包作者:@spicysama\n
202
+ 整合包b站链接:https://www.bilibili.com/video/BV1hu4y1W7dW\n
203
+ 声音归属:@猫雷NyaRu_Official\n
204
+ Bert-VITS2项目:https://github.com/Stardust-minus/Bert-VITS2\n
205
+ 猫雷的B站账号:https://space.bilibili.com/697091119
206
+ 发布二创作品请标注本项目作者及链接、作品使用Bert-VITS2 AI生成!\n
207
+ """)
208
+ text = gr.TextArea(
209
+ label="输入文本内容",
210
+ placeholder="""
211
+ 如果你选择语言为\'mix\',必须按照格式输入,否则报错:
212
+ 格式举例(zh是中文,jp是日语,不区分大小写;说话人举例:gongzi):
213
+ [说话人1]<zh>你好,こんにちは! <jp>こんにちは,世界。
214
+ [说话人2]<zh>你好吗?<jp>元気ですか?
215
+ [说话人3]<zh>谢谢。<jp>どういたしまして。
216
+ ...
217
+ 另外,所有的语言选项都可以用'|'分割长段实现分句生成。
218
+ """
219
+ )
220
+ speaker = gr.Dropdown(
221
+ choices=speakers, value=speakers[0], label="选择说话人"
222
+ )
223
+ sdp_ratio = gr.Slider(
224
+ minimum=0, maximum=1, value=0.2, step=0.1, label="SDP/DP混合比"
225
+ )
226
+ noise_scale = gr.Slider(
227
+ minimum=0.1, maximum=2, value=0.2, step=0.1, label="感情"
228
+ )
229
+ noise_scale_w = gr.Slider(
230
+ minimum=0.1, maximum=2, value=0.9, step=0.1, label="音素长度"
231
+ )
232
+ length_scale = gr.Slider(
233
+ minimum=0.1, maximum=2, value=0.8, step=0.1, label="语速"
234
+ )
235
+ language = gr.Dropdown(
236
+ choices=languages, value=languages[0], label="选择语言(该模型mix混合效果不好,先别用)"
237
+ )
238
+ btn = gr.Button("生成音频!", variant="primary")
239
  with gr.Column():
240
+ text_output = gr.Textbox(label="状态信息")
241
+ audio_output = gr.Audio(label="输出音频")
242
+ explain_image = gr.Image(label="参数解释信息",
243
+ show_label=True,
244
+ show_share_button=False,
245
+ show_download_button=False,
246
+ value=os.path.abspath("./img/参数说明.png"))
247
  btn.click(
248
  tts_fn,
249
  inputs=[
 
257
  ],
258
  outputs=[text_output, audio_output],
259
  )
260
+
261
 
262
 
263