lj1995 committed on
Commit
6d40265
·
verified ·
1 Parent(s): 6686af7

Update inference_webui.py

Browse files
Files changed (1) hide show
  1. inference_webui.py +42 -57
inference_webui.py CHANGED
@@ -84,22 +84,15 @@ from module.mel_processing import spectrogram_torch
84
  from module.models import SynthesizerTrn
85
  from text import cleaned_text_to_sequence
86
  from text.cleaner import clean_text
87
- # from tools.i18n.i18n import I18nAuto, scan_language_list
88
  from tools.my_utils import load_audio
89
 
90
- # language=os.environ.get("language","Auto")
91
- # language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
92
- # i18n = I18nAuto(language="Auto")
93
 
94
  # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
95
 
96
- i18n_dict={}
97
- json_root="tools/i18n/locale"
98
- for name in os.listdir(json_root):
99
- with open("%s/%s"%(json_root,name),"r")as f:
100
- data=json.loads(f.read())
101
- i18n_dict[name.split(".json")[0].replace("_","-")]=data
102
- i18n=gr.I18n(**i18n_dict)
103
 
104
  if torch.cuda.is_available():
105
  device = "cuda"
@@ -108,34 +101,26 @@ else:
108
  device = "cpu"
109
  is_half = False
110
 
111
- # i18n_dict={}
112
- # json_root="tools/i18n/locale"
113
- # for name in os.listdir(json_root):
114
- # with open("%s/%s"%(json_root,name),"r")as f:
115
- # data=json.loads(f.read())
116
- # i18n_dict[name.split(".json")[0].replace("_","-")]=data
117
- # i18n=gr.I18n(**i18n_dict)
118
-
119
  dict_language_v1 = {
120
- i18n("中文").key: "all_zh", # 全部按中文识别
121
- i18n("英文").key: "en", # 全部按英文识别#######不变
122
- i18n("日文").key: "all_ja", # 全部按日文识别
123
- i18n("中英混合").key: "zh", # 按中英混合识别####不变
124
- i18n("日英混合").key: "ja", # 按日英混合识别####不变
125
- i18n("多语种混合").key: "auto", # 多语种启动切分识别语种
126
  }
127
  dict_language_v2 = {
128
- i18n("中文").key: "all_zh", # 全部按中文识别
129
- i18n("英文").key: "en", # 全部按英文识别#######不变
130
- i18n("日文").key: "all_ja", # 全部按日文识别
131
- i18n("粤语").key: "all_yue", # 全部按中文识别
132
- i18n("韩文").key: "all_ko", # 全部按韩文识别
133
- i18n("中英混合").key: "zh", # 按中英混合识别####不变
134
- i18n("日英混合").key: "ja", # 按日英混合识别####不变
135
- i18n("粤英混合").key: "yue", # 按粤英混合识别####不变
136
- i18n("韩英混合").key: "ko", # 按韩英混合识别####不变
137
- i18n("多语种混合").key: "auto", # 多语种启动切分识别语种
138
- i18n("多语种混合(粤语)").key: "auto_yue", # 多语种启动切分识别语种
139
  }
140
  dict_language = dict_language_v1 if version == "v1" else dict_language_v2
141
 
@@ -226,7 +211,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
226
  print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
227
  dict_language = dict_language_v1 if version == "v1" else dict_language_v2
228
  if prompt_language is not None and text_language is not None:
229
- if prompt_language in list(dict_language.keys()):
230
  prompt_text_update, prompt_language_update = (
231
  {"__type__": "update"},
232
  {"__type__": "update", "value": prompt_language},
@@ -234,14 +219,14 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
234
  else:
235
  prompt_text_update = {"__type__": "update", "value": ""}
236
  prompt_language_update = {"__type__": "update", "value": i18n("中文")}
237
- if text_language in list(dict_language.keys()):
238
  text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language}
239
  else:
240
  text_update = {"__type__": "update", "value": ""}
241
  text_language_update = {"__type__": "update", "value": i18n("中文")}
242
  return (
243
- {"__type__": "update", "choices": list(dict_language.keys())},
244
- {"__type__": "update", "choices": list(dict_language.keys())},
245
  prompt_text_update,
246
  prompt_language_update,
247
  text_update,
@@ -483,12 +468,12 @@ def get_tts_wav(
483
  prompt_text = prompt_text.strip("\n")
484
  if prompt_text[-1] not in splits:
485
  prompt_text += "。" if prompt_language != "en" else "."
486
- print(i18n("实际输入的参考文本:").key, prompt_text)
487
  text = text.strip("\n")
488
  if text[0] not in splits and len(get_first(text)) < 4:
489
  text = "。" + text if text_language != "en" else "." + text
490
 
491
- print(i18n("实际输入的目标文本:").key, text)
492
  zero_wav = np.zeros(
493
  int(hps.data.sampling_rate * 0.3),
494
  dtype=np.float16 if is_half == True else np.float32,
@@ -532,7 +517,7 @@ def get_tts_wav(
532
  text = cut5(text)
533
  while "\n\n" in text:
534
  text = text.replace("\n\n", "\n")
535
- print(i18n("实际输入的目标文本(切句后):").key, text)
536
  texts = text.split("\n")
537
  texts = process_text(texts)
538
  texts = merge_short_text_in_array(texts, 5)
@@ -548,9 +533,9 @@ def get_tts_wav(
548
  continue
549
  if text[-1] not in splits:
550
  text += "。" if text_language != "en" else "."
551
- print(i18n("实际输入的目标文本(每句):").key, text)
552
  phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
553
- print(i18n("前端处理后的文本(每句):").key, norm_text2)
554
  if not ref_free:
555
  bert = torch.cat([bert1, bert2], 1)
556
  all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
@@ -563,7 +548,7 @@ def get_tts_wav(
563
 
564
  t2 = ttime()
565
  # cache_key="%s-%s-%s-%s-%s-%s-%s-%s"%(ref_wav_path,prompt_text,prompt_language,text,text_language,top_k,top_p,temperature)
566
- # print(cache.keys(),if_freeze)
567
  if i_text in cache and if_freeze == True:
568
  pred_semantic = cache[i_text]
569
  else:
@@ -813,7 +798,7 @@ with gr.Blocks(
813
  ),
814
  )
815
  prompt_language = gr.Dropdown(
816
- label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文")
817
  )
818
  inp_refs = gr.File(
819
  label=i18n(
@@ -828,18 +813,18 @@ with gr.Blocks(
828
  with gr.Column():
829
  text_language = gr.Dropdown(
830
  label=i18n("需要合成的语种。限制范围越小判别效果越好。"),
831
- choices=list(dict_language.keys()),
832
  value=i18n("中文"),
833
  )
834
  how_to_cut = gr.Dropdown(
835
  label=i18n("怎么切"),
836
  choices=[
837
- i18n("不切").key,
838
- i18n("凑四句一切").key,
839
- i18n("凑50字一切").key,
840
- i18n("按中文句号。切").key,
841
- i18n("按英文句号.切").key,
842
- i18n("按标点符号切").key,
843
  ],
844
  value=i18n("凑四句一切"),
845
  interactive=True,
@@ -907,9 +892,9 @@ if __name__ == "__main__":
907
  gen = get_tts_wav(
908
  ref_wav_path=file_name,
909
  prompt_text="",
910
- prompt_language=i18n("中文").key,
911
  text="犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之.你好世界 Love you 世界へ 안녕하세요",
912
- text_language=i18n("多语种混合").key,
913
  inp_refs=[],
914
  )
915
  next(gen)
@@ -918,5 +903,5 @@ if __name__ == "__main__":
918
  server_name="0.0.0.0",
919
  inbrowser=True,
920
  show_api=False,
921
- allowed_paths=["/"],i18n=i18n
922
  )
 
84
  from module.models import SynthesizerTrn
85
  from text import cleaned_text_to_sequence
86
  from text.cleaner import clean_text
87
+ from tools.i18n.i18n import I18nAuto, scan_language_list
88
  from tools.my_utils import load_audio
89
 
90
+ language=os.environ.get("language","Auto")
91
+ language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
92
+ i18n = I18nAuto(language="Auto")
93
 
94
  # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
95
 
 
 
 
 
 
 
 
96
 
97
  if torch.cuda.is_available():
98
  device = "cuda"
 
101
  device = "cpu"
102
  is_half = False
103
 
 
 
 
 
 
 
 
 
104
  dict_language_v1 = {
105
+ i18n("中文"): "all_zh", # 全部按中文识别
106
+ i18n("英文"): "en", # 全部按英文识别#######不变
107
+ i18n("日文"): "all_ja", # 全部按日文识别
108
+ i18n("中英混合"): "zh", # 按中英混合识别####不变
109
+ i18n("日英混合"): "ja", # 按日英混合识别####不变
110
+ i18n("多语种混合"): "auto", # 多语种启动切分识别语种
111
  }
112
  dict_language_v2 = {
113
+ i18n("中文"): "all_zh", # 全部按中文识别
114
+ i18n("英文"): "en", # 全部按英文识别#######不变
115
+ i18n("日文"): "all_ja", # 全部按日文识别
116
+ i18n("粤语"): "all_yue", # 全部按中文识别
117
+ i18n("韩文"): "all_ko", # 全部按韩文识别
118
+ i18n("中英混合"): "zh", # 按中英混合识别####不变
119
+ i18n("日英混合"): "ja", # 按日英混合识别####不变
120
+ i18n("粤英混合"): "yue", # 按粤英混合识别####不变
121
+ i18n("韩英混合"): "ko", # 按韩英混合识别####不变
122
+ i18n("多语种混合"): "auto", # 多语种启动切分识别语种
123
+ i18n("多语种混合(粤语)"): "auto_yue", # 多语种启动切分识别语种
124
  }
125
  dict_language = dict_language_v1 if version == "v1" else dict_language_v2
126
 
 
211
  print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
212
  dict_language = dict_language_v1 if version == "v1" else dict_language_v2
213
  if prompt_language is not None and text_language is not None:
214
+ if prompt_language in list(dict_language.keys()):
215
  prompt_text_update, prompt_language_update = (
216
  {"__type__": "update"},
217
  {"__type__": "update", "value": prompt_language},
 
219
  else:
220
  prompt_text_update = {"__type__": "update", "value": ""}
221
  prompt_language_update = {"__type__": "update", "value": i18n("中文")}
222
+ if text_language in list(dict_language.keys()):
223
  text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language}
224
  else:
225
  text_update = {"__type__": "update", "value": ""}
226
  text_language_update = {"__type__": "update", "value": i18n("中文")}
227
  return (
228
+ {"__type__": "update", "choices": list(dict_language.keys())},
229
+ {"__type__": "update", "choices": list(dict_language.keys())},
230
  prompt_text_update,
231
  prompt_language_update,
232
  text_update,
 
468
  prompt_text = prompt_text.strip("\n")
469
  if prompt_text[-1] not in splits:
470
  prompt_text += "。" if prompt_language != "en" else "."
471
+ print(i18n("实际输入的参考文本:"), prompt_text)
472
  text = text.strip("\n")
473
  if text[0] not in splits and len(get_first(text)) < 4:
474
  text = "。" + text if text_language != "en" else "." + text
475
 
476
+ print(i18n("实际输入的目标文本:"), text)
477
  zero_wav = np.zeros(
478
  int(hps.data.sampling_rate * 0.3),
479
  dtype=np.float16 if is_half == True else np.float32,
 
517
  text = cut5(text)
518
  while "\n\n" in text:
519
  text = text.replace("\n\n", "\n")
520
+ print(i18n("实际输入的目标文本(切句后):"), text)
521
  texts = text.split("\n")
522
  texts = process_text(texts)
523
  texts = merge_short_text_in_array(texts, 5)
 
533
  continue
534
  if text[-1] not in splits:
535
  text += "。" if text_language != "en" else "."
536
+ print(i18n("实际输入的目标文本(每句):"), text)
537
  phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
538
+ print(i18n("前端处理后的文本(每句):"), norm_text2)
539
  if not ref_free:
540
  bert = torch.cat([bert1, bert2], 1)
541
  all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
 
548
 
549
  t2 = ttime()
550
  # cache_key="%s-%s-%s-%s-%s-%s-%s-%s"%(ref_wav_path,prompt_text,prompt_language,text,text_language,top_k,top_p,temperature)
551
+ # print(cache.keys(),if_freeze)
552
  if i_text in cache and if_freeze == True:
553
  pred_semantic = cache[i_text]
554
  else:
 
798
  ),
799
  )
800
  prompt_language = gr.Dropdown(
801
+ label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文")
802
  )
803
  inp_refs = gr.File(
804
  label=i18n(
 
813
  with gr.Column():
814
  text_language = gr.Dropdown(
815
  label=i18n("需要合成的语种。限制范围越小判别效果越好。"),
816
+ choices=list(dict_language.keys()),
817
  value=i18n("中文"),
818
  )
819
  how_to_cut = gr.Dropdown(
820
  label=i18n("怎么切"),
821
  choices=[
822
+ i18n("不切"),
823
+ i18n("凑四句一切"),
824
+ i18n("凑50字一切"),
825
+ i18n("按中文句号。切"),
826
+ i18n("按英文句号.切"),
827
+ i18n("按标点符号切"),
828
  ],
829
  value=i18n("凑四句一切"),
830
  interactive=True,
 
892
  gen = get_tts_wav(
893
  ref_wav_path=file_name,
894
  prompt_text="",
895
+ prompt_language=i18n("中文"),
896
  text="犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之.你好世界 Love you 世界へ 안녕하세요",
897
+ text_language=i18n("多语种混合"),
898
  inp_refs=[],
899
  )
900
  next(gen)
 
903
  server_name="0.0.0.0",
904
  inbrowser=True,
905
  show_api=False,
906
+ allowed_paths=["/"]#,i18n=i18n
907
  )