lj1995 committed
Commit 4c1cc9a · verified · 1 Parent(s): b40b16a

Update inference_webui.py

Files changed (1):
  1. inference_webui.py +39 -31
inference_webui.py CHANGED
@@ -1,4 +1,5 @@
  import os
+ os.system("pip install gradio-client==1.10.4 gradio-5.35.0-py3-none-any.whl")
 
  os.makedirs("pretrained_models", exist_ok=True)
  from huggingface_hub import snapshot_download
@@ -84,15 +85,22 @@ from module.mel_processing import spectrogram_torch
  from module.models import SynthesizerTrn
  from text import cleaned_text_to_sequence
  from text.cleaner import clean_text
- from tools.i18n.i18n import I18nAuto, scan_language_list
+ # from tools.i18n.i18n import I18nAuto, scan_language_list
  from tools.my_utils import load_audio
 
- language=os.environ.get("language","Auto")
- language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
- i18n = I18nAuto(language="Auto")
+ # language=os.environ.get("language","Auto")
+ # language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
+ # i18n = I18nAuto(language="Auto")
 
  # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
 
+ i18n_dict={}
+ json_root="tools/i18n/locale"
+ for name in os.listdir(json_root):
+     with open("%s/%s"%(json_root,name),"r")as f:
+         data=json.loads(f.read())
+     i18n_dict[name.split(".json")[0].replace("_","-")]=data
+ i18n=gr.I18n(**i18n_dict)
 
  if torch.cuda.is_available():
      device = "cuda"
@@ -102,25 +110,25 @@ else:
      is_half = False
 
  dict_language_v1 = {
-     i18n("中文"): "all_zh", # 全部按中文识别
-     i18n("英文"): "en", # 全部按英文识别#######不变
-     i18n("日文"): "all_ja", # 全部按日文识别
-     i18n("中英混合"): "zh", # 按中英混合识别####不变
-     i18n("日英混合"): "ja", # 按日英混合识别####不变
-     i18n("多语种混合"): "auto", # 多语种启动切分识别语种
+     "中文": "all_zh", # 全部按中文识别
+     "英文": "en", # 全部按英文识别#######不变
+     "日文": "all_ja", # 全部按日文识别
+     "中英混合": "zh", # 按中英混合识别####不变
+     "日英混合": "ja", # 按日英混合识别####不变
+     "多语种混合": "auto", # 多语种启动切分识别语种
  }
  dict_language_v2 = {
-     i18n("中文"): "all_zh", # 全部按中文识别
-     i18n("英文"): "en", # 全部按英文识别#######不变
-     i18n("日文"): "all_ja", # 全部按日文识别
-     i18n("粤语"): "all_yue", # 全部按中文识别
-     i18n("韩文"): "all_ko", # 全部按韩文识别
-     i18n("中英混合"): "zh", # 按中英混合识别####不变
-     i18n("日英混合"): "ja", # 按日英混合识别####不变
-     i18n("粤英混合"): "yue", # 按粤英混合识别####不变
-     i18n("韩英混合"): "ko", # 按韩英混合识别####不变
-     i18n("多语种混合"): "auto", # 多语种启动切分识别语种
-     i18n("多语种混合(粤语)"): "auto_yue", # 多语种启动切分识别语种
+     "中文": "all_zh", # 全部按中文识别
+     "英文": "en", # 全部按英文识别#######不变
+     "日文": "all_ja", # 全部按日文识别
+     "粤语": "all_yue", # 全部按中文识别
+     "韩文": "all_ko", # 全部按韩文识别
+     "中英混合": "zh", # 按中英混合识别####不变
+     "日英混合": "ja", # 按日英混合识别####不变
+     "粤英混合": "yue", # 按粤英混合识别####不变
+     "韩英混合": "ko", # 按韩英混合识别####不变
+     "多语种混合": "auto", # 多语种启动切分识别语种
+     "多语种混合(粤语)": "auto_yue", # 多语种启动切分识别语种
  }
  dict_language = dict_language_v1 if version == "v1" else dict_language_v2
 
@@ -211,7 +219,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
      print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
      dict_language = dict_language_v1 if version == "v1" else dict_language_v2
      if prompt_language is not None and text_language is not None:
-         if prompt_language in list(dict_language.keys()):
+         if prompt_language in dict_language:
              prompt_text_update, prompt_language_update = (
                  {"__type__": "update"},
                  {"__type__": "update", "value": prompt_language},
@@ -219,7 +227,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
          else:
              prompt_text_update = {"__type__": "update", "value": ""}
              prompt_language_update = {"__type__": "update", "value": i18n("中文")}
-         if text_language in list(dict_language.keys()):
+         if text_language in dict_language:
              text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language}
          else:
              text_update = {"__type__": "update", "value": ""}
@@ -468,12 +476,12 @@ def get_tts_wav(
          prompt_text = prompt_text.strip("\n")
          if prompt_text[-1] not in splits:
              prompt_text += "。" if prompt_language != "en" else "."
-         print(i18n("实际输入的参考文本:"), prompt_text)
+         print(i18n("实际输入的参考文本:").key, prompt_text)
      text = text.strip("\n")
      if text[0] not in splits and len(get_first(text)) < 4:
          text = "。" + text if text_language != "en" else "." + text
 
-     print(i18n("实际输入的目标文本:"), text)
+     print(i18n("实际输入的目标文本:").key, text)
      zero_wav = np.zeros(
          int(hps.data.sampling_rate * 0.3),
          dtype=np.float16 if is_half == True else np.float32,
@@ -517,7 +525,7 @@ def get_tts_wav(
          text = cut5(text)
      while "\n\n" in text:
          text = text.replace("\n\n", "\n")
-     print(i18n("实际输入的目标文本(切句后):"), text)
+     print(i18n("实际输入的目标文本(切句后):").key, text)
      texts = text.split("\n")
      texts = process_text(texts)
      texts = merge_short_text_in_array(texts, 5)
@@ -533,9 +541,9 @@ def get_tts_wav(
              continue
          if text[-1] not in splits:
              text += "。" if text_language != "en" else "."
-         print(i18n("实际输入的目标文本(每句):"), text)
+         print(i18n("实际输入的目标文本(每句):").key, text)
          phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
-         print(i18n("前端处理后的文本(每句):"), norm_text2)
+         print(i18n("前端处理后的文本(每句):").key, norm_text2)
          if not ref_free:
              bert = torch.cat([bert1, bert2], 1)
          all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
@@ -892,9 +900,9 @@ if __name__ == "__main__":
      gen = get_tts_wav(
          ref_wav_path=file_name,
          prompt_text="",
-         prompt_language=i18n("中文"),
+         prompt_language="中文",
          text="犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之.你好世界 Love you 世界へ 안녕하세요",
-         text_language=i18n("多语种混合"),
+         text_language="多语种混合",
          inp_refs=[],
      )
      next(gen)
@@ -903,5 +911,5 @@ if __name__ == "__main__":
          server_name="0.0.0.0",
          inbrowser=True,
          show_api=False,
-         allowed_paths=["/"]#,i18n=i18n
+         allowed_paths=["/"],i18n=i18n
      )
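
For reference, the commit swaps the repo's I18nAuto helper for Gradio's built-in gr.I18n (hence the local gradio-5.35.0 wheel installed at the top of the file). Below is a minimal sketch of that pattern, not the Space's actual code: the locale codes and English translations are made up for illustration, while the translation keys ("中文" = "Chinese", "多语种混合" = "multilingual mixed") and the .key / launch(i18n=...) usage mirror what the diff does. Each keyword passed to gr.I18n maps a locale code to a dict of translation keys; i18n("key") returns a placeholder that the browser resolves to the visitor's locale once the instance is handed to launch().

import gradio as gr

# Illustrative translation tables. The commit instead builds this mapping by
# loading every JSON file under tools/i18n/locale and turning a filename such
# as "zh_CN.json" into the locale code "zh-CN".
i18n = gr.I18n(**{
    "en-US": {"中文": "Chinese", "多语种混合": "Multilingual mixed"},
    "zh-CN": {"中文": "中文", "多语种混合": "多语种混合"},
})

with gr.Blocks() as demo:
    # i18n("key") yields a locale-aware placeholder, so the same key can be
    # used directly as a component label.
    gr.Dropdown(choices=["中文", "多语种混合"], label=i18n("中文"))

# On the server the placeholder is not a plain str; the original key is read
# back through .key, as in the modified print() calls above.
print(i18n("中文").key)

# Passing the I18n instance to launch() ships the translation tables to the UI.
demo.launch(i18n=i18n)

Because the placeholder is only resolved in the browser, values that are compared on the server side, the dict_language_v1/v2 keys and the prompt_language / text_language arguments in the __main__ warm-up call, are presumably why this commit switches those spots to plain Chinese strings instead of i18n(...) objects.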