Update inference_webui.py
inference_webui.py CHANGED (+42 -57)
@@ -84,22 +84,15 @@ from module.mel_processing import spectrogram_torch
 from module.models import SynthesizerTrn
 from text import cleaned_text_to_sequence
 from text.cleaner import clean_text
-
+from tools.i18n.i18n import I18nAuto, scan_language_list
 from tools.my_utils import load_audio

-
-
-
+language=os.environ.get("language","Auto")
+language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
+i18n = I18nAuto(language="Auto")

 # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。

-i18n_dict={}
-json_root="tools/i18n/locale"
-for name in os.listdir(json_root):
-    with open("%s/%s"%(json_root,name),"r")as f:
-        data=json.loads(f.read())
-    i18n_dict[name.split(".json")[0].replace("_","-")]=data
-i18n=gr.I18n(**i18n_dict)

 if torch.cuda.is_available():
     device = "cuda"
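Note: the three added lines resolve the UI language in this order: the `language` environment variable (default "Auto"), then a trailing command-line argument if it names an available locale, and the result feeds `I18nAuto`. A minimal sketch of that resolution, with `scan_language_list` replaced by a hypothetical stand-in that merely lists the JSON files under tools/i18n/locale (the same directory the removed gr.I18n block read from); the real helper lives in tools.i18n.i18n:

    import os
    import sys

    def scan_language_list(locale_root: str = "tools/i18n/locale") -> list[str]:
        # Hypothetical stand-in for tools.i18n.i18n.scan_language_list:
        # list the locale JSON files shipped with the repo.
        if not os.path.isdir(locale_root):
            return []
        return [name[: -len(".json")] for name in os.listdir(locale_root) if name.endswith(".json")]

    # Same resolution order as the added lines: env var first, then a trailing CLI argument.
    language = os.environ.get("language", "Auto")
    language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
    print(language)  # e.g. "zh_CN" when launched as `python inference_webui.py zh_CN`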
@@ -108,34 +101,26 @@ else:
     device = "cpu"
     is_half = False

-# i18n_dict={}
-# json_root="tools/i18n/locale"
-# for name in os.listdir(json_root):
-#     with open("%s/%s"%(json_root,name),"r")as f:
-#         data=json.loads(f.read())
-#     i18n_dict[name.split(".json")[0].replace("_","-")]=data
-# i18n=gr.I18n(**i18n_dict)
-
 dict_language_v1 = {
-    i18n("中文")
-    i18n("英文")
-    i18n("日文")
-    i18n("中英混合")
-    i18n("日英混合")
-    i18n("多语种混合")
+    i18n("中文"): "all_zh",  # 全部按中文识别
+    i18n("英文"): "en",  # 全部按英文识别#######不变
+    i18n("日文"): "all_ja",  # 全部按日文识别
+    i18n("中英混合"): "zh",  # 按中英混合识别####不变
+    i18n("日英混合"): "ja",  # 按日英混合识别####不变
+    i18n("多语种混合"): "auto",  # 多语种启动切分识别语种
 }
 dict_language_v2 = {
-    i18n("中文")
-    i18n("英文")
-    i18n("日文")
-    i18n("粤语")
-    i18n("韩文")
-    i18n("中英混合")
-    i18n("日英混合")
-    i18n("粤英混合")
-    i18n("韩英混合")
-    i18n("多语种混合")
-    i18n("多语种混合(粤语)")
+    i18n("中文"): "all_zh",  # 全部按中文识别
+    i18n("英文"): "en",  # 全部按英文识别#######不变
+    i18n("日文"): "all_ja",  # 全部按日文识别
+    i18n("粤语"): "all_yue",  # 全部按中文识别
+    i18n("韩文"): "all_ko",  # 全部按韩文识别
+    i18n("中英混合"): "zh",  # 按中英混合识别####不变
+    i18n("日英混合"): "ja",  # 按日英混合识别####不变
+    i18n("粤英混合"): "yue",  # 按粤英混合识别####不变
+    i18n("韩英混合"): "ko",  # 按韩英混合识别####不变
+    i18n("多语种混合"): "auto",  # 多语种启动切分识别语种
+    i18n("多语种混合(粤语)"): "auto_yue",  # 多语种启动切分识别语种
 }
 dict_language = dict_language_v1 if version == "v1" else dict_language_v2

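Note: the mapping keys are the translated dropdown labels and the values are the internal language codes used by the synthesis pipeline. A minimal self-contained sketch of the lookup, with `i18n` stubbed as an identity fallback (assuming, as I18nAuto-style lookups do, that an untranslated key is returned unchanged):

    def i18n(key: str) -> str:
        # Identity stub standing in for tools.i18n.i18n.I18nAuto: the real class
        # looks the key up in the selected locale's JSON and falls back to the key.
        return key

    dict_language_v2 = {
        i18n("中文"): "all_zh",
        i18n("英文"): "en",
        i18n("粤语"): "all_yue",
        i18n("多语种混合"): "auto",
    }

    selected = i18n("中文")             # value coming back from the Gradio dropdown
    print(dict_language_v2[selected])   # -> "all_zh", the internal code used downstream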
@@ -226,7 +211,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
     print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
     dict_language = dict_language_v1 if version == "v1" else dict_language_v2
     if prompt_language is not None and text_language is not None:
-        if prompt_language in list(
+        if prompt_language in list(dict_language.keys()):
             prompt_text_update, prompt_language_update = (
                 {"__type__": "update"},
                 {"__type__": "update", "value": prompt_language},
@@ -234,14 +219,14 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
         else:
             prompt_text_update = {"__type__": "update", "value": ""}
             prompt_language_update = {"__type__": "update", "value": i18n("中文")}
-        if text_language in list(
+        if text_language in list(dict_language.keys()):
             text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language}
         else:
             text_update = {"__type__": "update", "value": ""}
             text_language_update = {"__type__": "update", "value": i18n("中文")}
     return (
-        {"__type__": "update", "choices": list(
-        {"__type__": "update", "choices": list(
+        {"__type__": "update", "choices": list(dict_language.keys())},
+        {"__type__": "update", "choices": list(dict_language.keys())},
         prompt_text_update,
         prompt_language_update,
         text_update,
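Note: the `{"__type__": "update"}` dicts returned here are the dict form of a Gradio component update (roughly what `gr.update(...)` produces), which lets `change_sovits_weights` refresh the language dropdowns' `choices` and reset the text boxes without rebuilding the UI. A minimal sketch of the pattern with a hypothetical callback and hard-coded choices, not the webui's actual wiring:

    import gradio as gr

    def refresh_language_dropdown(version: str):
        # Dict-style update: same "__type__": "update" convention the webui returns.
        choices = ["中文", "英文", "日文"] if version == "v1" else ["中文", "英文", "日文", "粤语", "韩文"]
        return {"__type__": "update", "choices": choices, "value": "中文"}

    with gr.Blocks() as demo:
        version_box = gr.Radio(choices=["v1", "v2"], value="v2", label="version")
        lang_box = gr.Dropdown(choices=["中文"], value="中文", label="language")
        version_box.change(refresh_language_dropdown, inputs=version_box, outputs=lang_box)
    # demo.launch()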
@@ -483,12 +468,12 @@ def get_tts_wav(
         prompt_text = prompt_text.strip("\n")
         if prompt_text[-1] not in splits:
             prompt_text += "。" if prompt_language != "en" else "."
-        print(i18n("实际输入的参考文本:")
+        print(i18n("实际输入的参考文本:"), prompt_text)
     text = text.strip("\n")
     if text[0] not in splits and len(get_first(text)) < 4:
         text = "。" + text if text_language != "en" else "." + text

-    print(i18n("实际输入的目标文本:")
+    print(i18n("实际输入的目标文本:"), text)
     zero_wav = np.zeros(
         int(hps.data.sampling_rate * 0.3),
         dtype=np.float16 if is_half == True else np.float32,
@@ -532,7 +517,7 @@ def get_tts_wav(
         text = cut5(text)
     while "\n\n" in text:
         text = text.replace("\n\n", "\n")
-    print(i18n("实际输入的目标文本(切句后):")
+    print(i18n("实际输入的目标文本(切句后):"), text)
     texts = text.split("\n")
     texts = process_text(texts)
     texts = merge_short_text_in_array(texts, 5)
@@ -548,9 +533,9 @@ def get_tts_wav(
             continue
         if text[-1] not in splits:
             text += "。" if text_language != "en" else "."
-        print(i18n("实际输入的目标文本(每句):")
+        print(i18n("实际输入的目标文本(每句):"), text)
         phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
-        print(i18n("前端处理后的文本(每句):")
+        print(i18n("前端处理后的文本(每句):"), norm_text2)
         if not ref_free:
             bert = torch.cat([bert1, bert2], 1)
             all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
@@ -563,7 +548,7 @@ def get_tts_wav(

         t2 = ttime()
         # cache_key="%s-%s-%s-%s-%s-%s-%s-%s"%(ref_wav_path,prompt_text,prompt_language,text,text_language,top_k,top_p,temperature)
-        # print(
+        # print(cache.keys(),if_freeze)
         if i_text in cache and if_freeze == True:
             pred_semantic = cache[i_text]
         else:
@@ -813,7 +798,7 @@ with gr.Blocks(
                     ),
                 )
                 prompt_language = gr.Dropdown(
-                    label=i18n("参考音频的语种"), choices=list(
+                    label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文")
                 )
                 inp_refs = gr.File(
                     label=i18n(
@@ -828,18 +813,18 @@ with gr.Blocks(
             with gr.Column():
                 text_language = gr.Dropdown(
                     label=i18n("需要合成的语种。限制范围越小判别效果越好。"),
-                    choices=list(
+                    choices=list(dict_language.keys()),
                     value=i18n("中文"),
                 )
                 how_to_cut = gr.Dropdown(
                     label=i18n("怎么切"),
                     choices=[
-                        i18n("不切")
-                        i18n("凑四句一切")
-                        i18n("凑50字一切")
-                        i18n("按中文句号。切")
-                        i18n("按英文句号.切")
-                        i18n("按标点符号切")
+                        i18n("不切"),
+                        i18n("凑四句一切"),
+                        i18n("凑50字一切"),
+                        i18n("按中文句号。切"),
+                        i18n("按英文句号.切"),
+                        i18n("按标点符号切"),
                     ],
                     value=i18n("凑四句一切"),
                     interactive=True,
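Note: the `how_to_cut` choices are translated labels, and `get_tts_wav` compares the selected value against the same `i18n()` calls (the `text = cut5(text)` context line in the `@@ -532` hunk is the tail of that if/elif chain), so the dropdown and the comparisons must share one i18n instance. A minimal sketch of that label-comparison pattern, with `i18n` and the cut functions stubbed as hypothetical placeholders:

    def i18n(key: str) -> str:
        return key  # identity stub; the webui uses I18nAuto here

    def cut1(text: str) -> str:
        return text  # hypothetical stub for "凑四句一切"

    def cut5(text: str) -> str:
        return text  # hypothetical stub for "按标点符号切"

    def apply_cut(how_to_cut: str, text: str) -> str:
        # The dropdown value is the translated label, so compare against i18n() output.
        if how_to_cut == i18n("凑四句一切"):
            return cut1(text)
        if how_to_cut == i18n("按标点符号切"):
            return cut5(text)
        return text  # i18n("不切"): leave the text unchanged

    print(apply_cut(i18n("按标点符号切"), "你好。世界"))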
@@ -907,9 +892,9 @@ if __name__ == "__main__":
     gen = get_tts_wav(
         ref_wav_path=file_name,
         prompt_text="",
-        prompt_language=i18n("中文")
+        prompt_language=i18n("中文"),
         text="犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之.你好世界 Love you 世界へ 안녕하세요",
-        text_language=i18n("多语种混合")
+        text_language=i18n("多语种混合"),
         inp_refs=[],
     )
     next(gen)
@@ -918,5 +903,5 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         inbrowser=True,
         show_api=False,
-        allowed_paths=["/"]
+        allowed_paths=["/"]#,i18n=i18n
     )