GPT-SoVITS-ProPlus

Running on Zero

App Files Files Community

lj1995 committed on 12 days ago

Commit

31ba509

verified ·

1 Parent(s): 2b68c4c

Update inference_webui.py

Browse files

Files changed (1) hide show

inference_webui.py +44 -35

inference_webui.py CHANGED Viewed

@@ -84,15 +84,23 @@ from module.mel_processing import spectrogram_torch
 from module.models import SynthesizerTrn
 from text import cleaned_text_to_sequence
 from text.cleaner import clean_text
-from tools.i18n.i18n import I18nAuto, scan_language_list
 from tools.my_utils import load_audio
-language=os.environ.get("language","Auto")
-language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
-i18n = I18nAuto(language="Auto")
 # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # 确保直接启动推理UI时也能够设置。
 if torch.cuda.is_available():
     device = "cuda"
     is_half = True  # eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
@@ -107,26 +115,27 @@ else:
 #         data=json.loads(f.read())
 #     i18n_dict[name.split(".json")[0].replace("_","-")]=data
 # i18n=gr.I18n(**i18n_dict)
 dict_language_v1 = {
-    i18n("中文"): "all_zh",  # 全部按中文识别
-    i18n("英文"): "en",  # 全部按英文识别#######不变
-    i18n("日文"): "all_ja",  # 全部按日文识别
-    i18n("中英混合"): "zh",  # 按中英混合识别####不变
-    i18n("日英混合"): "ja",  # 按日英混合识别####不变
-    i18n("多语种混合"): "auto",  # 多语种启动切分识别语种
 }
 dict_language_v2 = {
-    i18n("中文"): "all_zh",  # 全部按中文识别
-    i18n("英文"): "en",  # 全部按英文识别#######不变
-    i18n("日文"): "all_ja",  # 全部按日文识别
-    i18n("粤语"): "all_yue",  # 全部按中文识别
-    i18n("韩文"): "all_ko",  # 全部按韩文识别
-    i18n("中英混合"): "zh",  # 按中英混合识别####不变
-    i18n("日英混合"): "ja",  # 按日英混合识别####不变
-    i18n("粤英混合"): "yue",  # 按粤英混合识别####不变
-    i18n("韩英混合"): "ko",  # 按韩英混合识别####不变
-    i18n("多语种混合"): "auto",  # 多语种启动切分识别语种
-    i18n("多语种混合(粤语)"): "auto_yue",  # 多语种启动切分识别语种
 }
 dict_language = dict_language_v1 if version == "v1" else dict_language_v2
@@ -474,12 +483,12 @@ def get_tts_wav(
         prompt_text = prompt_text.strip("\n")
         if prompt_text[-1] not in splits:
             prompt_text += "。" if prompt_language != "en" else "."
-        print(i18n("实际输入的参考文本:"), prompt_text)
     text = text.strip("\n")
     if text[0] not in splits and len(get_first(text)) < 4:
         text = "。" + text if text_language != "en" else "." + text
-    print(i18n("实际输入的目标文本:"), text)
     zero_wav = np.zeros(
         int(hps.data.sampling_rate * 0.3),
         dtype=np.float16 if is_half == True else np.float32,
@@ -523,7 +532,7 @@ def get_tts_wav(
         text = cut5(text)
     while "\n\n" in text:
         text = text.replace("\n\n", "\n")
-    print(i18n("实际输入的目标文本(切句后):"), text)
     texts = text.split("\n")
     texts = process_text(texts)
     texts = merge_short_text_in_array(texts, 5)
@@ -539,9 +548,9 @@ def get_tts_wav(
             continue
         if text[-1] not in splits:
             text += "。" if text_language != "en" else "."
-        print(i18n("实际输入的目标文本(每句):"), text)
         phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
-        print(i18n("前端处理后的文本(每句):"), norm_text2)
         if not ref_free:
             bert = torch.cat([bert1, bert2], 1)
             all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
@@ -825,12 +834,12 @@ with gr.Blocks(
             how_to_cut = gr.Dropdown(
                 label=i18n("怎么切"),
                 choices=[
-                    i18n("不切"),
-                    i18n("凑四句一切"),
-                    i18n("凑50字一切"),
-                    i18n("按中文句号。切"),
-                    i18n("按英文句号.切"),
-                    i18n("按标点符号切"),
                 ],
                 value=i18n("凑四句一切"),
                 interactive=True,
@@ -898,9 +907,9 @@ if __name__ == "__main__":
             gen = get_tts_wav(
                 ref_wav_path=file_name,
                 prompt_text="",
-                prompt_language=i18n("中文"),
                 text="犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之.你好世界 Love you 世界へ 안녕하세요",
-                text_language=i18n("多语种混合"),
                 inp_refs=[],
             )
             next(gen)
@@ -909,5 +918,5 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         inbrowser=True,
         show_api=False,
-        allowed_paths=["/"]#,i18n=i18n
     )

 from module.models import SynthesizerTrn
 from text import cleaned_text_to_sequence
 from text.cleaner import clean_text
+# from tools.i18n.i18n import I18nAuto, scan_language_list
 from tools.my_utils import load_audio
+# language=os.environ.get("language","Auto")
+# language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
+# i18n = I18nAuto(language="Auto")
 # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # 确保直接启动推理UI时也能够设置。
+i18n_dict={}
+json_root="tools/i18n/locale"
+for name in os.listdir(json_root):
+    with open("%s/%s"%(json_root,name),"r")as f:
+        data=json.loads(f.read())
+    i18n_dict[name.split(".json")[0].replace("_","-")]=data
+i18n=gr.I18n(**i18n_dict)
 if torch.cuda.is_available():
     device = "cuda"
     is_half = True  # eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
 #         data=json.loads(f.read())
 #     i18n_dict[name.split(".json")[0].replace("_","-")]=data
 # i18n=gr.I18n(**i18n_dict)
 dict_language_v1 = {
+    i18n("中文").key: "all_zh",  # 全部按中文识别
+    i18n("英文").key: "en",  # 全部按英文识别#######不变
+    i18n("日文").key: "all_ja",  # 全部按日文识别
+    i18n("中英混合").key: "zh",  # 按中英混合识别####不变
+    i18n("日英混合").key: "ja",  # 按日英混合识别####不变
+    i18n("多语种混合").key: "auto",  # 多语种启动切分识别语种
 }
 dict_language_v2 = {
+    i18n("中文").key: "all_zh",  # 全部按中文识别
+    i18n("英文").key: "en",  # 全部按英文识别#######不变
+    i18n("日文").key: "all_ja",  # 全部按日文识别
+    i18n("粤语").key: "all_yue",  # 全部按粤语识别
+    i18n("韩文").key: "all_ko",  # 全部按韩文识别
+    i18n("中英混合").key: "zh",  # 按中英混合识别####不变
+    i18n("日英混合").key: "ja",  # 按日英混合识别####不变
+    i18n("粤英混合").key: "yue",  # 按粤英混合识别####不变
+    i18n("韩英混合").key: "ko",  # 按韩英混合识别####不变
+    i18n("多语种混合").key: "auto",  # 多语种启动切分识别语种
+    i18n("多语种混合(粤语)").key: "auto_yue",  # 多语种启动切分识别语种
 }
 dict_language = dict_language_v1 if version == "v1" else dict_language_v2
         prompt_text = prompt_text.strip("\n")
         if prompt_text[-1] not in splits:
             prompt_text += "。" if prompt_language != "en" else "."
+        print(i18n("实际输入的参考文本:").key, prompt_text)
     text = text.strip("\n")
     if text[0] not in splits and len(get_first(text)) < 4:
         text = "。" + text if text_language != "en" else "." + text
+    print(i18n("实际输入的目标文本:").key, text)
     zero_wav = np.zeros(
         int(hps.data.sampling_rate * 0.3),
         dtype=np.float16 if is_half == True else np.float32,
         text = cut5(text)
     while "\n\n" in text:
         text = text.replace("\n\n", "\n")
+    print(i18n("实际输入的目标文本(切句后):").key, text)
     texts = text.split("\n")
     texts = process_text(texts)
     texts = merge_short_text_in_array(texts, 5)
             continue
         if text[-1] not in splits:
             text += "。" if text_language != "en" else "."
+        print(i18n("实际输入的目标文本(每句):").key, text)
         phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
+        print(i18n("前端处理后的文本(每句):").key, norm_text2)
         if not ref_free:
             bert = torch.cat([bert1, bert2], 1)
             all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
             how_to_cut = gr.Dropdown(
                 label=i18n("怎么切"),
                 choices=[
+                    i18n("不切").key,
+                    i18n("凑四句一切").key,
+                    i18n("凑50字一切").key,
+                    i18n("按中文句号。切").key,
+                    i18n("按英文句号.切").key,
+                    i18n("按标点符号切").key,
                 ],
                 value=i18n("凑四句一切"),
                 interactive=True,
             gen = get_tts_wav(
                 ref_wav_path=file_name,
                 prompt_text="",
+                prompt_language=i18n("中文").key,
                 text="犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之.你好世界 Love you 世界へ 안녕하세요",
+                text_language=i18n("多语种混合").key,
                 inp_refs=[],
             )
             next(gen)
         server_name="0.0.0.0",
         inbrowser=True,
         show_api=False,
+        allowed_paths=["/"],i18n=i18n
     )