Spaces:
Running
on
Zero
Running
on
Zero
Update inference_webui.py
Browse files- inference_webui.py +39 -31
inference_webui.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
|
3 |
os.makedirs("pretrained_models", exist_ok=True)
|
4 |
from huggingface_hub import snapshot_download
|
@@ -84,15 +85,22 @@ from module.mel_processing import spectrogram_torch
|
|
84 |
from module.models import SynthesizerTrn
|
85 |
from text import cleaned_text_to_sequence
|
86 |
from text.cleaner import clean_text
|
87 |
-
from tools.i18n.i18n import I18nAuto, scan_language_list
|
88 |
from tools.my_utils import load_audio
|
89 |
|
90 |
-
language=os.environ.get("language","Auto")
|
91 |
-
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
92 |
-
i18n = I18nAuto(language="Auto")
|
93 |
|
94 |
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
if torch.cuda.is_available():
|
98 |
device = "cuda"
|
@@ -102,25 +110,25 @@ else:
|
|
102 |
is_half = False
|
103 |
|
104 |
dict_language_v1 = {
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
}
|
112 |
dict_language_v2 = {
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
}
|
125 |
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
|
126 |
|
@@ -211,7 +219,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
|
211 |
print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
|
212 |
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
|
213 |
if prompt_language is not None and text_language is not None:
|
214 |
-
if prompt_language in
|
215 |
prompt_text_update, prompt_language_update = (
|
216 |
{"__type__": "update"},
|
217 |
{"__type__": "update", "value": prompt_language},
|
@@ -219,7 +227,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
|
219 |
else:
|
220 |
prompt_text_update = {"__type__": "update", "value": ""}
|
221 |
prompt_language_update = {"__type__": "update", "value": i18n("中文")}
|
222 |
-
if text_language in
|
223 |
text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language}
|
224 |
else:
|
225 |
text_update = {"__type__": "update", "value": ""}
|
@@ -468,12 +476,12 @@ def get_tts_wav(
|
|
468 |
prompt_text = prompt_text.strip("\n")
|
469 |
if prompt_text[-1] not in splits:
|
470 |
prompt_text += "。" if prompt_language != "en" else "."
|
471 |
-
print(i18n("实际输入的参考文本:"), prompt_text)
|
472 |
text = text.strip("\n")
|
473 |
if text[0] not in splits and len(get_first(text)) < 4:
|
474 |
text = "。" + text if text_language != "en" else "." + text
|
475 |
|
476 |
-
print(i18n("实际输入的目标文本:"), text)
|
477 |
zero_wav = np.zeros(
|
478 |
int(hps.data.sampling_rate * 0.3),
|
479 |
dtype=np.float16 if is_half == True else np.float32,
|
@@ -517,7 +525,7 @@ def get_tts_wav(
|
|
517 |
text = cut5(text)
|
518 |
while "\n\n" in text:
|
519 |
text = text.replace("\n\n", "\n")
|
520 |
-
print(i18n("实际输入的目标文本(切句后):"), text)
|
521 |
texts = text.split("\n")
|
522 |
texts = process_text(texts)
|
523 |
texts = merge_short_text_in_array(texts, 5)
|
@@ -533,9 +541,9 @@ def get_tts_wav(
|
|
533 |
continue
|
534 |
if text[-1] not in splits:
|
535 |
text += "。" if text_language != "en" else "."
|
536 |
-
print(i18n("实际输入的目标文本(每句):"), text)
|
537 |
phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
|
538 |
-
print(i18n("前端处理后的文本(每句):"), norm_text2)
|
539 |
if not ref_free:
|
540 |
bert = torch.cat([bert1, bert2], 1)
|
541 |
all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
|
@@ -892,9 +900,9 @@ if __name__ == "__main__":
|
|
892 |
gen = get_tts_wav(
|
893 |
ref_wav_path=file_name,
|
894 |
prompt_text="",
|
895 |
-
prompt_language=
|
896 |
text="犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之.你好世界 Love you 世界へ 안녕하세요",
|
897 |
-
text_language=
|
898 |
inp_refs=[],
|
899 |
)
|
900 |
next(gen)
|
@@ -903,5 +911,5 @@ if __name__ == "__main__":
|
|
903 |
server_name="0.0.0.0",
|
904 |
inbrowser=True,
|
905 |
show_api=False,
|
906 |
-
allowed_paths=["/"]
|
907 |
)
|
|
|
1 |
import os
|
2 |
+
os.system("pip install gradio-client==1.10.4 gradio-5.35.0-py3-none-any.whl")
|
3 |
|
4 |
os.makedirs("pretrained_models", exist_ok=True)
|
5 |
from huggingface_hub import snapshot_download
|
|
|
85 |
from module.models import SynthesizerTrn
|
86 |
from text import cleaned_text_to_sequence
|
87 |
from text.cleaner import clean_text
|
88 |
+
# from tools.i18n.i18n import I18nAuto, scan_language_list
|
89 |
from tools.my_utils import load_audio
|
90 |
|
91 |
+
# language=os.environ.get("language","Auto")
|
92 |
+
# language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
93 |
+
# i18n = I18nAuto(language="Auto")
|
94 |
|
95 |
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
|
96 |
|
97 |
+
i18n_dict={}
|
98 |
+
json_root="tools/i18n/locale"
|
99 |
+
for name in os.listdir(json_root):
|
100 |
+
with open("%s/%s"%(json_root,name),"r")as f:
|
101 |
+
data=json.loads(f.read())
|
102 |
+
i18n_dict[name.split(".json")[0].replace("_","-")]=data
|
103 |
+
i18n=gr.I18n(**i18n_dict)
|
104 |
|
105 |
if torch.cuda.is_available():
|
106 |
device = "cuda"
|
|
|
110 |
is_half = False
|
111 |
|
112 |
dict_language_v1 = {
|
113 |
+
"中文": "all_zh", # 全部按中文识别
|
114 |
+
"英文": "en", # 全部按英文识别#######不变
|
115 |
+
"日文": "all_ja", # 全部按日文识别
|
116 |
+
"中英混合": "zh", # 按中英混合识别####不变
|
117 |
+
"日英混合": "ja", # 按日英混合识别####不变
|
118 |
+
"多语种混合": "auto", # 多语种启动切分识别语种
|
119 |
}
|
120 |
dict_language_v2 = {
|
121 |
+
"中文": "all_zh", # 全部按中文识别
|
122 |
+
"英文": "en", # 全部按英文识别#######不变
|
123 |
+
"日文": "all_ja", # 全部按日文识别
|
124 |
+
"粤语": "all_yue", # 全部按粤语识别
|
125 |
+
"韩文": "all_ko", # 全部按韩文识别
|
126 |
+
"中英混合": "zh", # 按中英混合识别####不变
|
127 |
+
"日英混合": "ja", # 按日英混合识别####不变
|
128 |
+
"粤英混合": "yue", # 按粤英混合识别####不变
|
129 |
+
"韩英混合": "ko", # 按韩英混合识别####不变
|
130 |
+
"多语种混合": "auto", # 多语种启动切分识别语种
|
131 |
+
"多语种混合(粤语)": "auto_yue", # 多语种启动切分识别语种
|
132 |
}
|
133 |
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
|
134 |
|
|
|
219 |
print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
|
220 |
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
|
221 |
if prompt_language is not None and text_language is not None:
|
222 |
+
if prompt_language in dict_language:
|
223 |
prompt_text_update, prompt_language_update = (
|
224 |
{"__type__": "update"},
|
225 |
{"__type__": "update", "value": prompt_language},
|
|
|
227 |
else:
|
228 |
prompt_text_update = {"__type__": "update", "value": ""}
|
229 |
prompt_language_update = {"__type__": "update", "value": i18n("中文")}
|
230 |
+
if text_language in dict_language:
|
231 |
text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language}
|
232 |
else:
|
233 |
text_update = {"__type__": "update", "value": ""}
|
|
|
476 |
prompt_text = prompt_text.strip("\n")
|
477 |
if prompt_text[-1] not in splits:
|
478 |
prompt_text += "。" if prompt_language != "en" else "."
|
479 |
+
print(i18n("实际输入的参考文本:").key, prompt_text)
|
480 |
text = text.strip("\n")
|
481 |
if text[0] not in splits and len(get_first(text)) < 4:
|
482 |
text = "。" + text if text_language != "en" else "." + text
|
483 |
|
484 |
+
print(i18n("实际输入的目标文本:").key, text)
|
485 |
zero_wav = np.zeros(
|
486 |
int(hps.data.sampling_rate * 0.3),
|
487 |
dtype=np.float16 if is_half == True else np.float32,
|
|
|
525 |
text = cut5(text)
|
526 |
while "\n\n" in text:
|
527 |
text = text.replace("\n\n", "\n")
|
528 |
+
print(i18n("实际输入的目标文本(切句后):").key, text)
|
529 |
texts = text.split("\n")
|
530 |
texts = process_text(texts)
|
531 |
texts = merge_short_text_in_array(texts, 5)
|
|
|
541 |
continue
|
542 |
if text[-1] not in splits:
|
543 |
text += "。" if text_language != "en" else "."
|
544 |
+
print(i18n("实际输入的目标文本(每句):").key, text)
|
545 |
phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
|
546 |
+
print(i18n("前端处理后的文本(每句):").key, norm_text2)
|
547 |
if not ref_free:
|
548 |
bert = torch.cat([bert1, bert2], 1)
|
549 |
all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
|
|
|
900 |
gen = get_tts_wav(
|
901 |
ref_wav_path=file_name,
|
902 |
prompt_text="",
|
903 |
+
prompt_language="中文",
|
904 |
text="犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之.你好世界 Love you 世界へ 안녕하세요",
|
905 |
+
text_language="多语种混合",
|
906 |
inp_refs=[],
|
907 |
)
|
908 |
next(gen)
|
|
|
911 |
server_name="0.0.0.0",
|
912 |
inbrowser=True,
|
913 |
show_api=False,
|
914 |
+
allowed_paths=["/"],i18n=i18n
|
915 |
)
|