lmx committed on
Commit
ec38d17
·
1 Parent(s): de18782
Files changed (1) hide show
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ import time
3
+ import gradio as gr
4
+ # import utils
5
+ # import commons
6
+ # from models import SynthesizerTrn
7
+ # from text import text_to_sequence
8
+ # from torch import no_grad, LongTensor
9
+
10
+ # hps_ms = utils.get_hparams_from_file(r'./model/config.json')
11
+ # net_g_ms = SynthesizerTrn(
12
+ # len(hps_ms.symbols),
13
+ # hps_ms.data.filter_length // 2 + 1,
14
+ # hps_ms.train.segment_size // hps_ms.data.hop_length,
15
+ # n_speakers=hps_ms.data.n_speakers,
16
+ # **hps_ms.model)
17
+ # _ = net_g_ms.eval()
18
+ # speakers = hps_ms.speakers
19
+ # model, optimizer, learning_rate, epochs = utils.load_checkpoint(r'./model/G_953000.pth', net_g_ms, None)
20
+
21
+
22
+ # def get_text(text, hps):
23
+ # text_norm, clean_text = text_to_sequence(text, hps.symbols, hps.data.text_cleaners)
24
+ # if hps.data.add_blank:
25
+ # text_norm = commons.intersperse(text_norm, 0)
26
+ # text_norm = LongTensor(text_norm)
27
+ # return text_norm, clean_text
28
+ #
29
+ #
30
+ # def vits(text, language, speaker_id, noise_scale, noise_scale_w, length_scale):
31
+ # start = time.perf_counter()
32
+ # if not len(text):
33
+ # return "输入文本不能为空!", None, None
34
+ # text = text.replace('\n', ' ').replace('\r', '').replace(" ", "")
35
+ # if len(text) > 100:
36
+ # return f"输入文字过长!{len(text)}>100", None, None
37
+ # if language == 0:
38
+ # text = f"[ZH]{text}[ZH]"
39
+ # elif language == 1:
40
+ # text = f"[JA]{text}[JA]"
41
+ # else:
42
+ # text = f"{text}"
43
+ # stn_tst, clean_text = get_text(text, hps_ms)
44
+ # with no_grad():
45
+ # x_tst = stn_tst.unsqueeze(0)
46
+ # x_tst_lengths = LongTensor([stn_tst.size(0)])
47
+ # speaker_id = LongTensor([speaker_id])
48
+ # audio = \
49
+ # net_g_ms.infer(x_tst, x_tst_lengths, sid=speaker_id, noise_scale=noise_scale, noise_scale_w=noise_scale_w,
50
+ # length_scale=length_scale)[0][0, 0].data.float().numpy()
51
+ #
52
+ # return "生成成功!", (22050, audio), f"生成耗时 {round(time.perf_counter() - start, 2)} s"
53
+
54
+
55
+ # def search_speaker(search_value):
56
+ # for s in speakers:
57
+ # if search_value == s:
58
+ # return s
59
+ # for s in speakers:
60
+ # if search_value in s:
61
+ # return s
62
+ #
63
+ #
64
+ # def change_lang(language):
65
+ # if language == 0:
66
+ # return 0.6, 0.668, 1.2
67
+ # else:
68
+ # return 0.6, 0.668, 1.1
69
+
70
+
71
# JavaScript callback, kept as a Python string, that downloads the generated
# audio in the browser. It looks up the <audio> element inside "#tts-audio"
# and the <textarea> inside "#input-text", then clicks a temporary
# <a download> link named after the first 20 characters of the input text
# plus ".wav".
#
# Braces are doubled because the (currently commented-out) caller passes this
# template through str.format(), which collapses "{{" / "}}" to "{" / "}".
#
# The random fallback name is wrapped in String(): a bare Number has no
# string-slicing method, so slicing it directly would throw a TypeError.
# The fallback is also used for an empty textbox so the file is never
# named just ".wav".
download_audio_js = """
() =>{{
    let root = document.querySelector("body > gradio-app");
    if (root.shadowRoot != null)
        root = root.shadowRoot;
    let audio = root.querySelector("#tts-audio").querySelector("audio");
    let text = root.querySelector("#input-text").querySelector("textarea");
    if (audio == undefined)
        return;
    text = text.value;
    if (text == undefined || text === "")
        text = String(Math.floor(Math.random()*100000000));
    audio = audio.src;
    let oA = document.createElement("a");
    oA.download = text.slice(0, 20)+'.wav';
    oA.href = audio;
    document.body.appendChild(oA);
    oA.click();
    oA.remove();
}}
"""
92
+
93
if __name__ == "__main__":
    # Static Gradio page for the VITS demo: a Markdown banner, an input
    # column (text, language, speaker search, three sliders) and an output
    # column (message, audio, extra info, download button).
    with gr.Blocks() as app:
        # Banner: title, usage restriction and two hints about the output.
        gr.Markdown(
            "# <center> VITS语音在线合成demo\n"
            "# <center> 严禁将模型用于任何商业项目,否则后果自负\n"
            "<div align='center'>主要有赛马娘,原神中文,原神日语,崩坏3的音色</div>"
            '<div align="center"><a><font color="#dd0000">结果有随机性,语调可能很奇怪,可多次生成取最佳效果</font></a></div>'
            '<div align="center"><a><font color="#dd0000">标点符号会影响生成的结果</font></a></div>'
        )

        with gr.Tabs():
            with gr.Row():
                # ---- input column ----
                with gr.Column():
                    # elem_id is the hook the download script uses to find the textarea.
                    input_text = gr.Textbox(
                        label="Text (100 words limitation)",
                        lines=5,
                        value="今天晚上吃啥好呢。",
                        elem_id="input-text",
                    )
                    lang = gr.Dropdown(
                        label="Language",
                        choices=[
                            "中文",
                            "日语",
                            "中日混合(中文用[ZH][ZH]包裹起来,日文用[JA][JA]包裹起来)",
                        ],
                        type="index",
                        value="中文",
                    )
                    btn = gr.Button(value="Submit")
                    with gr.Row():
                        search = gr.Textbox(label="Search Speaker", lines=1)
                        btn2 = gr.Button(value="Search")
                        # Speaker dropdown is disabled: `speakers` comes from the
                        # model setup, which is commented out in this file.
                        # sid = gr.Dropdown(label="Speaker", choices=speakers, type="index", value=speakers[228])
                    with gr.Row():
                        ns = gr.Slider(
                            label="noise_scale(控制感情变化程度)",
                            minimum=0.1,
                            maximum=1.0,
                            step=0.1,
                            value=0.6,
                            interactive=True,
                        )
                        nsw = gr.Slider(
                            label="noise_scale_w(控制音素发音长度)",
                            minimum=0.1,
                            maximum=1.0,
                            step=0.1,
                            value=0.668,
                            interactive=True,
                        )
                        ls = gr.Slider(
                            label="length_scale(控制整体语速)",
                            minimum=0.1,
                            maximum=2.0,
                            step=0.1,
                            value=1.2,
                            interactive=True,
                        )
                # ---- output column ----
                with gr.Column():
                    o1 = gr.Textbox(label="Output Message")
                    # elem_id is the hook the download script uses to find the <audio> tag.
                    o2 = gr.Audio(label="Output Audio", elem_id="tts-audio")
                    o3 = gr.Textbox(label="Extra Info")
                    download = gr.Button("Download Audio")
                # Event wiring is disabled together with the handler functions:
                # btn.click(vits, inputs=[input_text, lang, sid, ns, nsw, ls], outputs=[o1, o2, o3], api_name="GetSpeech")
                # download.click(None, [], [], _js=download_audio_js.format())
                # btn2.click(search_speaker, inputs=[search], outputs=[sid])
                # lang.change(change_lang, inputs=[lang], outputs=[ns, nsw, ls])

    # Enable the request queue with a single worker, then start the server.
    app.queue(concurrency_count=1).launch()