rcell committed on
Commit
cb3140f
·
1 Parent(s): 7bbcb75

update advance

Browse files
Files changed (1) hide show
  1. app.py +37 -9
app.py CHANGED
@@ -15,8 +15,8 @@ import utils
15
  from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
16
  from models import SynthesizerTrn
17
  from text.symbols import symbols
18
- from text import text_to_sequence
19
-
20
  from scipy.io.wavfile import write
21
 
22
 
@@ -64,15 +64,43 @@ def tts(text):
64
  # print(stn_tst.size())
65
  audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
66
  0, 0].data.float().numpy()
67
- return "成功", (hps.data.sampling_rate, audio)
 
 
 
 
 
 
 
 
 
 
 
68
 
 
 
 
 
 
 
69
  app = gr.Blocks()
70
  with app:
71
- tts_input1 = gr.TextArea(label="请输入日语文本", value="こんにちは。")
72
- # tts_input2 = gr.Dropdown(label="Speaker", choices=hps.speakers, type="index", value=hps.speakers[0])
73
- tts_submit = gr.Button("Generate", variant="primary")
74
- tts_output1 = gr.Textbox(label="Output Message")
75
- tts_output2 = gr.Audio(label="Output Audio")
76
- tts_submit.click(tts, [tts_input1], [tts_output1, tts_output2])
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  app.launch()
 
15
  from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
16
  from models import SynthesizerTrn
17
  from text.symbols import symbols
18
+ from text import text_to_sequence, cleaned_text_to_sequence
19
+ from text.cleaners import japanese_cleaners
20
  from scipy.io.wavfile import write
21
 
22
 
 
64
  # print(stn_tst.size())
65
  audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
66
  0, 0].data.float().numpy()
67
+ return (hps.data.sampling_rate, audio)
68
+
69
def clean_text(text):
    """Return ``text`` after passing it through ``japanese_cleaners``."""
    cleaned = japanese_cleaners(text)
    return cleaned
71
+
72
def generate_from_clean(text):
    """Synthesize audio from text that is converted without running a cleaner.

    ``text`` is handed straight to ``cleaned_text_to_sequence``; no cleaning
    step happens here (presumably the caller supplies cleaner output such as
    the result of ``clean_text`` -- confirm against the UI wiring).

    Returns:
        A ``(hps.data.sampling_rate, audio)`` tuple, where ``audio`` is the
        NumPy array taken from element ``[0][0, 0]`` of the
        ``net_g_ms.infer`` result, converted to float.
    """
    symbol_ids = cleaned_text_to_sequence(text)
    if hps.data.add_blank:
        # Insert the value 0 between the symbol ids when the config asks for it.
        symbol_ids = commons.intersperse(symbol_ids, 0)
    tokens = torch.LongTensor(symbol_ids)

    speaker = torch.LongTensor([2])  # speaker identity (fixed to index 2)

    with torch.no_grad():
        batch = tokens.unsqueeze(0)
        lengths = torch.LongTensor([tokens.size(0)])
        outputs = net_g_ms.infer(
            batch,
            lengths,
            sid=speaker,
            noise_scale=.667,
            noise_scale_w=0.8,
            length_scale=1,
        )
        waveform = outputs[0][0, 0].data.float().numpy()

    return (hps.data.sampling_rate, waveform)
# Build the two-tab Gradio UI and start the app.
with gr.Blocks() as app:
    with gr.Tabs():
        # "基本" (basic) tab: raw text in, audio out via ``tts``.
        with gr.TabItem("基本"):
            tts_input1 = gr.TextArea(label="请输入日语文本", value="こんにちは。")
            # tts_input2 = gr.Dropdown(label="Speaker", choices=hps.speakers, type="index", value=hps.speakers[0])
            tts_submit = gr.Button("生成", variant="primary")
            # tts_output1 = gr.Textbox(label="Output Message")
            tts_output2 = gr.Audio(label="输出")
            tts_submit.click(fn=tts, inputs=[tts_input1], outputs=[tts_output2])

        # "高级" (advanced) tab: clean the text first, let the user edit the
        # cleaned form, then synthesize from the edited text.
        with gr.TabItem("高级"):
            tts_input3 = gr.TextArea(label="请输入日语文本", value="こんにちは。")
            tts_s1 = gr.Button("清理", variant="primary")
            tts_input4 = gr.TextArea(label="调整调形", value="ko↑Nniʧiwa.")
            tts_s2 = gr.Button("生成", variant="primary")

            tts_o = gr.Audio(label="输出")
            # "清理" (clean) writes the cleaner output into the editable box.
            tts_s1.click(fn=clean_text, inputs=[tts_input3], outputs=[tts_input4])
            # "生成" (generate) synthesizes from whatever the editable box holds.
            tts_s2.click(fn=generate_from_clean, inputs=[tts_input4], outputs=[tts_o])

app.launch()