AriaMei committed on
Commit 47716cc · 1 Parent(s): bcbb246

Update app: emotions can now be selected from the 9nineEmo folder

Files changed (1)
  1. app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
+ import os
+ import random
+
+ import gradio as gr
+ import numpy as np
+ import torch
+
+ import commons
+ import utils
+ from models import SynthesizerTrn
+ from text.symbols import symbols
+ from text import text_to_sequence
+
+
+ def get_text(text, hps):
+     """Convert raw text to a tensor of symbol IDs for the model."""
+     text_norm = text_to_sequence(text, hps.data.text_cleaners)
+     if hps.data.add_blank:
+         text_norm = commons.intersperse(text_norm, 0)
+     text_norm = torch.LongTensor(text_norm)
+     return text_norm
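+ # For reference (hypothetical input): get_text("こんにちは。", hps) returns a
+ # LongTensor of symbol IDs, with 0-blanks interspersed when hps.data.add_blank is set.
+
+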
+ def tts(txt, emotion, index, hps, net_g, random_emotion_root):
+     """`emotion` is either a path to a reference emotion embedding (.emo.npy file)
+     or "random_sample", which draws one at random from `random_emotion_root`."""
+     stn_tst = get_text(txt, hps)
+     with torch.no_grad():
+         x_tst = stn_tst.unsqueeze(0)
+         x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
+         sid = torch.LongTensor([index])  # speaker ID of the chosen character
+         if os.path.exists(f"{emotion}"):
+             # Caller supplied an explicit embedding file.
+             emo_path = emotion
+         elif emotion == "random_sample":
+             # Draw a random embedding file from the character's folder.
+             emo_path = f"{random_emotion_root}/{random.choice(os.listdir(random_emotion_root))}"
+             print(emo_path)
+         else:
+             raise ValueError("invalid `emotion`: expected an existing path or 'random_sample'")
+         emo = torch.FloatTensor(np.load(emo_path)).unsqueeze(0)
+         audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=0.667,
+                             noise_scale_w=0.8, length_scale=1, emo=emo)[0][0, 0].data.float().numpy()
+     return audio, emo_path
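+ # A usage sketch, not called by the app itself: assuming `hps` and `net_g` are
+ # built as in runVits below; the text and embedding path here are hypothetical.
+ #   audio, ref = tts("こんにちは。", "./9nineEmo/sr/sr0001.wav.emo.npy", index=1,
+ #                    hps=hps, net_g=net_g, random_emotion_root="./9nineEmo/sr")
+
+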
+ def random_generate(txt, index, hps, net_g, random_emotion_root):
+     audio, rand_wav = tts(txt, emotion='random_sample', index=index, hps=hps, net_g=net_g,
+                           random_emotion_root=random_emotion_root)
+     return audio, rand_wav
+
+
+ def characterRoot(name):
+     """Map a character name to its emotion-embedding folder and speaker index."""
+     roots = {
+         '九条都': ("./9nineEmo/my", 0),
+         '新海天': ("./9nineEmo/sr", 1),
+         '结城希亚': ("./9nineEmo/na", 2),
+         '蕾娜': ("./9nineEmo/gt", 3),
+         '索菲': ("./9nineEmo/sf", 4),
+     }
+     random_emotion_root, index = roots[name]
+     return random_emotion_root, index
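+ # For example, characterRoot('新海天') returns ("./9nineEmo/sr", 1).
+
+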
+ def configSelect(config):
+     """Return the config file and checkpoint for the multi- or single-speaker model."""
+     if config == 'mul':
+         config_file = "./configs/9nine_multi.json"
+         checkPoint = "logs/9nineM/G_252000.pth"
+     elif config == "single":
+         config_file = "./configs/sora.json"
+         checkPoint = "logs/sora/G_341200.pth"
+     else:
+         raise ValueError(f"unknown config: {config}")
+     return config_file, checkPoint
+
+
+ def runVits(name, config, txt, emotion):
+     config_file, checkPoint = configSelect(config)
+     random_emotion_root, index = characterRoot(name=name)
+     hps = utils.get_hparams_from_file(config_file)
+     net_g = SynthesizerTrn(
+         len(symbols),
+         hps.data.filter_length // 2 + 1,
+         hps.train.segment_size // hps.data.hop_length,
+         n_speakers=hps.data.n_speakers,
+         **hps.model)
+     _ = net_g.eval()
+     _ = utils.load_checkpoint(checkPoint, net_g, None)
+     audio, rand_wav = tts(txt, emotion=emotion, index=index, hps=hps, net_g=net_g,
+                           random_emotion_root=random_emotion_root)
+     return (hps.data.sampling_rate, audio), rand_wav
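+ # Usage sketch (the input text here is a hypothetical placeholder):
+ #   (sampling_rate, audio), emo_path = runVits("九条都", "mul", "こんにちは。", "random_sample")
+
+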
+ def nineMul(name, txt):
+     audio, rand_wav = runVits(name, 'mul', txt, 'random_sample')
+     return "multiple model success", audio, rand_wav
+
+
+ def nineSingle(name, txt):
+     audio, rand_wav = runVits(name, 'single', txt, 'random_sample')
+     return "single model success", audio, rand_wav
+
+ def nineMul_select_emo(name, txt, emo):
+     print(emo)
+     audio, _ = runVits(name, 'mul', txt, emo)
+     message = "emotion reference: " + emo + ", synthesis success!"
+     return message, audio
+
+
+ app = gr.Blocks()
+ with app:
+     with gr.Tabs():
+         with gr.TabItem("9nine multiple model"):
+             character = gr.Radio(['九条都', '新海天', '结城希亚', '蕾娜', '索菲'], label='character',
+                                  info="select the character you want")
+             text = gr.TextArea(label="input content (Japanese only)",
+                                value="祭りに行っただよね、知らない女の子と一緒にいて。")
+             submit = gr.Button("generate", variant='primary')
+             message = gr.Textbox(label="Message")
+             audio = gr.Audio(label="output")
+             emotion = gr.Textbox(label="emotion reference")
+             submit.click(nineMul, [character, text], [message, audio, emotion])
+         with gr.TabItem("9nine single model"):
+             character = gr.Radio(['新海天'], label='character',
+                                  info="single model for 新海天 only")
+             text = gr.TextArea(label="input content (Japanese only)",
+                                value="祭りに行っただよね、知らない女の子と一緒にいて。")
+             submit = gr.Button("generate", variant='primary')
+             message = gr.Textbox(label="Message")
+             audio = gr.Audio(label="output")
+             emotion = gr.Textbox(label="emotion reference")
+             submit.click(nineSingle, [character, text], [message, audio, emotion])
+         with gr.TabItem("Choose Emotion Embedding"):
+             character = gr.Radio(['九条都', '新海天', '结城希亚', '蕾娜', '索菲'], label='character',
+                                  info="select the character you want")
+             text = gr.TextArea(label="input content (Japanese only)",
+                                value="祭りに行っただよね、知らない女の子と一緒にいて。")
+             emotion = gr.Textbox(label="emotion embedding from the multi-speaker model, "
+                                        "e.g. ./9nineEmo/sf/sf0207.wav.emo.npy; prefer the character's own "
+                                        "embeddings, since another character's can color the voice")
+             submit = gr.Button("generate", variant='primary')
+             message = gr.Textbox(label="Message")
+             audio = gr.Audio(label="output")
+             submit.click(nineMul_select_emo, [character, text, emotion], [message, audio])
+ app.launch()