Update app: emotions can now be selected from the 9nineEmo folder
app.py
ADDED
@@ -0,0 +1,164 @@
import os
import random

import gradio as gr
import numpy as np
import torch

import commons
import utils
from models import SynthesizerTrn
from text import text_to_sequence
from text.symbols import symbols


def get_text(text, hps):
    # Convert raw text into the phoneme-id sequence expected by the model.
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        text_norm = commons.intersperse(text_norm, 0)
    text_norm = torch.LongTensor(text_norm)
    return text_norm


def tts(txt, emotion, index, hps, net_g, random_emotion_root):
    """Synthesize `txt`.

    `emotion` is either a path to a reference emotion embedding (*.emo.npy)
    or the string "random_sample", in which case one embedding is drawn at
    random from `random_emotion_root`. Returns the waveform and the path of
    the emotion embedding that was used.
    """
    stn_tst = get_text(txt, hps)
    with torch.no_grad():
        x_tst = stn_tst.unsqueeze(0)
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
        sid = torch.LongTensor([index])  # speaker id of the selected character
        if os.path.exists(f"{emotion}"):
            # Caller supplied an explicit embedding file.
            emo_path = emotion
        elif emotion == "random_sample":
            # Draw candidates until an existing file is found.
            while True:
                rand_wav = random.sample(os.listdir(random_emotion_root), 1)[0]
                emo_path = f"{random_emotion_root}/{rand_wav}"
                if os.path.exists(emo_path):
                    break
            print(emo_path)
        else:
            raise ValueError("invalid `emotion`: expected an existing *.emo.npy path or 'random_sample'")
        emo = torch.FloatTensor(np.load(emo_path)).unsqueeze(0)

        audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=0.667,
                            noise_scale_w=0.8, length_scale=1, emo=emo)[0][0, 0].data.float().numpy()
    return audio, emo_path


def random_generate(txt, index, hps, net_g, random_emotion_root):
    audio, emo_path = tts(txt, emotion='random_sample', index=index, hps=hps, net_g=net_g,
                          random_emotion_root=random_emotion_root)
    return audio, emo_path


def characterRoot(name):
    # Map a character name to its emotion-embedding folder and speaker id.
    if name == '九条都':
        random_emotion_root = "./9nineEmo/my"
        index = 0
    elif name == '新海天':
        random_emotion_root = "./9nineEmo/sr"
        index = 1
    elif name == '结城希亚':
        random_emotion_root = "./9nineEmo/na"
        index = 2
    elif name == '蕾娜':
        random_emotion_root = "./9nineEmo/gt"
        index = 3
    elif name == '索菲':
        random_emotion_root = "./9nineEmo/sf"
        index = 4
    else:
        raise ValueError(f"unknown character: {name}")
    return random_emotion_root, index


def configSelect(config):
    # Choose the config file and checkpoint for the multi- or single-speaker model.
    if config == 'mul':
        config_file = "./configs/9nine_multi.json"
        checkpoint = "logs/9nineM/G_252000.pth"
    elif config == "single":
        config_file = "./configs/sora.json"
        checkpoint = "logs/sora/G_341200.pth"
    else:
        raise ValueError(f"unknown config: {config}")
    return config_file, checkpoint


def runVits(name, config, txt, emotion):
    config_file, checkpoint = configSelect(config)
    random_emotion_root, index = characterRoot(name)
    hps = utils.get_hparams_from_file(config_file)
    net_g = SynthesizerTrn(
        len(symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model)
    _ = net_g.eval()
    _ = utils.load_checkpoint(checkpoint, net_g, None)

    audio, emo_path = tts(txt, emotion=emotion, index=index, hps=hps, net_g=net_g,
                          random_emotion_root=random_emotion_root)
    return (hps.data.sampling_rate, audio), emo_path


def nineMul(name, txt):
    # Multi-speaker model with a randomly sampled emotion embedding.
    audio, emo_path = runVits(name, 'mul', txt, 'random_sample')
    return "multiple model success", audio, emo_path


def nineSingle(name, txt):
    # Single-speaker model (新海天 only) with a randomly sampled emotion embedding.
    audio, emo_path = runVits(name, 'single', txt, 'random_sample')
    return "single model success", audio, emo_path


def nineMul_select_emo(name, txt, emo):
    # Multi-speaker model with a user-selected emotion embedding.
    print(emo)
    audio, _ = runVits(name, 'mul', txt, emo)
    message = "emotion reference: " + emo + ", synthesis success!"
    return message, audio


app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("9nine multiple model"):
            character = gr.Radio(['九条都', '新海天', '结城希亚', '蕾娜', '索菲'], label='character',
                                 info="select the character you want")
            text = gr.TextArea(label="input text (Japanese only)", value="祭りに行っただよね、知らない女の子と一緒にいて。")
            submit = gr.Button("generate", variant='primary')
            message = gr.Textbox(label="Message")
            audio = gr.Audio(label="output")
            emotion = gr.Textbox(label="emotion reference used")
            submit.click(nineMul, [character, text], [message, audio, emotion])
        with gr.TabItem("9nine single model"):
            character = gr.Radio(['新海天'], label='character',
                                 info="single model for 新海天 only")
            text = gr.TextArea(label="input text (Japanese only)", value="祭りに行っただよね、知らない女の子と一緒にいて。")
            submit = gr.Button("generate", variant='primary')
            message = gr.Textbox(label="Message")
            audio = gr.Audio(label="output")
            emotion = gr.Textbox(label="emotion reference used")
            submit.click(nineSingle, [character, text], [message, audio, emotion])
        with gr.TabItem("Choose Emotion Embedding"):
            character = gr.Radio(['九条都', '新海天', '结城希亚', '蕾娜', '索菲'], label='character',
                                 info="select the character you want")
            text = gr.TextArea(label="input text (Japanese only)", value="祭りに行っただよね、知らない女の子と一緒にいて。")
            emotion = gr.Textbox(label="emotion reference taken from the multi-speaker model, "
                                       "e.g. ./9nineEmo/sf/sf0207.wav.emo.npy; prefer the selected "
                                       "character's own files, another character's emotion may colour the voice")
            submit = gr.Button("generate", variant='primary')
            message = gr.Textbox(label="Message")
            audio = gr.Audio(label="output")
            submit.click(nineMul_select_emo, [character, text, emotion], [message, audio])
app.launch()
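
Not part of the commit, just an illustrative sketch of how one might pick an embedding for the new "Choose Emotion Embedding" tab: the ./9nineEmo/sr folder and the *.emo.npy suffix come from the code above, everything else is assumed for the example.

    # Illustrative only: list candidate emotion embeddings for 新海天 and,
    # optionally, synthesize with one of them by calling runVits directly.
    import os

    speaker_dir = "./9nineEmo/sr"  # folder used by characterRoot() for 新海天
    candidates = sorted(f for f in os.listdir(speaker_dir) if f.endswith(".emo.npy"))
    print(candidates[:5])  # paste one of these paths into the "Choose Emotion Embedding" tab

    # Direct call, bypassing the UI (note: importing app also starts the Gradio
    # server, since app.launch() runs at module level):
    # from app import runVits
    # (sr, wav), used = runVits("新海天", "mul", "祭りに行っただよね。",
    #                           f"{speaker_dir}/{candidates[0]}")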