Personal-TTS / app.py
kevinwang676's picture
Update app.py
3beb09a
raw
history blame
5.03 kB
import os
import gradio as gr
import random
# Install runtime dependencies at startup. These calls run before the
# modelscope imports that follow, so their order relative to those imports
# matters. The exit status of each command is not checked.
# kantts is fetched from the ModelScope release index given by -f.
os.system("pip install kantts -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html")
# NOTE(review): numpy is pinned to 1.22.0, presumably for compatibility with
# the TTS stack -- confirm before changing the pin.
os.system("pip install numpy==1.22.0")
from modelscope.models.audio.tts import SambertHifigan
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import torch
import torchaudio
from denoiser import pretrained
from denoiser.dsp import convert_audio
# model_0
# Root directory holding the fine-tuned checkpoints of the first voice.
model_dir = os.path.abspath("./pretrain_work_dir")

# Checkpoint/config locations handed to SambertHifigan, each expressed as
# path components relative to model_dir.
custom_infer_abs = {'voice_name': 'F7'}
custom_infer_abs.update({
    key: os.path.join(model_dir, *parts)
    for key, parts in (
        ('am_ckpt', ('tmp_am', 'ckpt')),
        ('am_config', ('tmp_am', 'config.yaml')),
        ('voc_ckpt', ('orig_model', 'basemodel_16k', 'hifigan', 'ckpt')),
        ('voc_config',
         ('orig_model', 'basemodel_16k', 'hifigan', 'config.yaml')),
        ('audio_config', ('data', 'audio_config.yaml')),
        ('se_file', ('data', 'se', 'se.npy')),
    )
})

kwargs = dict(custom_ckpt=custom_infer_abs)
model_id = SambertHifigan(os.path.join(model_dir, "orig_model"), **kwargs)
# Text-to-speech pipeline for the first voice; called by infer().
inference = pipeline(task=Tasks.text_to_speech, model=model_id)
# model_1
# Root directory holding the fine-tuned checkpoints of the second voice.
model_dir1 = os.path.abspath("./jay/pretrain_work_dir")

# Checkpoint/config locations handed to SambertHifigan, each expressed as
# path components relative to model_dir1.
custom_infer_abs1 = {'voice_name': 'F7'}
custom_infer_abs1.update({
    key: os.path.join(model_dir1, *parts)
    for key, parts in (
        ('am_ckpt', ('tmp_am', 'ckpt')),
        ('am_config', ('tmp_am', 'config.yaml')),
        ('voc_ckpt', ('orig_model', 'basemodel_16k', 'hifigan', 'ckpt')),
        ('voc_config',
         ('orig_model', 'basemodel_16k', 'hifigan', 'config.yaml')),
        ('audio_config', ('data', 'audio_config.yaml')),
        ('se_file', ('data', 'se', 'se.npy')),
    )
})

kwargs1 = dict(custom_ckpt=custom_infer_abs1)
model_id1 = SambertHifigan(os.path.join(model_dir1, "orig_model"), **kwargs1)
# Text-to-speech pipeline for the second voice; called by infer1().
inference1 = pipeline(task=Tasks.text_to_speech, model=model_id1)
# functions
def infer(text):
    """Synthesize *text* with the first voice and return the WAV file path.

    The pipeline result's ``"output_wav"`` payload is written unchanged to a
    freshly created temporary ``.wav`` file.

    Args:
        text: The text to convert to speech.

    Returns:
        Path of the WAV file that was written.
    """
    # Local import keeps this function self-contained.
    import tempfile

    output = inference(input=text)
    # NamedTemporaryFile creates a uniquely named file atomically, so two
    # requests can never collide on a name (a random integer combined with
    # exclusive-create mode could raise FileExistsError). delete=False keeps
    # the file on disk after the handle closes so the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_file.write(output["output_wav"])
    return wav_file.name
def infer1(text):
    """Synthesize *text* with the second voice and return the WAV file path.

    The pipeline result's ``"output_wav"`` payload is written unchanged to a
    freshly created temporary ``.wav`` file.

    Args:
        text: The text to convert to speech.

    Returns:
        Path of the WAV file that was written.
    """
    # Local import keeps this function self-contained.
    import tempfile

    output = inference1(input=text)
    # NamedTemporaryFile creates a uniquely named file atomically, so two
    # requests can never collide on a name (a random integer combined with
    # exclusive-create mode could raise FileExistsError). delete=False keeps
    # the file on disk after the handle closes so the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_file.write(output["output_wav"])
    return wav_file.name
# upsample
import numpy as np
import torch
from hifi_gan_bwe import BandwidthExtender
from scipy.io.wavfile import write
# Longest input, in seconds, that extend() processes: extend() multiplies this
# by the sample rate and truncates the signal to that many samples.
MAX_LENGTH = 600.0
# Pretrained bandwidth-extension model, loaded once at import time and called
# per channel by extend().
# NOTE(review): the checkpoint name suggests a 48 kHz VCTK model -- confirm.
model = BandwidthExtender.from_pretrained("hifi-gan-bwe-10-42890e3-vctk-48kHz")
def extend(audio):
    """Upsample *audio* with the bandwidth-extension model.

    Args:
        audio: A ``(sample_rate, samples)`` pair where ``samples`` is a
            1-D (mono) or 2-D ``(samples, channels)`` array. The samples are
            scaled by 1/32767, i.e. treated as 16-bit PCM values.

    Returns:
        The path ``"upsample.wav"``, written at the model's sample rate.

    Raises:
        ValueError: If *audio* is ``None`` (nothing has been generated yet).
    """
    if audio is None:
        raise ValueError("No audio to upsample; generate speech first.")

    fs, samples = audio
    # Cap the input at MAX_LENGTH seconds.
    samples = samples[:int(MAX_LENGTH * fs)]
    samples = samples.astype(np.float32) / 32767.0
    if samples.ndim == 1:
        # Promote mono to (samples, 1) so the per-channel loop below works.
        samples = samples[:, np.newaxis]

    with torch.no_grad():
        # Run the model once per channel, then restore (samples, channels).
        upsampled = np.stack(
            [model(torch.from_numpy(channel), fs) for channel in samples.T]
        ).T

    # Clip before the int16 cast: values outside [-1, 1] would otherwise wrap
    # around and produce loud artifacts.
    pcm = (np.clip(upsampled, -1.0, 1.0) * 32767.0).astype(np.int16)
    # NOTE: the output name is fixed, so each call overwrites the last result.
    write("upsample.wav", int(model.sample_rate), pcm)
    return "upsample.wav"
# denoise
# Load the pretrained DNS64 denoiser once at import time. Use the GPU when one
# is available and fall back to the CPU otherwise, so that importing this
# module does not fail on a host without CUDA.
model1 = pretrained.dns64().to("cuda" if torch.cuda.is_available() else "cpu")
def denoise(audio):
    """Denoise the audio file at *audio* and return the result's path.

    Args:
        audio: Path of an audio file readable by ``torchaudio.load``.

    Returns:
        The path ``"denoised.wav"``, written at the denoiser's sample rate.

    Raises:
        ValueError: If *audio* is ``None``.
    """
    if audio is None:
        raise ValueError("No audio to denoise; upsample some audio first.")

    # Follow whatever device the model lives on instead of assuming CUDA.
    device = next(model1.parameters()).device

    wav, sr = torchaudio.load(audio)
    # Resample / remix to the sample rate and channel count the model expects.
    wav = convert_audio(wav.to(device), sr, model1.sample_rate, model1.chin)
    with torch.no_grad():
        # wav[None] adds a batch dimension; [0] strips it again.
        denoised = model1(wav[None])[0]

    # torchaudio tensors are channel-first, (channels, samples), and the
    # model output keeps that layout, whereas scipy's writer expects
    # (samples, channels) -- hence the transpose.
    write("denoised.wav", model1.sample_rate, denoised.cpu().numpy().T)
    return "denoised.wav"
def _upsample_then_denoise(audio):
    """Run extend() on *audio*, then denoise() on the upsampled file.

    Doing both steps in one handler guarantees the denoiser sees the file
    that was just produced. Two independent click handlers on the same
    button would each read their inputs at click time, so the second one
    would receive the previous (or empty) content of the upsampled player.

    Returns:
        A ``(upsampled_path, denoised_path)`` pair, one per output player.
    """
    upsampled_path = extend(audio)
    return upsampled_path, denoise(upsampled_path)


# Build the UI: text input and two voice buttons on the left, the three audio
# players and the post-processing button on the right.
app = gr.Blocks()
with app:
    gr.Markdown("# <center>🥳🎶🎡 - KanTTS中文声音克隆</center>")
    gr.Markdown("## <center>🌊 - 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")
    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(lines=5, label="请填写您想要转换的中文文本")
            with gr.Row():
                btn = gr.Button("使用AI娜娜的声音", variant="primary")
                btn1 = gr.Button("使用AI小杰的声音", variant="primary")
        with gr.Column():
            with gr.Row():
                out = gr.Audio(label="为您生成的专属音频")
                out1 = gr.Audio(label="更高采样率的专属音频")
                out2 = gr.Audio(label="降噪后的高采样率音频")
            btn2 = gr.Button("一键提高采样率并降噪")
    # Both voice buttons write into the same "generated audio" player.
    btn.click(fn=infer, inputs=[inp], outputs=[out])
    btn1.click(fn=infer1, inputs=[inp], outputs=[out])
    # One handler fills both post-processing players, in order.
    btn2.click(fn=_upsample_then_denoise, inputs=[out], outputs=[out1, out2])
    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
    gr.HTML('''
<div class="footer">
<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
</p>
</div>
''')
app.launch(show_error=True)