# Spaces: Running
import os | |
import gradio as gr | |
import random | |
# Install the pinned dependencies at start-up, before the third-party imports
# that follow. The install order of the original script is preserved.
import subprocess
import sys

_STARTUP_PIP_ARGS = (
    ("--upgrade", "Cython==0.29.35"),
    ("pysptk", "--no-build-isolation"),
    ("kantts", "-f", "https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html"),
    ("librosa==0.9.2",),
    ("numpy==1.22.4",),
)
for _pip_args in _STARTUP_PIP_ARGS:
    # `sys.executable -m pip` installs into the interpreter that is running
    # this script; a bare `pip` on PATH may belong to a different environment.
    _pip_result = subprocess.run(
        [sys.executable, "-m", "pip", "install", *_pip_args],
        check=False,  # stay best-effort: a failed install must not abort start-up
    )
    if _pip_result.returncode != 0:
        # Previously the exit status was discarded; surface it in the logs.
        print(
            f"warning: 'pip install {' '.join(_pip_args)}' exited with "
            f"status {_pip_result.returncode}",
            file=sys.stderr,
        )
from modelscope.models.audio.tts import SambertHifigan | |
from modelscope.pipelines import pipeline | |
from modelscope.utils.constant import Tasks | |
from voicefixer import VoiceFixer | |
# One VoiceFixer instance, created at import time and reused by
# inference_denoise() on every call.
voicefixer = VoiceFixer()
def _custom_ckpt_paths(work_dir):
    """Return the ``custom_ckpt`` mapping for the voice stored under *work_dir*.

    Both voices use the same directory layout, so the mapping is derived from
    the work directory alone instead of being spelled out once per voice.

    Args:
        work_dir: Absolute path of a ``pretrain_work_dir`` directory.

    Returns:
        dict with the keys ``voice_name``, ``am_ckpt``, ``am_config``,
        ``voc_ckpt``, ``voc_config``, ``audio_config`` and ``se_file``.
    """
    # Directory that holds both the 'voc_*' checkpoint and its config.
    hifigan_dir = os.path.join(work_dir, 'orig_model', 'basemodel_16k', 'hifigan')
    return {
        'voice_name': 'F7',
        'am_ckpt': os.path.join(work_dir, 'tmp_am', 'ckpt'),
        'am_config': os.path.join(work_dir, 'tmp_am', 'config.yaml'),
        'voc_ckpt': os.path.join(hifigan_dir, 'ckpt'),
        'voc_config': os.path.join(hifigan_dir, 'config.yaml'),
        'audio_config': os.path.join(work_dir, 'data', 'audio_config.yaml'),
        'se_file': os.path.join(work_dir, 'data', 'se', 'se.npy'),
    }


# model_0: pipeline used by infer()
model_dir = os.path.abspath("./pretrain_work_dir")
custom_infer_abs = _custom_ckpt_paths(model_dir)
kwargs = {'custom_ckpt': custom_infer_abs}
model_id = SambertHifigan(os.path.join(model_dir, "orig_model"), **kwargs)
inference = pipeline(task=Tasks.text_to_speech, model=model_id)

# model_1: pipeline used by infer1()
model_dir1 = os.path.abspath("./jay/pretrain_work_dir")
custom_infer_abs1 = _custom_ckpt_paths(model_dir1)
kwargs1 = {'custom_ckpt': custom_infer_abs1}
model_id1 = SambertHifigan(os.path.join(model_dir1, "orig_model"), **kwargs1)
inference1 = pipeline(task=Tasks.text_to_speech, model=model_id1)
# functions | |
def infer(text):
    """Synthesize *text* with the ``inference`` pipeline and save it as WAV.

    Args:
        text: Text to synthesize.

    Returns:
        Path of the newly written ``<random number>myfile.wav`` file, or
        ``None`` when *text* is empty or whitespace-only (the pipeline is
        not invoked in that case).

    Raises:
        FileExistsError: If the randomly chosen file name already exists;
            the file is opened in exclusive-create mode.
    """
    if not text or not text.strip():
        # Nothing to synthesize.
        return None
    output = inference(input=text)
    wav_path = f"{random.randint(1, 1000000000000)}myfile.wav"
    # 'x' = exclusive create, so an existing file is never overwritten.
    with open(wav_path, mode='xb') as wav_file:
        wav_file.write(output["output_wav"])
    return wav_path
def infer1(text):
    """Synthesize *text* with the ``inference1`` pipeline and save it as WAV.

    Args:
        text: Text to synthesize.

    Returns:
        Path of the newly written ``<random number>file.wav`` file, or
        ``None`` when *text* is empty or whitespace-only (the pipeline is
        not invoked in that case).

    Raises:
        FileExistsError: If the randomly chosen file name already exists;
            the file is opened in exclusive-create mode.
    """
    if not text or not text.strip():
        # Nothing to synthesize.
        return None
    output = inference1(input=text)
    wav_path = f"{random.randint(1, 1000000000000)}file.wav"
    # 'x' = exclusive create, so an existing file is never overwritten.
    with open(wav_path, mode='xb') as wav_file:
        wav_file.write(output["output_wav"])
    return wav_path
# upsample | |
import numpy as np | |
import torch | |
from hifi_gan_bwe import BandwidthExtender | |
from scipy.io.wavfile import write | |
# Longest input extend() will process: the value is multiplied by the sample
# rate to cap the number of samples, i.e. it is a duration in seconds.
MAX_LENGTH = 600.0
# Pretrained bandwidth-extension model, loaded once at import time and used
# by extend().
model = BandwidthExtender.from_pretrained("hifi-gan-bwe-10-42890e3-vctk-48kHz")
def extend(audio):
    """Upsample audio with the bandwidth-extension ``model`` and save it as WAV.

    Args:
        audio: ``(fs, samples)`` pair of sample rate and sample array (1-D, or
            2-D with one column per channel), or ``None`` when there is no
            audio. Samples are scaled by 1/32767, i.e. they are assumed to be
            16-bit PCM (TODO confirm for the Gradio version in use).

    Returns:
        ``"upsample.wav"`` once the file has been written, or ``None`` when
        *audio* is ``None`` or contains no samples.
    """
    if audio is None:
        # Nothing has been generated yet; unpacking None would raise.
        return None
    fs, samples = audio
    if len(samples) == 0:
        return None
    # Cap the input at MAX_LENGTH seconds.
    samples = samples[:int(MAX_LENGTH * fs)]
    samples = samples.astype(np.float32) / 32767.0
    if samples.ndim == 1:
        # Give mono audio an explicit channel axis so both layouts share a path.
        samples = samples[:, np.newaxis]
    with torch.no_grad():
        # Run the model one channel at a time, then restore the
        # (samples, channels) layout.
        extended = np.stack(
            [model(torch.from_numpy(channel), fs) for channel in samples.T]
        ).T
    # Clip before the int16 cast: values outside [-1, 1] would otherwise wrap
    # around and produce loud artifacts.
    extended = (np.clip(extended, -1.0, 1.0) * 32767.0).astype(np.int16)
    out_rate = int(model.sample_rate)
    write("upsample.wav", out_rate, extended)
    return "upsample.wav"
# denoise | |
def inference_denoise(audio):
    """Run VoiceFixer on an audio file and write the result to ``output.wav``.

    Args:
        audio: Path of the input WAV file, or ``None`` / empty string when
            there is no file to process.

    Returns:
        ``'output.wav'``, or ``None`` when no input path was given.
    """
    if not audio:
        # No upsampled file available yet; nothing to restore.
        return None
    voicefixer.restore(
        input=audio,          # input wav file path
        output="output.wav",  # output wav file path
        cuda=False,           # whether to use gpu acceleration
        mode=0,               # You can try out mode 0, 1 to find out the best result
    )
    return 'output.wav'
# Gradio UI: a text box with two voice buttons on the left; the generated,
# upsampled and denoised audio players with their action buttons on the right.
# NOTE(review): the nesting below is reconstructed from the order of the
# components; the original indentation was not visible — confirm the layout.
with gr.Blocks() as app:
    for _banner in (
        "# <center>🥳🎶🎡 - Sambert中文声音克隆</center>",
        "## <center>🌟 - 训练3分钟,推理5秒钟,中英文自然发音、真实拟声 </center>",
        "### <center>🌊 - 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>",
    ):
        gr.Markdown(_banner)

    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(lines=5, label="请填写您想要转换的中文文本")
            with gr.Row():
                btn = gr.Button("使用AI娜娜的声音", variant="primary")
                btn1 = gr.Button("使用AI小杰的声音", variant="primary")
        with gr.Column():
            with gr.Row():
                out = gr.Audio(label="为您生成的专属音频", interactive=False)
                out1 = gr.Audio(label="更高采样率的专属音频", type="filepath", interactive=False)
                out2 = gr.Audio(label="降噪后的高采样率音频", type="filepath", interactive=False)
            with gr.Row():
                btn2 = gr.Button("一键提高采样率")
                btn3 = gr.Button("一键降噪")

    # Each button feeds one component into one handler and shows the result in
    # one component: text -> audio -> upsampled audio -> denoised audio.
    for _button, _handler, _source, _target in (
        (btn, infer, inp, out),
        (btn1, infer1, inp, out),
        (btn2, extend, out, out1),
        (btn3, inference_denoise, out1, out2),
    ):
        _button.click(fn=_handler, inputs=[_source], outputs=[_target])

    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
    gr.HTML('''
<div class="footer">
<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
</p>
</div>
''')

# show_error=True is passed through to launch() unchanged.
app.launch(show_error=True)