# tts/test.py — Gradio demo: edge-tts text-to-speech + KNN-VC voice conversion.
# (Header reconstructed from Hugging Face page residue: "Create test.py", rev 7c37268.)
import os
import tempfile
from openai import OpenAI
from tts_voice import tts_order_voice
import edge_tts
import anyio
import torch
import torchaudio
import gradio as gr
from scipy.io import wavfile
from scipy.io.wavfile import write
import numpy as np
# Load the KNN-VC voice-conversion model from torch.hub; prematched weights,
# CPU inference (no GPU assumed in this Space).
knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device='cpu')
# Map of display language/voice names to edge-tts voice identifiers,
# imported from the local tts_voice module.
language_dict = tts_order_voice
# Asynchronous text-to-speech via Microsoft Edge TTS.
async def text_to_speech_edge(text, language_code):
    """Synthesize `text` into an MP3 using the edge-tts voice for `language_code`.

    Args:
        text: the text to speak.
        language_code: key into `language_dict` selecting the edge-tts voice.

    Returns:
        A (status_message, mp3_path) tuple. The MP3 is written to a
        delete=False temp file so Gradio can serve it after this coroutine
        returns.
    """
    selected_voice = language_dict[language_code]
    synthesizer = edge_tts.Communicate(text, selected_voice)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out_file:
        output_path = out_file.name
    await synthesizer.save(output_path)
    return "语音合成完成:{}".format(text), output_path
# Audio padding helper.
def pad_audio(data, target_length):
    """Zero-pad `data` to `target_length` samples along the time axis.

    Args:
        data: numpy audio array, mono 1-D or multi-channel 2-D
              (samples, channels) as returned by `scipy.io.wavfile.read`.
        target_length: desired number of samples (axis 0).

    Returns:
        The padded array, or `data` unchanged if it already has at least
        `target_length` samples.
    """
    if len(data) < target_length:
        pad_length = target_length - len(data)
        # Fix: pad only axis 0 (time). The original `np.pad(data, (0, n))`
        # applied the pad to EVERY axis of a 2-D stereo array, growing the
        # channel dimension as well and corrupting the audio.
        pad_width = [(0, pad_length)] + [(0, 0)] * (data.ndim - 1)
        data = np.pad(data, pad_width, mode='constant')
    return data
# Voice conversion: re-synthesize one clip in another clip's voice.
def voice_change(audio_in, audio_ref):
    """Convert the voice in `audio_in` to sound like `audio_ref` via KNN-VC.

    Args:
        audio_in: path to the source WAV (content to keep).
        audio_ref: path to the reference WAV (target voice).

    Returns:
        Path to the converted audio ('output.wav', overwritten each call).
    """
    samplerate1, data1 = wavfile.read(audio_in)
    samplerate2, data2 = wavfile.read(audio_ref)

    # Zero-pad the shorter clip so both inputs have the same length.
    target_length = max(len(data1), len(data2))
    data1 = pad_audio(data1, target_length)
    data2 = pad_audio(data2, target_length)

    # Write the (possibly padded) audio back to temp WAV files so the model
    # can read them from disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_in, \
            tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_ref:
        audio_in_path = tmp_audio_in.name
        audio_ref_path = tmp_audio_ref.name
    write(audio_in_path, samplerate1, data1)
    write(audio_ref_path, samplerate2, data2)

    try:
        query_seq = knn_vc.get_features(audio_in_path)
        matching_set = knn_vc.get_matching_set([audio_ref_path])
    finally:
        # Fix: the original never removed the delete=False temp files, leaking
        # two WAVs per call. They are only needed for feature extraction.
        for path in (audio_in_path, audio_ref_path):
            try:
                os.remove(path)
            except OSError:
                pass

    print("query_seq shape:", query_seq.shape)
    print("matching_set shape:", matching_set.shape)

    # Trim so the query frame count matches the matching set's frame axis.
    # NOTE(review): assumes query_seq is (frames, dim) and matching_set is
    # (1, frames, dim) — confirm against the bshall/knn-vc hub API.
    if query_seq.shape[0] > matching_set.shape[1]:
        query_seq = query_seq[:matching_set.shape[1]]
    elif query_seq.shape[0] < matching_set.shape[1]:
        matching_set = matching_set[:, :query_seq.shape[0], :]

    out_wav = knn_vc.match(query_seq, matching_set, topk=4)

    # torchaudio.save expects a 2-D (channels, samples) tensor.
    if len(out_wav.shape) == 1:
        out_wav = out_wav.unsqueeze(0)

    output_path = 'output.wav'
    torchaudio.save(output_path, out_wav, 16000)
    return output_path
# Gradio callback wrapper.
def gradio_interface(audio_in, audio_ref):
    """Gradio entry point: delegate straight to `voice_change`.

    Returns the path of the converted audio file.
    """
    converted_path = voice_change(audio_in, audio_ref)
    return converted_path
# Build the Gradio UI: two audio inputs (source clip + reference voice)
# producing one converted-audio output.
iface = gr.Interface(fn=gradio_interface,
                     inputs=["audio", "audio"],
                     outputs="audio",
                     title="KNN-VC Voice Changer")
# Launch the web app only when run as a script (not on import).
if __name__ == "__main__":
    iface.launch()