zxsipola123456 committed on
Commit
7c37268
1 Parent(s): 36e8f2b

Create test.py

Browse files
Files changed (1) hide show
  1. test.py +87 -0
test.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from openai import OpenAI
4
+ from tts_voice import tts_order_voice
5
+ import edge_tts
6
+ import anyio
7
+ import torch
8
+ import torchaudio
9
+ import gradio as gr
10
+ from scipy.io import wavfile
11
+ from scipy.io.wavfile import write
12
+ import numpy as np
13
+
14
# Load the pretrained KNN-VC voice-conversion model from torch.hub
# (bshall/knn-vc). prematched=True selects the prematched-trained checkpoint;
# runs on CPU. NOTE(review): this downloads weights at import time — network
# access required on first run.
knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device='cpu')

# Mapping used by text_to_speech_edge to pick an Edge TTS voice.
# Presumably maps a display language name to a voice id — defined in
# tts_voice.tts_order_voice; TODO confirm against that module.
language_dict = tts_order_voice
19
+
20
# Asynchronous text-to-speech helper backed by Microsoft Edge TTS.
async def text_to_speech_edge(text, language_code):
    """Synthesize *text* using the Edge TTS voice mapped to *language_code*.

    Returns a ``(status_message, mp3_path)`` tuple. The MP3 lives in a
    temporary file that is NOT deleted automatically — the caller owns it.
    """
    selected_voice = language_dict[language_code]
    synthesizer = edge_tts.Communicate(text, selected_voice)

    # delete=False keeps the file alive after the handle closes; we only
    # grab a unique path here and let Edge TTS write to it below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
        mp3_path = mp3_file.name

    await synthesizer.save(mp3_path)

    return "语音合成完成:{}".format(text), mp3_path
30
+
31
# Zero-padding helper used to equalize the lengths of two audio clips.
def pad_audio(data, target_length):
    """Pad *data* with trailing zeros so it holds at least *target_length*
    samples; arrays that are already long enough are returned unchanged."""
    shortfall = target_length - len(data)
    if shortfall <= 0:
        return data
    return np.pad(data, (0, shortfall), mode='constant')
37
+
38
# Voice conversion: make audio_in sound like the speaker in audio_ref.
def voice_change(audio_in, audio_ref):
    """Convert the voice in *audio_in* toward the speaker of *audio_ref*
    using KNN-VC, and return the path of the converted WAV file.

    Both arguments are paths to WAV files readable by scipy.io.wavfile.
    NOTE(review): pad_audio pads along axis 0 only — presumably the inputs
    are mono; confirm for stereo input.
    """
    samplerate1, data1 = wavfile.read(audio_in)
    samplerate2, data2 = wavfile.read(audio_ref)

    # Make the two clips the same length (zero-pad the shorter one).
    target_length = max(len(data1), len(data2))
    data1 = pad_audio(data1, target_length)
    data2 = pad_audio(data2, target_length)

    # Write the padded clips to temp files because the KNN-VC API consumes
    # file paths, not arrays. delete=False: files persist after the handles
    # close (they are never cleaned up here).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_in, \
         tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_ref:
        audio_in_path = tmp_audio_in.name
        audio_ref_path = tmp_audio_ref.name
        write(audio_in_path, samplerate1, data1)
        write(audio_ref_path, samplerate2, data2)

    # Extract features of the source and the reference matching pool.
    query_seq = knn_vc.get_features(audio_in_path)
    matching_set = knn_vc.get_matching_set([audio_ref_path])
    print("query_seq shape:", query_seq.shape)
    print("matching_set shape:", matching_set.shape)

    # Trim so query frames and matching-set frames agree.
    # NOTE(review): this compares query_seq.shape[0] against
    # matching_set.shape[1] — assumes matching_set is (files, frames, dim)
    # and query_seq is (frames, dim); TODO confirm against the knn-vc API.
    if query_seq.shape[0] > matching_set.shape[1]:
        query_seq = query_seq[:matching_set.shape[1]]
    elif query_seq.shape[0] < matching_set.shape[1]:
        matching_set = matching_set[:, :query_seq.shape[0], :]

    # k-nearest-neighbour regression over the matching set.
    out_wav = knn_vc.match(query_seq, matching_set, topk=4)

    # torchaudio.save expects a (channels, samples) tensor.
    if len(out_wav.shape) == 1:
        out_wav = out_wav.unsqueeze(0)

    # Fixed output path and sample rate (16 kHz); overwritten on each call.
    output_path = 'output.wav'
    torchaudio.save(output_path, out_wav, 16000)
    return output_path
75
+
76
# Thin Gradio adapter around voice_change.
def gradio_interface(audio_in, audio_ref):
    """Forward the two uploaded audio file paths to voice_change and
    return the path of the converted audio."""
    converted_path = voice_change(audio_in, audio_ref)
    return converted_path
79
+
80
# Build the Gradio UI: two audio inputs (source clip and reference
# speaker), one audio output carrying the converted voice.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=["audio", "audio"],
    outputs="audio",
    title="KNN-VC Voice Changer",
)

# Launch only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()