zxsipola123456 committed on
Commit
7c37268
1 Parent(s): 36e8f2b

Create test.py

Browse files
Files changed (1) hide show
  1. test.py +87 -0
test.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from openai import OpenAI
4
+ from tts_voice import tts_order_voice
5
+ import edge_tts
6
+ import anyio
7
+ import torch
8
+ import torchaudio
9
+ import gradio as gr
10
+ from scipy.io import wavfile
11
+ from scipy.io.wavfile import write
12
+ import numpy as np
13
+
14
# Load the pretrained KNN-VC voice-conversion model from torch.hub
# (bshall/knn-vc). prematched=True selects the prematched-trained checkpoint;
# runs on CPU. NOTE(review): this downloads weights at import time — network
# access required on first run.
knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device='cpu')

# Mapping used by text_to_speech_edge to pick an Edge TTS voice.
# Presumably maps a display language name to a voice id — defined in
# tts_voice.tts_order_voice; TODO confirm against that module.
language_dict = tts_order_voice
19
+
20
# Asynchronous text-to-speech helper backed by Microsoft Edge TTS.
async def text_to_speech_edge(text, language_code):
    """Synthesize *text* using the Edge TTS voice mapped to *language_code*.

    Returns a ``(status_message, mp3_path)`` tuple. The MP3 lives in a
    temporary file that is NOT deleted automatically — the caller owns it.
    """
    selected_voice = language_dict[language_code]
    synthesizer = edge_tts.Communicate(text, selected_voice)

    # delete=False keeps the file alive after the handle closes; we only
    # grab a unique path here and let Edge TTS write to it below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
        mp3_path = mp3_file.name

    await synthesizer.save(mp3_path)

    return "语音合成完成:{}".format(text), mp3_path
30
+
31
# Zero-padding helper used to equalize the lengths of two audio clips.
def pad_audio(data, target_length):
    """Pad *data* with trailing zeros so it holds at least *target_length*
    samples; arrays that are already long enough are returned unchanged."""
    shortfall = target_length - len(data)
    if shortfall <= 0:
        return data
    return np.pad(data, (0, shortfall), mode='constant')
37
+
38
# Voice conversion: make audio_in sound like the speaker in audio_ref.
def voice_change(audio_in, audio_ref):
    """Convert the voice in *audio_in* toward the speaker of *audio_ref*
    using KNN-VC, and return the path of the converted WAV file.

    Both arguments are paths to WAV files readable by scipy.io.wavfile.
    NOTE(review): pad_audio pads along axis 0 only — presumably the inputs
    are mono; confirm for stereo input.
    """
    samplerate1, data1 = wavfile.read(audio_in)
    samplerate2, data2 = wavfile.read(audio_ref)

    # Make the two clips the same length (zero-pad the shorter one).
    target_length = max(len(data1), len(data2))
    data1 = pad_audio(data1, target_length)
    data2 = pad_audio(data2, target_length)

    # Write the padded clips to temp files because the KNN-VC API consumes
    # file paths, not arrays. delete=False: files persist after the handles
    # close (they are never cleaned up here).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_in, \
         tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_ref:
        audio_in_path = tmp_audio_in.name
        audio_ref_path = tmp_audio_ref.name
        write(audio_in_path, samplerate1, data1)
        write(audio_ref_path, samplerate2, data2)

    # Extract features of the source and the reference matching pool.
    query_seq = knn_vc.get_features(audio_in_path)
    matching_set = knn_vc.get_matching_set([audio_ref_path])
    print("query_seq shape:", query_seq.shape)
    print("matching_set shape:", matching_set.shape)

    # Trim so query frames and matching-set frames agree.
    # NOTE(review): this compares query_seq.shape[0] against
    # matching_set.shape[1] — assumes matching_set is (files, frames, dim)
    # and query_seq is (frames, dim); TODO confirm against the knn-vc API.
    if query_seq.shape[0] > matching_set.shape[1]:
        query_seq = query_seq[:matching_set.shape[1]]
    elif query_seq.shape[0] < matching_set.shape[1]:
        matching_set = matching_set[:, :query_seq.shape[0], :]

    # k-nearest-neighbour regression over the matching set.
    out_wav = knn_vc.match(query_seq, matching_set, topk=4)

    # torchaudio.save expects a (channels, samples) tensor.
    if len(out_wav.shape) == 1:
        out_wav = out_wav.unsqueeze(0)

    # Fixed output path and sample rate (16 kHz); overwritten on each call.
    output_path = 'output.wav'
    torchaudio.save(output_path, out_wav, 16000)
    return output_path
75
+
76
# Thin Gradio adapter around voice_change.
def gradio_interface(audio_in, audio_ref):
    """Forward the two uploaded audio file paths to voice_change and
    return the path of the converted audio."""
    converted_path = voice_change(audio_in, audio_ref)
    return converted_path
79
+
80
# Build the Gradio UI: two audio inputs (source clip and reference
# speaker), one audio output carrying the converted voice.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=["audio", "audio"],
    outputs="audio",
    title="KNN-VC Voice Changer",
)

# Launch only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()