Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -4,9 +4,9 @@ from TTS.utils.manage import ModelManager
|
|
4 |
from TTS.utils.synthesizer import Synthesizer
|
5 |
|
6 |
manager = ModelManager()
|
7 |
-
|
8 |
synthesizer = Synthesizer(
|
9 |
-
|
10 |
)
|
11 |
|
12 |
import os
|
@@ -142,15 +142,9 @@ SE_speaker_manager = SpeakerManager(encoder_model_path=CHECKPOINT_SE_PATH, encod
|
|
142 |
|
143 |
# Define helper function
|
144 |
|
145 |
-
def compute_spec(ref_file):
|
146 |
-
y, sr = librosa.load(ref_file, sr=ap.sample_rate)
|
147 |
-
spec = ap.spectrogram(y)
|
148 |
-
spec = torch.FloatTensor(spec).unsqueeze(0)
|
149 |
-
return spec
|
150 |
-
|
151 |
-
|
152 |
-
def voice_conversion(apikey, upload, audio):
|
153 |
|
|
|
|
|
154 |
openai.api_key = apikey
|
155 |
|
156 |
# load audio and pad/trim it to fit 30 seconds
|
@@ -186,22 +180,26 @@ def voice_conversion(apikey, upload, audio):
|
|
186 |
wavs = synthesizer.tts(chat_response + "。")
|
187 |
|
188 |
synthesizer.save_wav(wavs, "output.wav")
|
189 |
-
#tts.tts_to_file(chat_response + "。", file_path="output.wav")
|
190 |
-
|
191 |
-
target_audio = 'target.wav'
|
192 |
-
reference_audio = 'reference.wav'
|
193 |
-
driving_audio = 'driving.wav'
|
194 |
|
195 |
-
|
196 |
|
197 |
-
|
|
|
|
|
|
|
|
|
198 |
|
199 |
-
#data1 = np.asarray(data1, dtype=np.int16)
|
200 |
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
# !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
|
206 |
# !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
|
207 |
# !ffmpeg-normalize $driving_audio -nt rms -t=-27 -o $driving_audio -ar 16000 -f
|
@@ -245,7 +243,7 @@ def voice_conversion(apikey, upload, audio):
|
|
245 |
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
|
246 |
torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
|
247 |
|
248 |
-
return
|
249 |
|
250 |
c1=gr.Interface(
|
251 |
fn=voice_conversion,
|
@@ -278,4 +276,61 @@ c2=gr.Interface(
|
|
278 |
)
|
279 |
|
280 |
demo = gr.TabbedInterface([c1, c2], ["wav文件上传", "麦克风上传"], title = '🥳💬💕 - TalktoAI,随时随地,谈天说地!')
|
281 |
-
demo.launch(show_error = True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
from TTS.utils.synthesizer import Synthesizer
|
5 |
|
6 |
# Text-to-speech setup: ask the model manager for the zh-CN Baker
# Tacotron2-DDC-GST model, then build the synthesizer from the returned
# checkpoint path and config path.
manager = ModelManager()
model_path1, config_path, model_item = manager.download_model(
    "tts_models/zh-CN/baker/tacotron2-DDC-GST"
)
# The three trailing positional arguments are deliberately left as None.
# NOTE(review): presumably speakers file / languages file / vocoder
# checkpoint -- confirm against the installed Synthesizer signature.
synthesizer = Synthesizer(model_path1, config_path, None, None, None)
|
11 |
|
12 |
import os
|
|
|
142 |
|
143 |
# Define helper function
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
+
def chatgpt(apikey, audio):
|
147 |
+
|
148 |
openai.api_key = apikey
|
149 |
|
150 |
# load audio and pad/trim it to fit 30 seconds
|
|
|
180 |
wavs = synthesizer.tts(chat_response + "。")
|
181 |
|
182 |
synthesizer.save_wav(wavs, "output.wav")
|
|
|
|
|
|
|
|
|
|
|
183 |
|
184 |
+
return [result.text, chat_response, "output.wav"]
|
185 |
|
186 |
+
def compute_spec(ref_file):
    """Return the spectrogram of an audio file as a float tensor.

    The file is loaded with librosa at the sample rate configured on the
    module-level audio processor ``ap``, converted with ``ap.spectrogram``,
    and wrapped in a ``torch.FloatTensor`` with one extra leading dimension.

    Args:
        ref_file: Path (or other source accepted by ``librosa.load``) of the
            audio to analyse.

    Returns:
        The spectrogram as a ``torch.FloatTensor`` after ``unsqueeze(0)``.
    """
    # The sample rate returned by librosa is not needed: it was requested
    # explicitly via ``sr=ap.sample_rate``.
    waveform, _ = librosa.load(ref_file, sr=ap.sample_rate)
    return torch.FloatTensor(ap.spectrogram(waveform)).unsqueeze(0)
|
191 |
|
|
|
192 |
|
193 |
+
def voice_conversion(ta, ra, da):
|
194 |
+
|
195 |
+
target_audio = 'target.wav'
|
196 |
+
reference_audio = 'reference.wav'
|
197 |
+
driving_audio = 'driving.wav'
|
198 |
+
|
199 |
+
write(target_audio, ta[0], ta[1])
|
200 |
+
write(reference_audio, ra[0], ra[1])
|
201 |
+
write(driving_audio, da[0], da[1])
|
202 |
+
|
203 |
# !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
|
204 |
# !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
|
205 |
# !ffmpeg-normalize $driving_audio -nt rms -t=-27 -o $driving_audio -ar 16000 -f
|
|
|
243 |
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
|
244 |
torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
|
245 |
|
246 |
+
return "enhanced.wav"
|
247 |
|
248 |
c1=gr.Interface(
|
249 |
fn=voice_conversion,
|
|
|
276 |
)
|
277 |
|
278 |
# Legacy tabbed interface built from c1 / c2.
# It is constructed but intentionally NOT launched here: launch() blocks the
# script by default, so calling demo.launch() at this point would stop
# execution before the Blocks UI defined below is built and its
# block.launch(show_error=True) call is reached.
# NOTE(review): assumes the Blocks UI is the intended front end -- confirm.
demo = gr.TabbedInterface([c1, c2], ["wav文件上传", "麦克风上传"], title = '🥳💬💕 - TalktoAI,随时随地,谈天说地!')
|
280 |
+
# Main Blocks UI.
#   Step 1: the user supplies an OpenAI API key and a microphone recording;
#           chatgpt() fills the question box, the answer box and the
#           synthesized-audio player.
#   Step 2: the user uploads a voice they like; voice_conversion() receives
#           that upload plus the synthesized audio (twice) and fills out1.
# NOTE(review): the text this was recovered from carried no indentation, so
# the exact Group/Box/Row membership of the buttons and outputs below is a
# best-effort reconstruction -- verify the layout against the running app.
block = gr.Blocks()

with block:
    with gr.Group():
        gr.Markdown(
            """ # <center>🥳💬💕 - TalktoAI,随时随地,谈天说地!</center>

## <center>🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!</center>

"""
        )

        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                inp1 = gr.components.Textbox(lines=2, label="请填写您的OpenAI-API-key")
                # chatgpt() receives the recording as a file path.
                inp2 = gr.Audio(source="microphone", type="filepath", label="说些什么吧")

                btn = gr.Button("开始对话吧")

            yousay = gr.Textbox(lines=3, label="您的提问")
            texts = gr.Textbox(lines=5, label="ChatGPT的回答")
            audio_tts = gr.Audio(label="自动合成的声音")

            # chatgpt returns three values, mapped in order to these outputs.
            btn.click(chatgpt, [inp1, inp2], [yousay, texts, audio_tts])

        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # voice_conversion() indexes every input as value[0], value[1]
                # and hands both to write(), so each input must arrive as a
                # (sample_rate, data) pair -- Gradio's type="numpy" -- and not
                # as a path string. The previous type="filepath" would have
                # passed single characters of the path instead.
                # NOTE(review): write() is presumably scipy.io.wavfile.write
                # (filename, rate, data); its import is not visible here.
                inp3 = gr.Audio(source="upload", label="请上传您喜欢的声音(wav/mp3文件, max. 30mb)", type="numpy")
                # The synthesized answer is reused as both the second and the
                # third argument of voice_conversion().
                inp4 = audio_tts
                inp5 = audio_tts

                btn1 = gr.Button("用喜欢的声音听一听吧")

            out1 = gr.Audio(label="声音拟合的专属声音")

            btn1.click(voice_conversion, [inp3, inp4, inp5], [out1])

        gr.Markdown(
            """

### <center>注意❗:请不要输入或生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关,请自觉合法合规使用,违反者一切后果自负。</center>

### <center>Model by [Raven](https://huggingface.co/spaces/BlinkDL/Raven-RWKV-7B). Thanks to [PENG Bo](https://github.com/BlinkDL). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>

"""
        )

        gr.HTML('''
<div class="footer">
<p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
</p>
</div>
''')


# Start the server; show_error=True asks Gradio to surface handler errors in the UI.
block.launch(show_error=True)
|