Kevin676 committed
Commit 5ab9daa · Parent(s): 5ce216b

Update app.py

Files changed (1): app.py (+79, -24)
app.py CHANGED
@@ -4,9 +4,9 @@ from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
 
 manager = ModelManager()
-model_path, config_path, model_item = manager.download_model("tts_models/zh-CN/baker/tacotron2-DDC-GST")
+model_path1, config_path, model_item = manager.download_model("tts_models/zh-CN/baker/tacotron2-DDC-GST")
 synthesizer = Synthesizer(
-    model_path, config_path, None, None, None,
+    model_path1, config_path, None, None, None,
 )
 
 import os
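
Note on this hunk: the rename to model_path1 presumably keeps a later manager.download_model call in this file from overwriting the Tacotron2 checkpoint path. For reference, a minimal standalone sketch of this Coqui TTS setup (the sample sentence and output filename are hypothetical; the identifiers mirror the diff):

    from TTS.utils.manage import ModelManager
    from TTS.utils.synthesizer import Synthesizer

    manager = ModelManager()
    # download_model fetches the checkpoint and returns (checkpoint_path, config_path, model_item)
    model_path1, config_path, model_item = manager.download_model(
        "tts_models/zh-CN/baker/tacotron2-DDC-GST"
    )
    # positional args: tts_checkpoint, tts_config_path; speaker/vocoder slots left as None
    synthesizer = Synthesizer(model_path1, config_path, None, None, None)

    wavs = synthesizer.tts("你好,世界。")    # list of waveform samples
    synthesizer.save_wav(wavs, "hello.wav")  # write the waveform to disk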
@@ -142,15 +142,9 @@ SE_speaker_manager = SpeakerManager(encoder_model_path=CHECKPOINT_SE_PATH, encod
 
 # Define helper function
 
-def compute_spec(ref_file):
-    y, sr = librosa.load(ref_file, sr=ap.sample_rate)
-    spec = ap.spectrogram(y)
-    spec = torch.FloatTensor(spec).unsqueeze(0)
-    return spec
-
-
-def voice_conversion(apikey, upload, audio):
+
+def chatgpt(apikey, audio):
 
     openai.api_key = apikey
 
     # load audio and pad/trim it to fit 30 seconds
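
The old voice_conversion(apikey, upload, audio) did transcription, chat, TTS, and conversion in one function; this hunk splits the speech-to-text/chat/TTS part out into chatgpt(apikey, audio). The transcription step itself is elided by the diff; judging from the "pad/trim it to fit 30 seconds" comment and the later use of result.text, it presumably follows the standard openai-whisper recipe, roughly:

    import whisper

    model = whisper.load_model("base")            # model size is a guess
    audio_data = whisper.load_audio(audio)        # `audio` is the microphone filepath
    audio_data = whisper.pad_or_trim(audio_data)  # fit Whisper's 30-second window
    mel = whisper.log_mel_spectrogram(audio_data).to(model.device)
    result = whisper.decode(model, mel, whisper.DecodingOptions(fp16=False))
    # result.text then seeds the chat request whose reply becomes chat_response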
@@ -186,22 +180,26 @@ def voice_conversion(apikey, upload, audio):
     wavs = synthesizer.tts(chat_response + "。")
 
     synthesizer.save_wav(wavs, "output.wav")
-    #tts.tts_to_file(chat_response + "。", file_path="output.wav")
-
-    target_audio = 'target.wav'
-    reference_audio = 'reference.wav'
-    driving_audio = 'driving.wav'
 
-    rate1, data1 = wavfile.read("output.wav")
+    return [result.text, chat_response, "output.wav"]
 
-    #data1 = (data1 * 32767).astype(np.int16)
+def compute_spec(ref_file):
+    y, sr = librosa.load(ref_file, sr=ap.sample_rate)
+    spec = ap.spectrogram(y)
+    spec = torch.FloatTensor(spec).unsqueeze(0)
+    return spec
 
-    #data1 = np.asarray(data1, dtype=np.int16)
 
-    write(target_audio, upload[0], upload[1])
-    write(reference_audio, rate1, data1)
-    write(driving_audio, rate1, data1)
-
+def voice_conversion(ta, ra, da):
+
+    target_audio = 'target.wav'
+    reference_audio = 'reference.wav'
+    driving_audio = 'driving.wav'
+
+    write(target_audio, ta[0], ta[1])
+    write(reference_audio, ra[0], ra[1])
+    write(driving_audio, da[0], da[1])
+
     # !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
     # !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
     # !ffmpeg-normalize $driving_audio -nt rms -t=-27 -o $driving_audio -ar 16000 -f
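
voice_conversion now takes three Gradio-style audio tuples instead of reading output.wav back from disk: write(target_audio, ta[0], ta[1]) matches the (sample_rate, numpy_data) pairs that gr.Audio passes in numpy mode, with `write` presumably scipy.io.wavfile.write. A minimal sketch of that contract (the silent clip is a placeholder):

    import numpy as np
    from scipy.io.wavfile import write

    ta = (16000, np.zeros(16000, dtype=np.int16))  # 1 s of silence at 16 kHz
    write("target.wav", ta[0], ta[1])              # write(filename, rate, data)

One caveat worth flagging: in the Blocks UI added further down, inp3 is created with type="filepath", which hands this function a path string rather than a (rate, data) tuple, so the ta[0]/ta[1] indexing would not line up for that input.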
@@ -245,7 +243,7 @@ def voice_conversion(apikey, upload, audio):
     enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
     torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
 
-    return [result.text, chat_response, "enhanced.wav"]
+    return "enhanced.wav"
 
 c1=gr.Interface(
     fn=voice_conversion,
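
For context on the enhancement step: enhance_model is presumably SpeechBrain's SpectralMaskEnhancement (e.g. the metricgan-plus-voicebank checkpoint, a common pairing with this enhance_batch/torchaudio.save pattern). A minimal sketch under that assumption (the input filename is hypothetical):

    import torch
    import torchaudio
    from speechbrain.pretrained import SpectralMaskEnhancement

    enhance_model = SpectralMaskEnhancement.from_hparams(
        source="speechbrain/metricgan-plus-voicebank",
        savedir="pretrained_models/metricgan-plus-voicebank",
    )
    noisy = enhance_model.load_audio("noisy.wav").unsqueeze(0)  # (1, time) batch
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)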
@@ -278,4 +276,61 @@ c2=gr.Interface(
 )
 
 demo = gr.TabbedInterface([c1, c2], ["wav文件上传", "麦克风上传"], title = '🥳💬💕 - TalktoAI,随时随地,谈天说地!')
-demo.launch(show_error = True)
+demo.launch(show_error = True)
+block = gr.Blocks()
+
+with block:
+    with gr.Group():
+        gr.Markdown(
+            """ # <center>🥳💬💕 - TalktoAI,随时随地,谈天说地!</center>
+
+            ## <center>🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!</center>
+
+            """
+        )
+
+        with gr.Box():
+            with gr.Row().style(mobile_collapse=False, equal_height=True):
+
+                inp1 = gr.components.Textbox(lines=2, label="请填写您的OpenAI-API-key")
+                inp2 = gr.Audio(source="microphone", type="filepath", label="说些什么吧")
+
+            btn = gr.Button("开始对话吧")
+
+            yousay = gr.Textbox(lines=3, label="您的提问")
+            texts = gr.Textbox(lines=5, label="ChatGPT的回答")
+            audio_tts = gr.Audio(label="自动合成的声音")
+
+            btn.click(chatgpt, [inp1, inp2], [yousay, texts, audio_tts])
+
+        with gr.Box():
+            with gr.Row().style(mobile_collapse=False, equal_height=True):
+                inp3 = gr.Audio(source="upload", label="请上传您喜欢的声音(wav/mp3文件, max. 30mb)", type="filepath")
+                inp4 = audio_tts
+                inp5 = audio_tts
+
+            btn1 = gr.Button("用喜欢的声音听一听吧")
+
+            out1 = gr.Audio(label="声音拟合的专属声音")
+
+            btn1.click(voice_conversion, [inp3, inp4, inp5], [out1])
+
+        gr.Markdown(
+            """
+
+            ### <center>注意❗:请不要输入或生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关,请自觉合法合规使用,违反者一切后果自负。</center>
+
+            ### <center>Model by [Raven](https://huggingface.co/spaces/BlinkDL/Raven-RWKV-7B). Thanks to [PENG Bo](https://github.com/BlinkDL). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>
+
+            """
+        )
+
+    gr.HTML('''
+        <div class="footer">
+            <p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
+            </p>
+        </div>
+    ''')
+
+
+block.launch(show_error=True)
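
One behavioral note on the double launch: when this file runs as a plain script, gradio's launch() blocks the main thread by default (prevent_thread_lock=False), so block.launch(show_error=True) may never be reached after demo.launch(show_error = True). If both UIs are really wanted, the first call would need the lock prevented, roughly:

    demo.launch(show_error=True, prevent_thread_lock=True)
    block.launch(show_error=True)

Otherwise, dropping one of the two launches (the new Blocks UI appears to cover the same chat and conversion flows as the TabbedInterface) avoids the unreachable call.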