Kevin676 committed on
Commit
767efd9
·
1 Parent(s): 0e3ff13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -22
app.py CHANGED
@@ -36,6 +36,7 @@ from pydub import AudioSegment
36
  import librosa
37
 
38
  from scipy.io.wavfile import write, read
 
39
 
40
  import subprocess
41
 
@@ -194,16 +195,15 @@ def voice_conversion(apikey, ta, audio, choice1):
194
  tts = gTTS(chat_response, lang='zh-CN')
195
  tts.save("output.wav")
196
 
197
- target_audio = "target.wav"
198
- reference_audio = "output.wav"
199
- driving_audio = "output.wav"
200
 
201
- ra = "output.wav"
202
- da = "output.wav"
203
 
204
  write(target_audio, ta[0], ta[1])
205
- write(reference_audio, ra[0], ra[1])
206
- write(driving_audio, da[0], da[1])
207
 
208
  # !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
209
  # !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
@@ -236,20 +236,19 @@ def voice_conversion(apikey, ta, audio, choice1):
236
  # print("Reference Audio after decoder:")
237
  # IPython.display.display(Audio(ref_wav_voc, rate=ap.sample_rate))
238
 
239
- # voicefixer.restore(input=ref_wav_voc, # input wav file path
240
- # output="audio1.wav", # output wav file path
241
- # cuda=True, # whether to use gpu acceleration
242
- # mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
243
 
244
- # noisy = enhance_model.load_audio(
245
- # "audio1.wav"
246
- # ).unsqueeze(0)
247
 
248
- # enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
249
- # torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
250
 
251
- # return [result.text, chat_response, "enhanced.wav"]
252
- return (ap.sample_rate, ref_wav_voc)
253
 
254
  c1=gr.Interface(
255
  fn=voice_conversion,
@@ -260,8 +259,7 @@ c1=gr.Interface(
260
  gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
261
  ],
262
  outputs=[
263
- # gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
264
- gr.Audio(label="Audio with Custom Voice"),
265
  ],
266
  #theme="huggingface",
267
  description = "🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
@@ -276,8 +274,7 @@ c2=gr.Interface(
276
  gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
277
  ],
278
  outputs=[
279
- # gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
280
- gr.Audio(label="Audio with Custom Voice"),
281
  ],
282
  #theme="huggingface",
283
  description = "🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
 
36
  import librosa
37
 
38
  from scipy.io.wavfile import write, read
39
+ from scipy.io import wavfile
40
 
41
  import subprocess
42
 
 
195
  tts = gTTS(chat_response, lang='zh-CN')
196
  tts.save("output.wav")
197
 
198
+ target_audio = 'target.wav'
199
+ reference_audio = 'reference.wav'
200
+ driving_audio = 'driving.wav'
201
 
202
+ rate1, data1 = wavfile.read("mywav.wav")
 
203
 
204
  write(target_audio, ta[0], ta[1])
205
+ write(reference_audio, rate1, data1)
206
+ write(driving_audio, rate1, data1)
207
 
208
  # !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
209
  # !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
 
236
  # print("Reference Audio after decoder:")
237
  # IPython.display.display(Audio(ref_wav_voc, rate=ap.sample_rate))
238
 
239
+ voicefixer.restore(input=ref_wav_voc, # input wav file path
240
+ output="audio1.wav", # output wav file path
241
+ cuda=True, # whether to use gpu acceleration
242
+ mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
243
 
244
+ noisy = enhance_model.load_audio(
245
+ "audio1.wav"
246
+ ).unsqueeze(0)
247
 
248
+ enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
249
+ torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
250
 
251
+ return [result.text, chat_response, "enhanced.wav"]
 
252
 
253
  c1=gr.Interface(
254
  fn=voice_conversion,
 
259
  gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
260
  ],
261
  outputs=[
262
+ gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
 
263
  ],
264
  #theme="huggingface",
265
  description = "🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
 
274
  gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
275
  ],
276
  outputs=[
277
+ gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
 
278
  ],
279
  #theme="huggingface",
280
  description = "🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",