Kevin676 committed on
Commit
767efd9
·
1 Parent(s): 0e3ff13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -22
app.py CHANGED
@@ -36,6 +36,7 @@ from pydub import AudioSegment
36
  import librosa
37
 
38
  from scipy.io.wavfile import write, read
 
39
 
40
  import subprocess
41
 
@@ -194,16 +195,15 @@ def voice_conversion(apikey, ta, audio, choice1):
194
  tts = gTTS(chat_response, lang='zh-CN')
195
  tts.save("output.wav")
196
 
197
- target_audio = "target.wav"
198
- reference_audio = "output.wav"
199
- driving_audio = "output.wav"
200
 
201
- ra = "output.wav"
202
- da = "output.wav"
203
 
204
  write(target_audio, ta[0], ta[1])
205
- write(reference_audio, ra[0], ra[1])
206
- write(driving_audio, da[0], da[1])
207
 
208
  # !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
209
  # !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
@@ -236,20 +236,19 @@ def voice_conversion(apikey, ta, audio, choice1):
236
  # print("Reference Audio after decoder:")
237
  # IPython.display.display(Audio(ref_wav_voc, rate=ap.sample_rate))
238
 
239
- # voicefixer.restore(input=ref_wav_voc, # input wav file path
240
- # output="audio1.wav", # output wav file path
241
- # cuda=True, # whether to use gpu acceleration
242
- # mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
243
 
244
- # noisy = enhance_model.load_audio(
245
- # "audio1.wav"
246
- # ).unsqueeze(0)
247
 
248
- # enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
249
- # torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
250
 
251
- # return [result.text, chat_response, "enhanced.wav"]
252
- return (ap.sample_rate, ref_wav_voc)
253
 
254
  c1=gr.Interface(
255
  fn=voice_conversion,
@@ -260,8 +259,7 @@ c1=gr.Interface(
260
  gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
261
  ],
262
  outputs=[
263
- # gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
264
- gr.Audio(label="Audio with Custom Voice"),
265
  ],
266
  #theme="huggingface",
267
  description = "🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
@@ -276,8 +274,7 @@ c2=gr.Interface(
276
  gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
277
  ],
278
  outputs=[
279
- # gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
280
- gr.Audio(label="Audio with Custom Voice"),
281
  ],
282
  #theme="huggingface",
283
  description = "🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
 
36
  import librosa
37
 
38
  from scipy.io.wavfile import write, read
39
+ from scipy.io import wavfile
40
 
41
  import subprocess
42
 
 
195
  tts = gTTS(chat_response, lang='zh-CN')
196
  tts.save("output.wav")
197
 
198
+ target_audio = 'target.wav'
199
+ reference_audio = 'reference.wav'
200
+ driving_audio = 'driving.wav'
201
 
202
+ rate1, data1 = wavfile.read("mywav.wav")
 
203
 
204
  write(target_audio, ta[0], ta[1])
205
+ write(reference_audio, rate1, data1)
206
+ write(driving_audio, rate1, data1)
207
 
208
  # !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
209
  # !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
 
236
  # print("Reference Audio after decoder:")
237
  # IPython.display.display(Audio(ref_wav_voc, rate=ap.sample_rate))
238
 
239
+ voicefixer.restore(input=ref_wav_voc, # input wav file path
240
+ output="audio1.wav", # output wav file path
241
+ cuda=True, # whether to use gpu acceleration
242
+ mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
243
 
244
+ noisy = enhance_model.load_audio(
245
+ "audio1.wav"
246
+ ).unsqueeze(0)
247
 
248
+ enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
249
+ torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
250
 
251
+ return [result.text, chat_response, "enhanced.wav"]
 
252
 
253
  c1=gr.Interface(
254
  fn=voice_conversion,
 
259
  gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
260
  ],
261
  outputs=[
262
+ gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
 
263
  ],
264
  #theme="huggingface",
265
  description = "🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
 
274
  gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
275
  ],
276
  outputs=[
277
+ gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
 
278
  ],
279
  #theme="huggingface",
280
  description = "🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",