Spaces:

Kevin676
/

ChatGPT-with-Speech-Enhancement

Runtime error

Kevin676 committed on Apr 1, 2023

Commit

0ecdad8

1 Parent(s): 980d7c0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,15 +2,11 @@ from TTS.api import TTS
 tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
 import whisper
 model = whisper.load_model("small")
-import openai
-import os
-os.system('pip install gradio==2.3.0a0')
-os.system('pip install voicefixer --upgrade')
 import gradio as gr
-from voicefixer import VoiceFixer
-voicefixer = VoiceFixer()
 mes1 = [
     {"role": "system", "content": "You are a TOEFL examiner. Help me improve my oral Englsih and give me feedback. Replace the Arabic numerals with the corresponding English words in your response."}
@@ -72,12 +68,19 @@ def transcribe(apikey, upload, audio, choice1):
     audio_in = "output.wav"
-    voicefixer.restore(input=audio_in, # input wav file path
-                    output="output1.wav", # output wav file path
-                    cuda=True, # whether to use gpu acceleration
-                    mode = 0) # You can try out mode 0, 1 to find out the best result
-    return [result.text, chat_response, "output1.wav"]
 output_1 = gr.Textbox(label="Speech to Text")
 output_2 = gr.Textbox(label="ChatGPT Output")

 tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
 import whisper
 model = whisper.load_model("small")
+import torch
+import torchaudio
+from speechbrain.pretrained import SpectralMaskEnhancement
 import gradio as gr
+import openai
 mes1 = [
     {"role": "system", "content": "You are a TOEFL examiner. Help me improve my oral Englsih and give me feedback. Replace the Arabic numerals with the corresponding English words in your response."}
     audio_in = "output.wav"
+    enhance_model = SpectralMaskEnhancement.from_hparams(
+    source="speechbrain/metricgan-plus-voicebank",
+    savedir="pretrained_models/metricgan-plus-voicebank",
+    )
+    noisy = enhance_model.load_audio(
+    audio_in
+    ).unsqueeze(0)
+    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
+    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
+    return [result.text, chat_response, "enhanced.wav"]
 output_1 = gr.Textbox(label="Speech to Text")
 output_2 = gr.Textbox(label="ChatGPT Output")