Kevin676 committed on
Commit
0ecdad8
·
1 Parent(s): 980d7c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -13
app.py CHANGED
@@ -2,15 +2,11 @@ from TTS.api import TTS
2
  tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
3
  import whisper
4
  model = whisper.load_model("small")
5
- import openai
6
- import os
7
- os.system('pip install gradio==2.3.0a0')
8
- os.system('pip install voicefixer --upgrade')
9
-
10
  import gradio as gr
11
-
12
- from voicefixer import VoiceFixer
13
- voicefixer = VoiceFixer()
14
 
15
  mes1 = [
16
  {"role": "system", "content": "You are a TOEFL examiner. Help me improve my oral Englsih and give me feedback. Replace the Arabic numerals with the corresponding English words in your response."}
@@ -72,12 +68,19 @@ def transcribe(apikey, upload, audio, choice1):
72
 
73
  audio_in = "output.wav"
74
 
75
- voicefixer.restore(input=audio_in, # input wav file path
76
- output="output1.wav", # output wav file path
77
- cuda=True, # whether to use gpu acceleration
78
- mode = 0) # You can try out mode 0, 1 to find out the best result
 
 
 
 
79
 
80
- return [result.text, chat_response, "output1.wav"]
 
 
 
81
 
82
  output_1 = gr.Textbox(label="Speech to Text")
83
  output_2 = gr.Textbox(label="ChatGPT Output")
 
2
  tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
3
  import whisper
4
  model = whisper.load_model("small")
5
+ import torch
6
+ import torchaudio
7
+ from speechbrain.pretrained import SpectralMaskEnhancement
 
 
8
  import gradio as gr
9
+ import openai
 
 
10
 
11
  mes1 = [
12
  {"role": "system", "content": "You are a TOEFL examiner. Help me improve my oral Englsih and give me feedback. Replace the Arabic numerals with the corresponding English words in your response."}
 
68
 
69
  audio_in = "output.wav"
70
 
71
+ enhance_model = SpectralMaskEnhancement.from_hparams(
72
+ source="speechbrain/metricgan-plus-voicebank",
73
+ savedir="pretrained_models/metricgan-plus-voicebank",
74
+ )
75
+
76
+ noisy = enhance_model.load_audio(
77
+ audio_in
78
+ ).unsqueeze(0)
79
 
80
+ enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
81
+ torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
82
+
83
+ return [result.text, chat_response, "enhanced.wav"]
84
 
85
  output_1 = gr.Textbox(label="Speech to Text")
86
  output_2 = gr.Textbox(label="ChatGPT Output")