Kevin676 committed
Commit 26b2fa8 · 1 Parent(s): e10209c

Update app.py

Files changed (1): app.py (+108 −9)
app.py CHANGED
@@ -38,6 +38,38 @@ from TTS.tts.models import setup_model
 from TTS.config import load_config
 from TTS.tts.models.vits import *
 
+import whisper
+model = whisper.load_model("small")
+import os
+os.system('pip install voicefixer --upgrade')
+from voicefixer import VoiceFixer
+voicefixer = VoiceFixer()
+import gradio as gr
+import openai
+import torch
+import torchaudio
+from speechbrain.pretrained import SpectralMaskEnhancement
+
+enhance_model = SpectralMaskEnhancement.from_hparams(
+    source="speechbrain/metricgan-plus-voicebank",
+    savedir="pretrained_models/metricgan-plus-voicebank",
+    run_opts={"device": "cuda"},
+)
+
+mes1 = [
+    {"role": "system", "content": "You are a TOEFL examiner. Help me improve my oral English and give me feedback."}
+]
+
+mes2 = [
+    {"role": "system", "content": "You are a mental health therapist. Your name is Tina."}
+]
+
+mes3 = [
+    {"role": "system", "content": "You are my personal assistant. Your name is Alice."}
+]
+
+res = []
+
 OUT_PATH = 'out/'
 
 # create output path
@@ -103,8 +135,47 @@ def compute_spec(ref_file):
 
 
 
-def greet(Text,Voicetoclone,VoiceMicrophone):
-    text= "%s" % (Text)
+def greet(apikey, Voicetoclone, VoiceMicrophone, audio, choice1):
+
+    openai.api_key = apikey
+
+    audio = whisper.load_audio(audio)
+    audio = whisper.pad_or_trim(audio)
+
+    # make a log-Mel spectrogram and move it to the same device as the model
+    mel = whisper.log_mel_spectrogram(audio).to(model.device)
+
+    # detect the spoken language
+    _, probs = model.detect_language(mel)
+    print(f"Detected language: {max(probs, key=probs.get)}")
+
+    # decode the audio
+    options = whisper.DecodingOptions()
+    result = whisper.decode(model, mel, options)
+    res.append(result.text)
+
+    if choice1 == "TOEFL":
+        messages = mes1
+    elif choice1 == "Therapist":
+        messages = mes2
+    elif choice1 == "Alice":
+        messages = mes3
+
+    # ChatGPT turn: send the latest transcript as the user message
+    n = len(res)
+    content = res[n-1]
+    messages.append({"role": "user", "content": content})
+
+    completion = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=messages
+    )
+
+    chat_response = completion.choices[0].message.content
+
+    messages.append({"role": "assistant", "content": chat_response})
+
+    text = chat_response
     if Voicetoclone is not None:
         reference_files= "%s" % (Voicetoclone)
         print("path url")
@@ -154,12 +225,40 @@ def greet(Text,Voicetoclone,VoiceMicrophone):
     out_path = os.path.join(OUT_PATH, file_name)
     print(" > Saving output to {}".format(out_path))
     ap.save_wav(wav, out_path)
-    return out_path
 
-demo = gr.Interface(
+    voicefixer.restore(input=out_path,      # input wav file path
+                       output="audio1.wav", # output wav file path
+                       cuda=True,           # whether to use GPU acceleration
+                       mode=0)              # try mode 0 or 1 to find the best result
+
+
+
+    noisy = enhance_model.load_audio(
+        "audio1.wav"
+    ).unsqueeze(0)
+
+    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
+    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
+
+    return [result.text, chat_response, "enhanced.wav"]
+
+output_1 = gr.Textbox(label="Speech to Text")
+output_2 = gr.Textbox(label="ChatGPT Output")
+output_3 = gr.Audio(label="Audio with Custom Voice")
+
+gr.Interface(
+    title='🥳💬💕 - TalktoAI: anytime, anywhere, talk about anything!',
+    theme="huggingface",
+    description="🤖 - Let AI with humanistic care benefit everyone! AI for good, a brilliant civilization! TalktoAI - Enable the future!",
     fn=greet,
-    inputs=[gr.inputs.Textbox(label='What would you like the voice to say? (max. 2000 characters per request)'),gr.Audio(type="filepath", source="upload",label='Please upload a voice to clone (max. 30mb)'),gr.Audio(source="microphone", type="filepath", streaming=True)],
-    outputs="audio",
-    title="Bilal's Voice Cloning Tool"
-)
-demo.launch()
+    inputs=[
+        gr.Textbox(lines=1, label="Please enter your OpenAI API key"),
+        gr.inputs.Audio(source="upload", label="Please upload the voice you like (wav file)", type="filepath"),
+        gr.inputs.Audio(source="microphone", label="Or record the voice you like with the microphone; use either the recording or the upload", type="filepath"),
+        gr.inputs.Audio(source="microphone", type="filepath"),
+        gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
+    ],
+    outputs=[
+        output_1, output_2, output_3
+    ],
+).launch()
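A note on the Whisper wiring added above: pad_or_trim fixes the input at exactly 30 seconds, so anything a user says past that point is silently dropped; the higher-level model.transcribe() would chunk longer audio internally. A minimal standalone sketch of the same decoding path, assuming the openai-whisper package and a hypothetical input file sample.wav:

import whisper

model = whisper.load_model("small")

# load as a 16 kHz mono float32 waveform, then pad or cut to exactly 30 s
audio = whisper.load_audio("sample.wav")
audio = whisper.pad_or_trim(audio)

# log-Mel spectrogram on the model's device
mel = whisper.log_mel_spectrogram(audio).to(model.device)

# language identification: probs maps language codes to probabilities
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")

# decode with default options and print the transcript
result = whisper.decode(model, mel, whisper.DecodingOptions())
print(result.text)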
 
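A design note on the chat state: messages = mes1 binds a name to the module-level list rather than copying it, so every request appends its user turn and the assistant reply to state shared across calls (and across users of the Space), and res grows without bound. A sketch of a per-request alternative under the same pre-1.0 openai.ChatCompletion API the commit uses; the name chat_once is hypothetical and openai.api_key is assumed to be set already:

import copy
import openai

PROMPTS = {
    "TOEFL": [{"role": "system", "content": "You are a TOEFL examiner. Help me improve my oral English and give me feedback."}],
    "Therapist": [{"role": "system", "content": "You are a mental health therapist. Your name is Tina."}],
    "Alice": [{"role": "system", "content": "You are my personal assistant. Your name is Alice."}],
}

def chat_once(choice1, user_text):
    # deep-copy so the shared prompt templates are never mutated
    messages = copy.deepcopy(PROMPTS[choice1])
    messages.append({"role": "user", "content": user_text})
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    return completion.choices[0].message.content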
 
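The post-processing chain appended after ap.save_wav() also runs on its own: VoiceFixer first repairs the synthesized waveform, then SpeechBrain's MetricGAN+ model denoises it. A standalone sketch, assuming voicefixer is already installed (rather than running os.system('pip install ...') at import time), a GPU for cuda=True as in the commit's run_opts, and a hypothetical input file tts_out.wav:

import torch
import torchaudio
from voicefixer import VoiceFixer
from speechbrain.pretrained import SpectralMaskEnhancement

# stage 1: VoiceFixer restoration of the raw TTS output
voicefixer = VoiceFixer()
voicefixer.restore(input="tts_out.wav", output="restored.wav", cuda=True, mode=0)

# stage 2: MetricGAN+ spectral-mask enhancement
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
    run_opts={"device": "cuda"},
)

# load_audio resamples to the model's 16 kHz rate and returns a 1-D tensor;
# enhance_batch expects (batch, time) plus relative lengths in [0, 1]
noisy = enhance_model.load_audio("restored.wav").unsqueeze(0)
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.0]))
torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)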