Kevin676 committed
Commit 5be928f · 1 Parent(s): 4561688

Create app.py

Files changed (1):
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
+ from TTS.api import TTS
+ from voicefixer import VoiceFixer
+ import whisper
+ import gradio as gr
+ import openai
+
+ # YourTTS: multilingual, multi-speaker model used here for voice cloning
+ tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
+
+ # Whisper "small" checkpoint for speech recognition
+ model = whisper.load_model("small")
+
+ # VoiceFixer removes noise and artifacts from the synthesized speech
+ voicefixer = VoiceFixer()
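+ # Note: gpu=True above (and cuda=True in voicefixer.restore below) assumes a
+ # CUDA-capable device; both accept False to fall back to CPU, at reduced speed.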
+
+ # System prompts for the three selectable personas
+ mes1 = [
+     {"role": "system", "content": "You are a TOEFL examiner. Help me improve my oral English and give me feedback. Replace the Arabic numerals with the corresponding English words in your response."}
+ ]
+
+ mes2 = [
+     {"role": "system", "content": "You are a mental health therapist. Your name is Tina. Replace the Arabic numerals with the corresponding English words in your response."}
+ ]
+
+ mes3 = [
+     {"role": "system", "content": "You are my personal assistant. Your name is Alice. Replace the Arabic numerals with the corresponding English words in your response."}
+ ]
+
+ # Transcriptions accumulate here across calls
+ res = []
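+ # Caveat: transcribe() appends user/assistant turns to mes1/mes2/mes3 in
+ # place, so conversation history persists, and is shared, across all calls
+ # for the lifetime of the app.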
+
+ def transcribe(apikey, upload, audio, choice1):
+
+     openai.api_key = apikey
+
+     # load audio and pad/trim it to fit 30 seconds
+     audio = whisper.load_audio(audio)
+     audio = whisper.pad_or_trim(audio)
+
+     # make log-Mel spectrogram and move to the same device as the model
+     mel = whisper.log_mel_spectrogram(audio).to(model.device)
+
+     # detect the spoken language
+     _, probs = model.detect_language(mel)
+     print(f"Detected language: {max(probs, key=probs.get)}")
+
+     # decode the audio
+     options = whisper.DecodingOptions()
+     result = whisper.decode(model, mel, options)
+     res.append(result.text)
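+     # Note: pad_or_trim caps the input at 30 seconds, so longer recordings
+     # are truncated here; whisper's model.transcribe(path) would window
+     # longer clips automatically if that ever becomes a requirement.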
+
+     # pick the system prompt that matches the selected persona
+     if choice1 == "TOEFL":
+         messages = mes1
+     elif choice1 == "Therapist":
+         messages = mes2
+     else:  # "Alice"
+         messages = mes3
+
+     # send the latest transcription to ChatGPT
+     content = res[-1]
+     messages.append({"role": "user", "content": content})
+
+     completion = openai.ChatCompletion.create(
+         model="gpt-3.5-turbo",
+         messages=messages
+     )
+
+     chat_response = completion.choices[0].message.content
+
+     messages.append({"role": "assistant", "content": chat_response})
+
+     # synthesize the reply in the voice of the uploaded reference clip
+     tts.tts_to_file(chat_response, speaker_wav=upload, language="en", file_path="output.wav")
+
+     # clean up the synthesized audio
+     voicefixer.restore(input="output.wav",   # input wav file path
+                        output="output1.wav", # output wav file path
+                        cuda=True,            # whether to use GPU acceleration
+                        mode=0)               # try modes 0 and 1 to find the best result
+
+     return [result.text, chat_response, "output1.wav"]
+
+ output_1 = gr.Textbox(label="Speech to Text")
+ output_2 = gr.Textbox(label="ChatGPT Output")
+ output_3 = gr.Audio(label="Audio")
+
+ gr.Interface(
+     title='🥳💬💕 - TalktoAI, anytime and anywhere, talk about anything!',
+     theme="huggingface",
+     description="🤖 - Let AI with a human touch benefit everyone! AI for good, a brilliant civilization! TalktoAI, Enable the future!",
+     fn=transcribe,
+     inputs=[
+         gr.Textbox(lines=1, label="Please enter your OpenAI API key"),
+         gr.Audio(source="upload", label="Please upload a voice you like", type="filepath"),
+         gr.Audio(source="microphone", type="filepath"),
+         gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
+     ],
+     outputs=[
+         output_1, output_2, output_3
+     ],
+ ).launch()
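
For reference, a minimal sketch of how transcribe() could be driven outside the Gradio UI, assuming the launch() call were guarded by if __name__ == "__main__": so the module can be imported, and assuming a valid OpenAI API key plus two local WAV files (the key and paths below are hypothetical placeholders):

    from app import transcribe

    text, reply, fixed_wav = transcribe(
        apikey="sk-...",               # hypothetical OpenAI API key
        upload="reference_voice.wav",  # hypothetical clip whose voice is cloned
        audio="question.wav",          # hypothetical recording to transcribe
        choice1="TOEFL",               # one of "TOEFL", "Therapist", "Alice"
    )
    print(text)       # Whisper transcription of question.wav
    print(reply)      # ChatGPT's response as text
    print(fixed_wav)  # "output1.wav": the cleaned, synthesized reply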