File size: 4,836 Bytes
eb47f12
cd87708
 
3ea90d5
eb47f12
cd87708
786bfcc
eb47f12
cd87708
 
 
 
eb47f12
0b0500e
42b0cc1
eb47f12
 
62848ce
cd87708
0e5f7d6
cd87708
 
 
17693a4
cd87708
 
eb47f12
cd87708
 
0e5f7d6
eb47f12
 
0e5f7d6
62848ce
 
0e5f7d6
 
 
974e3f8
0e5f7d6
 
eb47f12
0e5f7d6
 
 
 
 
eb47f12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd87708
eb47f12
 
 
 
 
81240b7
eb47f12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5721f7
 
eb47f12
 
 
 
 
 
 
 
 
 
 
 
81240b7
eb47f12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ea90d5
eb47f12
81240b7
eb47f12
 
 
 
0b0500e
eb47f12
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import os
import openai
import whisper
import gradio as gr

# The OpenAI API key is read from the 'SessionToken' environment variable;
# os.environ.get returns None when that variable is not set.
openai.api_key = os.environ.get('SessionToken')

# Load the "small" Whisper checkpoint once at import time; translate() reuses
# this module-level model for every request.
whisper_model = whisper.load_model("small")

# NOTE(review): chat_hf assigns its own local `conversation` on every call, so
# this module-level value is never read or updated by it.
conversation = ""
# Speaker labels used when building the completion prompt and when trimming
# the model's reply.
user_name = "MH"
bot_name = "bbDemo"

def _complete_reply(prompt, max_tokens):
    """Request the bot's next line from the completion API and tidy it up.

    Args:
        prompt: Full prompt text, ending with the bot's speaker tag.
        max_tokens: Upper bound on generated tokens for this request.

    Returns:
        The first choice's text with newlines removed and anything from the
        first speaker tag onwards cut off.
    """
    response = openai.Completion.create(
        engine='text-davinci-003', prompt=prompt, max_tokens=max_tokens
    )
    reply = response["choices"][0]["text"].replace("\n", "")
    # The model may keep writing further turns of the dialogue; keep only the
    # text that precedes the first "<speaker>: " tag.
    for speaker in (user_name, bot_name):
        reply = reply.split(speaker + ": ", 1)[0]
    return reply


def chat_hf(audio):
    """Transcribe a voice message and fetch a completion-model reply to it.

    Each call is independent: the prompt contains only the current message,
    so no conversation history is shared between calls (or between users).

    Args:
        audio: Path to the recorded audio file, or None when nothing was
            recorded.

    Returns:
        A ``(transcript, reply)`` tuple of strings. Both are empty when no
        audio was supplied.

    Raises:
        Exception: Whatever ``translate`` raises, or whatever the completion
            API raises if the single retry fails as well.
    """
    if audio is None:
        # Nothing was recorded; avoid handing None to Whisper.
        return "", ""

    # Transcribe exactly once. Re-running Whisper on the same file after a
    # failure would only repeat the same work, so only the API call is retried.
    whisper_text = translate(audio)
    prompt = f"{user_name}: {whisper_text}\n{bot_name}: "

    try:
        gpt_response = _complete_reply(prompt, max_tokens=50)
    except Exception as exc:  # boundary handler: report, then retry once
        print(f"Error: {exc!r}")
        # Same fallback as before: one more attempt with a larger budget.
        gpt_response = _complete_reply(prompt, max_tokens=1024)

    return whisper_text, gpt_response


def translate(audio):
    """Transcribe an audio file with the module-level Whisper model.

    Despite the name, the decoding task is "transcribe": the text is returned
    in the spoken language, not translated.

    Args:
        audio: Path of the audio file, passed to ``whisper.load_audio``.

    Returns:
        The transcript text produced by ``whisper.decode``.
    """
    print("""
    \u2014
    Sending audio to Whisper ...
    \u2014
    """)

    samples = whisper.load_audio(audio)
    # pad_or_trim fits the clip to the model's fixed-length input window.
    samples = whisper.pad_or_trim(samples)

    mel = whisper.log_mel_spectrogram(samples).to(whisper_model.device)

    # No separate detect_language() pass: with no language set in the options,
    # whisper.decode identifies the language itself and reports it on the
    # result, so an explicit call whose output is discarded is wasted work.
    options = whisper.DecodingOptions(task="transcribe", fp16=False)
    transcription = whisper.decode(whisper_model, mel, options)

    print("language spoken: " + transcription.language)
    print("transcript: " + transcription.text)
    # Horizontal rule made of em dashes, written as an escape so the source
    # stays ASCII-safe.
    print("\u2014" * 43)

    return transcription.text

# Header HTML for the page: a centred heading plus a one-line tagline.
# Rendered through gr.HTML(title) when the UI is built.
title = """
    <div style="text-align: center; max-width: 500px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
            margin-bottom: 10px;
        "
        >
        <h1 style="font-weight: 600; margin-bottom: 7px;">
            Whisper to chatGPT
        </h1>
        </div>
        <p style="margin-bottom: 10px;font-size: 94%;font-weight: 100;line-height: 1.5em;">
        Chat with GPT with your voice in your native language!
        <br />
        </p>
        <p style="font-size: 94%">
            <span style="display: flex;align-items: center;justify-content: center;height: 30px;">
            </span>
        </p>
    </div>
"""

# Footer HTML with credits, rendered through gr.HTML(article).
# The emoji is written as the escape \U0001F917 (hugging face) so it survives
# tools that mis-decode non-ASCII bytes; the previous literal had been garbled
# into unrelated characters.
article = """
    <div class="footer">
        <p><a href="https://chat.openai.com/chat" target="_blank">chatGPT</a> 
        by <a href="https://openai.com/" style="text-decoration: underline;" target="_blank">OpenAI</a> - 
        Gradio Demo by \U0001F917 <a href="https://englishphonetics.net/" target="_blank">Eriberto Oliveira</a>
        </p>
    </div>
"""

# Page stylesheet passed to gr.Blocks(css=...): constrains the
# "#col-container" column width, underlines links, and styles the ".footer"
# block for both the default and ".dark" themes.
css = '''
    #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
    a {text-decoration-line: underline; font-weight: 600;}
    .footer {
            margin-bottom: 45px;
            margin-top: 35px;
            text-align: center;
            border-bottom: 1px solid #e5e5e5;
        }
        .footer>p {
            font-size: .8rem;
            display: inline-block;
            padding: 0 10px;
            transform: translateY(10px);
            background: white;
        }
        .dark .footer {
            border-color: #303030;
        }
        .dark .footer>p {
            background: #0b0f19;
        }
'''
 

# Build the page: header, microphone recorder and send button in the first
# column; transcript box, reply box and footer in the second.
with gr.Blocks(css=css) as demo:
    
    with gr.Column(elem_id="col-container"):
        
        gr.HTML(title)
        
        with gr.Row():
            # type="filepath": the recording reaches chat_hf as a file path.
            record_input = gr.Audio(source="microphone",type="filepath", show_label=False)
            send_btn = gr.Button("Send my message !")
   
    with gr.Column():
        audio_translation = gr.Textbox(type="text",label="Whisper transcription")
        gpt_response = gr.Textbox(type="text",label="chatGPT response")

        gr.HTML(article)
    
    # chat_hf returns (transcript, reply), which fill the two textboxes in order.
    send_btn.click(chat_hf, inputs=[record_input], outputs=[audio_translation, gpt_response])

# Enable request queuing, then start the server at import time.
# NOTE(review): source=/concurrency_count= look like Gradio 3.x keywords —
# confirm the pinned Gradio version before upgrading.
demo.queue(max_size=32, concurrency_count=20).launch(debug=True)