File size: 3,754 Bytes
98b5314
7f93957
926bc70
7f93957
1b44681
7f93957
b1a4f5a
926bc70
 
 
 
 
 
a2618f3
926bc70
 
 
 
 
2a9c85c
 
926bc70
 
 
 
 
 
 
 
 
 
 
7f93957
 
 
 
 
01bdfbb
7f93957
 
 
 
 
7ab8364
4516d66
 
 
 
 
 
 
 
 
 
 
 
 
 
9869d32
752ff58
03de940
 
 
9869d32
bad025d
 
 
 
 
4516d66
 
 
 
03de940
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
afc99ad
4516d66
 
5166d46
4516d66
 
bad025d
4516d66
 
07e0994
4516d66
bad025d
07e0994
1b44681
5166d46
bad025d
3ca231d
bad025d
9094213
1b44681
1fa0886
4516d66
 
 
1b44681
4516d66
 
7ab8364
98b5314
7f93957
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import time
import random
import whisper
import gradio as gr
from transformers import pipeline

# Load the Whisper "base" checkpoint once at startup; shared by every transcription call.
transcription_model = whisper.load_model("base")

def transcribe(audio):
    """Transcribe a recorded audio file to text using the local Whisper model.

    Parameters
    ----------
    audio : str
        Filesystem path to the recording (the Gradio Audio component is
        configured with ``type="filepath"``).

    Returns
    -------
    str
        The decoded transcript.
    """
    # Load the audio and pad/trim it to the 30-second window Whisper expects.
    waveform = whisper.load_audio(audio)
    waveform = whisper.pad_or_trim(waveform)

    # Make the log-Mel spectrogram and move it to the model's device.
    mel = whisper.log_mel_spectrogram(waveform).to(transcription_model.device)

    # Detect the spoken language (logged for visibility only; decoding below
    # does its own language handling).
    _, probs = transcription_model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio. fp16=False keeps this working on CPU-only machines.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(transcription_model, mel, options)
    return result.text

# Alternative ASR backend (currently disabled): replace Whisper with a Hugging
# Face wav2vec2 pipeline by uncommenting the lines below.
# p = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")

# def transcribe(audio):
#     text = p(audio)["text"]
#     return text

def user(user_message, history):
    """Record the user's message as a new, not-yet-answered chat turn.

    Returns a pair: an empty string (clears the textbox) and a new history
    list with ``[user_message, None]`` appended; the input list is untouched.
    """
    updated_history = history + [[user_message, None]]
    return "", updated_history

def bot(history):
    bot_message = random.choice(["How are you?", "I wanted to tell you that...", "hehehe", "huihuihuihui", "I'm very hungry"])
    history[-1][1] = ""
    for character in bot_message:
        history[-1][1] += character
        time.sleep(0.05)
        yield history

# Custom page styling; elem_ids used in the Blocks layout below ("chatbot",
# "prompt-container") are targeted here by their CSS id selectors.
css = """
        .gradio-container {
            font-family: 'IBM Plex Sans', sans-serif;
        }
        .gr-button {
            color: white;
            border-color: black;
            background: black;
        }
        .container {
            max-width: 730px;
            margin: auto;
            padding-top: 1.5rem;
        }
        #chatbot {
            min-height: 30rem;
            margin-bottom: 15px;
            margin-left: auto;
            margin-right: auto;
        }
        #prompt-container {
            margin-bottom: 15px;
            margin-left: auto;
            margin-right: auto;
        }
"""

# UI layout: page header, chat transcript, a text-prompt row, and a microphone
# row whose transcription fills the textbox. NOTE(review): the `.style(...)`
# calls and `gr.Box` are legacy Gradio 3.x API — pinned to that major version.
with gr.Blocks(css=css) as demo:

    # Static page header.
    gr.HTML(
        """
            <div style="text-align: center; margin: 0 auto;">
              <div
                style="
                  display: inline-flex;
                  align-items: center;
                  gap: 0.8rem;
                  font-size: 1.75rem;
                "
              >
                <h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
                  Interview with AI (Really?)
                </h1>
              </div>
            </div>
        """
    )

    with gr.Box():

        # Chat transcript; elem_id matches the #chatbot CSS rule.
        chatbot = gr.Chatbot([], show_label=False, elem_id="chatbot").style(height="auto")

        # Text-prompt row: textbox (80%) plus Send button (20%).
        with gr.Row(elem_id="prompt-container").style(mobile_collapse=False, equal_height=True):
            with gr.Column(scale=0.8):
                txt = gr.Textbox(
                    show_label=False,
                    placeholder="Type and press enter, or record your response...",
                ).style(container=False)
            with gr.Column(scale=0.2, min_width=0):
                send = gr.Button("Send")

        # Audio row: microphone recorder plus a button that transcribes it.
        with gr.Row(elem_id="audio-container").style(equal_height=True):
            with gr.Column(scale=0.8):
                recorder = gr.Audio(source="microphone", type="filepath", show_label=False).style(container=False)
            with gr.Column(scale=0.2, min_width=0):
                speech = gr.Button("Submit speech").style(height="auto")

        # Wiring: speech -> transcribe -> textbox; submit/click append the user
        # turn (queue=False for snappy echo), then stream the bot reply.
        speech.click(transcribe, inputs=recorder, outputs=txt)
        txt.submit(user, [txt, chatbot], [txt, chatbot], queue=False).then(
            bot, chatbot, chatbot
        )
        send.click(user, [txt, chatbot], [txt, chatbot], queue=False).then(
            bot, chatbot, chatbot
        )

# Queuing is required for generator callbacks (the streaming `bot` reply).
demo.queue()
demo.launch()