File size: 1,006 Bytes
fd585a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
import gradio as gr
import whisper
import time

model = whisper.load_model("base")

def transcribe(audio, state="", delay=0.2):
    """Transcribe one audio chunk and append its text to the running transcript.

    Args:
        audio: Path of the audio file to transcribe, or ``None`` when the
            caller has no audio to offer for this invocation.
        state: Transcript accumulated by earlier calls. ``None`` is treated
            as an empty transcript.
        delay: Seconds to sleep before transcribing.

    Returns:
        A ``(text, state)`` tuple containing the updated transcript twice:
        one copy for display and one to be fed back in as ``state`` on the
        next call.
    """
    # The state may arrive as None rather than ""; normalise it so the
    # string concatenation below cannot raise a TypeError.
    state = state or ""
    # Without audio there is nothing to decode; return the transcript
    # unchanged instead of failing inside the model (and skip the sleep).
    if audio is None:
        return state, state
    time.sleep(delay)
    result = model.transcribe(audio, language="english")
    state += result['text'] + " "
    return state, state

def debug(audio, state="", delay=0.2):
    """Append the decoded waveform of an audio file to the running state.

    Debugging stand-in that takes the same arguments and returns the same
    ``(text, state)`` pair as ``transcribe``, but reports the raw decoded
    audio instead of a transcription.

    Args:
        audio: Path of the audio file to decode, or ``None`` when there is
            no audio for this invocation.
        state: Text accumulated by earlier calls. ``None`` is treated as
            empty.
        delay: Unused; accepted so the signature matches ``transcribe``.

    Returns:
        A ``(text, state)`` tuple containing the updated text twice.
    """
    state = state or ""
    # Nothing to decode: hand the accumulated text back untouched.
    if audio is None:
        return state, state
    # Decode once and reuse the array for both the shape printout and the
    # string dump, rather than loading the same file twice.
    waveform = whisper.load_audio(audio)
    print(waveform.shape)
    state += str(waveform)
    return state, state

# Slider controlling the pause applied before each transcription call.
# Uses the top-level gr.Slider component (initial value via `value=`), matching
# the gr.Audio / gr.Textbox components used elsewhere in this file, instead of
# the deprecated gr.inputs namespace.
delay_slider = gr.Slider(minimum=0, maximum=10, value=0.2, label="Delay (seconds)")

# Build the live demo: streamed microphone audio goes in, together with the
# carried-over transcript state and the delay setting; the accumulated
# transcript comes out and is also stored back into the state.
# For file-based testing, the microphone input can be replaced with
# gr.Audio(source="upload", type="filepath").
mic_input = gr.Audio(source="microphone", type="filepath", streaming=True)
transcript_box = gr.Textbox(label="Transcription", lines=10, max_lines=20)

demo = gr.Interface(
    fn=transcribe,  # use `debug` here to inspect the decoded audio instead
    inputs=[mic_input, "state", delay_slider],
    outputs=[transcript_box, "state"],
    live=True,
    allow_flagging='never',
)
demo.launch()