"""Streaming speech-to-text demo.

Microphone audio chunks stream from the browser into ``transcribe``, which
runs each chunk through a Whisper ASR pipeline and appends the result to a
running transcript kept in per-session Gradio state.
"""
import time

import gradio as gr
from transformers import pipeline

# English-only Whisper base model; loaded once at import time so every
# streamed chunk reuses the same pipeline.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")


def transcribe(audio, state=""):
    """Transcribe one streamed audio chunk and append it to the transcript.

    Args:
        audio: Filepath of the latest recorded chunk (the Audio component
            is configured with ``type="filepath"``).
        state: Transcript accumulated from previous chunks.

    Returns:
        Tuple ``(display_text, new_state)`` — the same string twice: once
        for the textbox, once carried forward as session state.
    """
    # Brief pause to throttle how often chunks are transcribed.
    time.sleep(0.5)
    text = pipe(audio)["text"]
    state += text + " "
    return state, state


with gr.Blocks() as demo:
    state = gr.State(value="")
    with gr.Row():
        with gr.Column():
            # ``sources`` is documented as a list of input sources.
            # ``streaming=True`` makes the component emit chunks while
            # recording so the ``.stream`` event below fires continuously
            # (per the Gradio real-time ASR guide) — confirm against the
            # installed Gradio version.
            audio = gr.Audio(sources=["microphone"], type="filepath", streaming=True)
        with gr.Column():
            textbox = gr.Textbox()
    audio.stream(fn=transcribe, inputs=[audio, state], outputs=[textbox, state])

demo.launch(debug=True)