"""Live microphone transcription demo: streams audio chunks from the browser
into a Whisper ASR pipeline and accumulates the transcript in a textbox."""

import time

import torch
import gradio as gr
from transformers import pipeline

# Run the ASR model on GPU when available, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base.en",
    device=device,
)


def transcribe(audio, state=""):
    """Transcribe one streamed audio chunk and append it to the running text.

    Parameters
    ----------
    audio : str
        Filepath of the latest recorded audio chunk (``type="filepath"``).
    state : str
        Transcript accumulated so far, fed back via ``gr.State``.

    Returns
    -------
    tuple[str, str]
        The updated transcript twice: once for the visible textbox and once
        to persist as the new session state.
    """
    # NOTE(review): throttles how often chunks are transcribed so each chunk
    # contains enough audio for Whisper — presumably tuned by hand; confirm.
    time.sleep(2)
    text = pipe(audio)["text"]
    state += text + " "
    return state, state


with gr.Blocks() as demo:
    # Per-session accumulator for the transcript.
    state = gr.State(value="")
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(sources="microphone", type="filepath")
        with gr.Column():
            textbox = gr.Textbox()
    # Each streamed chunk is passed to `transcribe` together with the current
    # state; the two outputs update the textbox and the state respectively.
    audio.stream(fn=transcribe, inputs=[audio, state], outputs=[textbox, state])

demo.launch(debug=True)