"""Minimal Gradio demo: record speech from the microphone and transcribe it.

Running this file loads a wav2vec2 speech-recognition pipeline and launches
a Gradio interface whose single input is a microphone recording and whose
single output is the recognized text.
"""

from typing import Optional

import gradio as gr
from transformers import pipeline

# Build the pipeline once at import time so every request reuses the same
# loaded model instead of reloading it per call.
p = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)


def transcribe(audio: Optional[str]) -> str:
    """Return the transcription of a recorded audio clip.

    Args:
        audio: The value Gradio passes for the audio input. The component
            is configured with ``type="filepath"``, so this is expected to
            be a path to the recorded file, or ``None`` when the user
            submits without recording anything.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string
        when no audio was provided.
    """
    # Guard the "nothing recorded" case: handing None to the pipeline would
    # raise instead of producing an (empty) transcript.
    if audio is None:
        return ""

    result = p(audio)
    return result["text"]


# NOTE(review): `source="microphone"` is the keyword this file was written
# against; newer Gradio releases may expect `sources=[...]` instead --
# confirm against the installed Gradio version before upgrading.
gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
).launch()