"""Gradio demo that transcribes speech with the ``openai/whisper-small`` model."""

from functools import lru_cache

import gradio as gr
import torch  # noqa: F401 -- only referenced by the optional snippet below
from datasets import load_dataset  # noqa: F401 -- see optional snippet below
from transformers import pipeline

# Optional snippet for trying the pipeline without the UI:
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
# sample = ds[0]["audio"]


@lru_cache(maxsize=1)
def _get_pipeline():
    """Build the speech-recognition pipeline once and reuse it afterwards.

    Constructing the pipeline loads the model, so doing it per request (as
    the callback used to) makes every transcription pay that cost again.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        chunk_length_s=30,
    )


def transcribe_audio(sample):
    """Transcribe *sample* and return the recognised text.

    Args:
        sample: The audio to transcribe. The interface below is configured
            with ``type="filepath"``, so Gradio passes a path string, or
            ``None`` when nothing was recorded or uploaded. Objects that
            provide ``.copy()`` (e.g. a dataset audio dict) are also
            accepted and are copied before being handed to the pipeline.

    Returns:
        The ``"text"`` field of the pipeline output, or an empty string when
        no audio was supplied.
    """
    if sample is None:
        # Nothing was submitted; avoid crashing on a missing input.
        return ""

    # A file path is a ``str`` and has no ``.copy()``; only copy inputs that
    # support it so the caller's object is never modified by the pipeline.
    audio = sample.copy() if hasattr(sample, "copy") else sample

    prediction = _get_pipeline()(audio, batch_size=8)["text"]
    # Timestamps are available as well:
    # prediction = _get_pipeline()(audio, batch_size=8, return_timestamps=True)["chunks"]
    return prediction


interface = gr.Interface(
    fn=transcribe_audio,  # The function applied to the audio input
    inputs=gr.Audio(type="filepath"),  # Users can record or upload audio
    outputs="text",  # The output is the transcription (text)
    title="Whisper Small ASR",  # Title of the app
    description="Transcription using Whisper Small.",  # Description of the app
)

if __name__ == "__main__":
    # Start the Gradio app only when run as a script, so importing this
    # module does not launch a server.
    interface.launch()