import gradio as gr
import torch
from transformers import pipeline
from datasets import load_dataset

# Optional: pick a GPU if one is available and grab a dummy LibriSpeech sample for local testing
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
# sample = ds[0]["audio"]
def transcribe_audio(sample):
    # Build the Whisper ASR pipeline; 30-second chunking lets it handle long recordings
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        chunk_length_s=30,
    )
    # Gradio passes the recorded or uploaded audio as a file path, which the pipeline accepts directly
    prediction = pipe(sample, batch_size=8)["text"]
    # We can also return timestamps for the predictions (see the sketch below):
    # prediction = pipe(sample, batch_size=8, return_timestamps=True)["chunks"]
    return prediction
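
# A minimal sketch (not part of the original app) of the timestamped variant hinted at above:
# return_timestamps=True makes the pipeline emit chunk-level timestamps, and the helper name
# transcribe_with_timestamps is purely illustrative.
def transcribe_with_timestamps(sample):
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        chunk_length_s=30,
    )
    # Each chunk is a dict like {"timestamp": (start, end), "text": "..."}
    chunks = pipe(sample, batch_size=8, return_timestamps=True)["chunks"]
    lines = []
    for chunk in chunks:
        start, end = chunk["timestamp"]  # end can be None for the final chunk
        lines.append(f"[{start} - {end}] {chunk['text']}")
    return "\n".join(lines)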
interface = gr.Interface(
    fn=transcribe_audio,               # The function applied to the audio input
    inputs=gr.Audio(type="filepath"),  # Users can record or upload audio
    outputs="text",                    # The output is the transcription (text)
    title="Whisper Small ASR",         # Title of your app
    description="Transcription using Whisper Small.",  # Description of your app
)

# Start the Gradio app
interface.launch()
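
# Optional local check, a sketch based on the commented-out lines near the top; run it in
# place of interface.launch() when testing without the Gradio UI:
#
# ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
# sample = ds[0]["audio"]  # dict with "array" and "sampling_rate"
# pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small", chunk_length_s=30)
# print(pipe(sample.copy(), batch_size=8)["text"])  # .copy() keeps the pipeline from mutating the dataset dict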