import gradio as gr
import torch
from transformers import pipeline
from datasets import load_dataset

# Uncomment to pick a GPU when one is available (and pass device=device to pipeline()):
# device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Uncomment to grab a sample clip from a dummy LibriSpeech dataset for local testing:
# ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
# sample = ds[0]["audio"]
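
# Optional sanity check (a sketch, not part of the app): with the lines above
# uncommented and transcribe_audio defined below, the dummy clip can be fed
# through the same function the UI uses. This assumes the decoded audio dict
# exposes a local file "path":
# print(transcribe_audio(ds[0]["audio"]["path"]))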

def transcribe_audio(sample):
    # Build the ASR pipeline with Whisper Small; chunk_length_s splits long
    # recordings into 30-second windows so audio of any length can be handled.
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        chunk_length_s=30,
    )
    # `sample` arrives as a filepath string (gr.Audio(type="filepath") below),
    # so it is passed to the pipeline directly.
    # prediction = pipe(sample, batch_size=8)["text"]
    # We can also return timestamps for the predictions:
    prediction = pipe(sample, batch_size=8, return_timestamps=True)["chunks"]
    return prediction


interface = gr.Interface(
    fn=transcribe_audio,  # The function applied to the audio input
    inputs=gr.Audio(type="filepath"),  # Users can record or upload audio; passed in as a filepath
    outputs="text",  # The timestamped chunks are rendered as text
    title="Whisper Small ASR",  # Title of your app
    description="Transcription with timestamps using Whisper Small.",  # Description of your app
)

# This line starts the Gradio app
interface.launch()
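
# When run locally (e.g. `python app.py`), launch() prints a local URL for the demo.
# A temporary public link can be requested instead, e.g.:
# interface.launch(share=True)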