whisper_gradio / app.py
import gradio as gr
from transformers import pipeline
import torch
import torchaudio

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Build the Whisper ASR pipeline once at startup instead of on every request.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    chunk_length_s=30,
    device=device,
)


def transcribe(audio):
    # With type="filepath", Gradio passes the path of the recorded or uploaded file.
    # torchaudio.load returns a float32 waveform of shape (channels, frames) plus its sample rate.
    waveform, sample_rate = torchaudio.load(audio)
    # Whisper expects mono audio: average the channels and convert to a NumPy array.
    waveform_np = waveform.mean(dim=0).numpy()
    # Pass the raw audio with its sampling rate; the pipeline resamples to 16 kHz as needed.
    prediction = pipe({"sampling_rate": sample_rate, "raw": waveform_np})["text"]
    return prediction
gradio_app = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(label="Input", type="filepath"),
    outputs=gr.Textbox(label="Result"),
    title="Whisper Transcription",
)
if __name__ == "__main__":
gradio_app.launch()
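# Usage note (a hedged sketch, not part of the app itself): once the script is
# running, the interface can also be queried programmatically with the
# gradio_client package. The local URL, default port, and "sample.wav" path
# below are assumptions for illustration.
#
#   from gradio_client import Client, handle_file
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(handle_file("sample.wav"), api_name="/predict")
#   print(result)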