Wave2Vec_EN / app.py
Simonlob's picture
Update app.py
2c1f701 verified
raw
history blame contribute delete
955 Bytes
import gradio as gr
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import librosa
import numpy as np
import re
# Both the processor and the CTC model come from the same pretrained
# checkpoint, so the identifier is spelled out once.
_checkpoint = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(_checkpoint)
# Inference runs on CPU. On GPU hardware the model can be moved to "cuda"
# instead; the inputs fed to it must then live on the same device.
model = Wav2Vec2ForCTC.from_pretrained(_checkpoint).to("cpu")
def transcribe(file_):
    """Transcribe an audio file into lower-case text.

    Args:
        file_: Path to the audio file to transcribe, as delivered by the
            Gradio ``Audio`` input. ``None`` or an empty path (the user
            submitted without recording or uploading anything) is accepted.

    Returns:
        The lower-cased transcription of the audio, or ``""`` when no
        audio file was provided.
    """
    # Gradio hands over None when the form is submitted without audio;
    # answer with an empty transcription instead of crashing in the loader.
    if not file_:
        return ""

    # Decode at a fixed rate and tell the processor the same rate, so the
    # two stay consistent and the processor can validate it.
    sample_rate = 16000
    arr_audio, _ = librosa.load(file_, sr=sample_rate)
    input_values = processor(
        arr_audio,
        sampling_rate=sample_rate,
        return_tensors="pt",
        padding="longest",
    ).input_values

    # Pure inference: skip autograd bookkeeping, and place the inputs on
    # whatever device the model currently lives on.
    with torch.no_grad():
        logits = model(input_values.to(model.device)).logits

    # Greedy CTC decoding: most likely token per frame, then collapse.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0].lower()
# Web UI: one audio input (passed to the callback as a file path) and a
# plain-text output showing the transcription.
iface = gr.Interface(
    title="Wave2Vec EN",
    description="Realtime demo for English speech recognition using a wave2vec model.",
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    fn=transcribe,
)

# Start serving the interface as soon as the script is executed.
iface.launch()