Wave2Vec_EN / app.py
Simonlob's picture
Update app.py
75dfdb4 verified
raw
history blame
953 Bytes
import gradio as gr
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import librosa
import numpy as np
import re
# Checkpoint shared by the feature processor and the CTC acoustic model.
_MODEL_ID = "facebook/wav2vec2-base-960h"

# Both objects are created once at import time and reused by transcribe().
processor = Wav2Vec2Processor.from_pretrained(_MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(_MODEL_ID)
# GPU placement is intentionally disabled; the model stays on whatever
# device from_pretrained() put it on.
# model.to("cuda")
def transcribe(file_):
    """Transcribe an audio file to lower-case English text.

    Args:
        file_: Path to an audio file readable by ``librosa.load``, or
            ``None`` when no audio was supplied.

    Returns:
        The decoded transcription in lower case, or an empty string when
        ``file_`` is ``None``.
    """
    # The UI callback can be invoked without any audio (nothing recorded or
    # uploaded); there is nothing to decode in that case.
    if file_ is None:
        return ""

    # Resample on load so the waveform matches the 16 kHz rate declared to
    # the processor below.
    arr_audio, _ = librosa.load(file_, sr=16000)
    inputs = processor(arr_audio, sampling_rate=16_000, return_tensors="pt", padding=True)

    # Not every Wav2Vec2 feature extractor emits an attention mask. Attribute
    # access on a missing key raises AttributeError, so look it up with
    # .get() and pass None to the model when the mask is absent.
    attention_mask = inputs.get("attention_mask")

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(inputs.input_values, attention_mask=attention_mask).logits

    # Greedy CTC decoding: most likely token per frame, then collapse to text.
    pred_ids = torch.argmax(logits, dim=-1)
    text = processor.batch_decode(pred_ids)[0]
    return text.lower()
# Single audio input, handed to the callback as a path on disk.
_audio_input = gr.Audio(type="filepath")

# Build the web UI around transcribe() and start serving it immediately.
iface = gr.Interface(
    title="Wave2Vec EN",
    description="Realtime demo for English speech recognition using a wave2vec model.",
    fn=transcribe,
    inputs=_audio_input,
    outputs="text",
)
iface.launch()