# Wave2Vec_EN / app.py
# Simonlob's picture
# Upload 3 files
# a9569f0 verified
# raw
# history blame
# 960 Bytes
import gradio as gr
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import librosa
import numpy as np
import re
# Hub id of the checkpoint; the processor and the model must come from the
# same checkpoint, so the id is defined once and shared by both loads.
MODEL_ID = "the-cramer-project/Wav2vec-Kyrgyz"

# Loaded once at start-up and reused by every transcribe() call.
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
# The model is left on its default device. To use a GPU, move the model
# (model.to("cuda")) and also move the input tensors built in transcribe().
# The processor is told the audio is sampled at 16 kHz, so the file is loaded
# (and resampled by librosa if needed) at that same rate.
_SAMPLE_RATE_HZ = 16_000


def transcribe(file_):
    """Transcribe one audio file to text with the module-level CTC model.

    Args:
        file_: Path to an audio file readable by ``librosa.load``. A falsy
            value (``None`` or ``""``) is treated as "no audio supplied".

    Returns:
        The greedy (argmax) decoded transcription of the recording, or ``""``
        when there is no audio to transcribe.
    """
    # The UI can invoke this callback without a recording, in which case no
    # path is supplied; answer with an empty transcript instead of letting
    # librosa fail on a missing path.
    if not file_:
        return ""

    waveform, _ = librosa.load(file_, sr=_SAMPLE_RATE_HZ)
    # A zero-length recording has nothing to feed through the model.
    if waveform.size == 0:
        return ""

    inputs = processor(
        waveform,
        sampling_rate=_SAMPLE_RATE_HZ,
        return_tensors="pt",
        padding=True,
    )
    # Not every Wav2Vec2 feature extractor returns an attention mask; attribute
    # access would raise when it is absent, so look it up and pass None then.
    attention_mask = inputs.get("attention_mask")

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(inputs.input_values, attention_mask=attention_mask).logits

    # Greedy CTC decoding: most likely token per frame, then collapse to text.
    pred_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(pred_ids)[0]
# Web UI: a single audio input (handed to the callback as a file path) wired
# to transcribe(), with the transcript shown as plain text.
iface = gr.Interface(
    transcribe,
    gr.Audio(type="filepath"),
    "text",
    title="Wave2Vec Kyrgyz",
    description="Realtime demo for Kyrgyz speech recognition using a wave2vec model.",
)

# Start serving the interface as soon as this script is executed.
iface.launch()