import gradio as gr
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import librosa
# Load the pretrained Kyrgyz Wav2Vec2 processor and CTC model from the Hub.
processor = Wav2Vec2Processor.from_pretrained("the-cramer-project/Wav2vec-Kyrgyz")
model = Wav2Vec2ForCTC.from_pretrained("the-cramer-project/Wav2vec-Kyrgyz")
# model.to("cuda")  # uncomment to run inference on GPU
def transcribe(file_):
    # Load the uploaded audio at the 16 kHz sampling rate the model expects.
    arr_audio, _ = librosa.load(file_, sr=16000)
    inputs = processor(arr_audio, sampling_rate=16_000, return_tensors="pt", padding=True)
    # Run the model without tracking gradients, then greedy-decode the logits.
    with torch.no_grad():
        logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
    pred_ids = torch.argmax(logits, dim=-1)
    text = processor.batch_decode(pred_ids)[0]
    return text
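# Example usage outside Gradio (hypothetical file path, for illustration only):
# text = transcribe("example_kyrgyz.wav")
# print(text)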
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Wav2Vec2 Kyrgyz",
    description="Demo for Kyrgyz speech recognition using a Wav2Vec2 model.",
)
iface.launch()
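# When running locally, a temporary public URL can be requested via Gradio's
# standard share flag (not used by the original Space, which launches with
# defaults):
# iface.launch(share=True)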