# Page header captured along with this file: "Spaces: Build error"
import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
# Load the processor and the CTC model once at import time so every request
# served by the UI reuses the same objects. Both come from the same checkpoint,
# so the identifier is kept in a single place.
MODEL_ID = "nguyenvulebinh/wav2vec2-base-vietnamese-250h"
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
# define function to read in sound file
def map_to_array(batch: dict) -> dict:
    """Read the sound file named by ``batch["file"]`` into the batch.

    Args:
        batch: Mapping with a ``"file"`` entry that ``sf.read`` can open.

    Returns:
        The same ``batch`` object, mutated in place, with the decoded samples
        stored under ``"speech"`` and the file's sample rate stored under
        ``"sampling_rate"``.
    """
    speech, sampling_rate = sf.read(batch["file"])
    batch["speech"] = speech
    # Keep the rate instead of discarding it so callers can check it against
    # the rate the model expects.
    batch["sampling_rate"] = sampling_rate
    return batch
# tokenize
def recognize_speech(audio) -> str:
    """Transcribe one audio recording with the module-level processor and model.

    Args:
        audio: Whatever ``map_to_array`` can hand to ``sf.read`` as a file, or
            ``None`` when nothing was recorded.

    Returns:
        The decoded transcription of the recording, or ``""`` when there is
        no audio to transcribe.
    """
    if audio is None:
        return ""

    ds = map_to_array({"file": audio})
    speech = ds["speech"]
    # sf.read returns a (frames, channels) array for multi-channel files; the
    # processor would read that as a batch, so average the channels to mono.
    if speech.ndim > 1:
        speech = speech.mean(axis=1)
    if speech.size == 0:
        return ""

    # NOTE(review): the sample rate of the recording is never compared with the
    # rate the checkpoint was trained on (presumably 16 kHz) — confirm, and
    # resample here if the recorder produces a different rate.
    input_values = processor(speech, return_tensors="pt", padding="longest").input_values  # Batch size 1
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0]
# launch a simple UI
# NOTE(review): recognize_speech passes its argument on to sf.read, so the
# "microphone" input has to deliver a file path — confirm that this shortcut
# does so in the installed Gradio version.
gr.Interface(fn=recognize_speech, inputs="microphone", outputs="text").launch()