Spaces:
Runtime error
Runtime error
File size: 989 Bytes
feb2a2b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
from typing import Tuple
import numpy as np
import torch
from torchaudio.transforms import Resample
from huggingface_hub import hf_hub_download
import gradio as gr
from pipeline import PreTrainedPipeline
# Hugging Face Hub repository id (a repo id, not a full URL, despite the name).
# It is passed as `repo_id` when downloading the language model and as
# `model_path` when constructing the recognition pipeline.
HF_HUB_URL = 'ales/wav2vec2-cv-be'
# Path of the language-model file inside that repository; passed as `filename`
# to `hf_hub_download`.
LM_HUB_FP = 'language_model/cv8be_5gram.bin'
# Recognition pipeline shared by all requests; built lazily by _get_pipeline().
_PIPELINE = None


def _get_pipeline():
    """Return the shared recognition pipeline, creating it on first use.

    Building the pipeline downloads the language model and loads the acoustic
    model, so it is done once instead of on every request.
    """
    global _PIPELINE
    if _PIPELINE is None:
        # download Language Model from HF Hub
        lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
        # init pipeline
        _PIPELINE = PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)
    return _PIPELINE


def _to_mono_float(audio: np.ndarray) -> np.ndarray:
    """Convert raw recorder samples to a 1-D float32 waveform."""
    samples = np.asarray(audio)
    if np.issubdtype(samples.dtype, np.integer):
        # The resampler only accepts floating-point tensors; scale integer PCM
        # by the dtype's maximum so values land roughly in [-1, 1].
        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # Down-mix to mono. Assumes a (samples, channels) layout, which is what
        # the Gradio audio input is documented to deliver -- TODO confirm.
        samples = samples.mean(axis=1)
    return samples


def main(rate_audio_tuple: Tuple[int, np.ndarray]):
    """Transcribe one microphone recording.

    Args:
        rate_audio_tuple: ``(sampling_rate, samples)`` pair for the recording.
            ``None`` (nothing was recorded) is tolerated.

    Returns:
        The first entry of the pipeline's ``'text'`` output, or an empty
        string when no recording was supplied.
    """
    if rate_audio_tuple is None:
        # Submitting without a recording should not crash the callback.
        return ''

    sampling_rate, audio = rate_audio_tuple
    waveform = torch.as_tensor(_to_mono_float(audio))

    # resample audio to 16kHz
    resampler = Resample(orig_freq=sampling_rate, new_freq=16_000)
    audio_resampled = resampler(waveform).numpy().flatten()

    # recognize speech
    text_recognized = _get_pipeline()(inputs=audio_resampled)['text'][0]
    return text_recognized
# Web UI: a microphone recording goes in, `main` runs on it, and the returned
# value is shown as text.
iface = gr.Interface(fn=main, inputs='microphone', outputs="text")

# Start serving the interface.
iface.launch()
|