# w2v2asr / app.py -- file view header: uploader "clr", commit d884e8f ("Update app.py"), 2.36 kB (raw / history / blame links omitted)
import gradio as gr
import soundfile as sf
import numpy as np
import torch, torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
from datasets import load_dataset, Audio
import matplotlib.pyplot as plt
# Checkpoint identifier passed to both from_pretrained() calls below.
MODEL_NAME = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"

# Seed torch's RNG with a fixed value.
torch.random.manual_seed(0)

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The processor is required: recc() calls it to build the model input and
# dec() calls its decode() to turn predicted ids into text.
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)

model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
model = model.to(device)

# Dataset loading is currently disabled; kept for reference.
#ds = load_dataset("language-and-voice-lab/samromur_asr",split='train',streaming=True)
#ds = load_dataset("language-and-voice-lab/samromur_asr",split='test')
#ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
def show_ex(exnum):
    """Return ``exnum`` unchanged.

    Placeholder: the dataset lookup it was meant to perform
    (``ds['audio_id'][exnum]``) is disabled because ``ds`` is not loaded.
    """
    # return ds['audio_id'][exnum]
    return exnum
def recc(a_f):
    """Run the CTC model on an audio file and return the predicted token ids.

    Args:
        a_f: Path (or file-like object) that ``soundfile.read`` can open.

    Returns:
        ``torch.argmax`` of the model logits over the last dimension. The
        input is given a leading batch dimension of 1 before the forward
        pass, so the result keeps that dimension. The tensor stays on
        ``device``.
    """
    target_sr = 16000

    wav, sr = sf.read(a_f, dtype=np.float32)
    if wav.ndim == 2:
        # Multi-channel input: average over axis 1 to get a mono signal.
        wav = wav.mean(1)

    if sr != target_sr:
        # Fix: `signal` was never imported, so this branch used to raise
        # NameError for any file not already sampled at 16 kHz. Imported here
        # so that 16 kHz input does not need scipy at all.
        from scipy import signal

        wlen = int(wav.shape[0] / sr * target_sr)
        # Cast back so the dtype matches the non-resampled path.
        wav = signal.resample(wav, wlen).astype(np.float32)

    with torch.inference_mode():
        # The processor returns a batch; take its single entry and re-add the
        # batch dimension as a tensor on the model's device.
        input_values = processor(wav, sampling_rate=target_sr).input_values[0]
        input_values = torch.tensor(input_values, device=device).unsqueeze(0)
        logits = model(input_values).logits
        pred_ids = torch.argmax(logits, dim=-1)

    return pred_ids
def dec(pids):
    """Decode predicted token ids into text with ``processor.decode``.

    Args:
        pids: One of
            * a flat sequence of integer ids (passed through unchanged),
            * a ``torch.Tensor`` of ids of any rank (flattened first, so a
              leading batch dimension is tolerated),
            * a string holding the printed form of such ids, which is what
              the UI passes when this function is wired to a Textbox.
              Every integer in the text is taken as an id.

    Returns:
        The string returned by ``processor.decode``; ``""`` when there is
        nothing to decode.

    Raises:
        ValueError: if ``pids`` is a string containing ``...``, i.e. a
            printout that was abbreviated and no longer lists every id.
    """
    import re  # function-scope: the module header does not import it

    if pids is None:
        return ""

    if isinstance(pids, str):
        if "..." in pids:
            # An abbreviated printout would silently decode to a fragment.
            raise ValueError(
                "token id text is truncated ('...'); cannot recover the full sequence"
            )
        # Remove trailing metadata such as device='cuda:0' so that its digits
        # are not mistaken for token ids.
        cleaned = re.sub(r"\b(?:device|dtype)\s*=\s*[^,)]+", "", pids)
        pids = [int(tok) for tok in re.findall(r"\d+", cleaned)]
    elif isinstance(pids, torch.Tensor):
        # NOTE(review): decode appears to expect a flat id sequence - confirm.
        pids = pids.reshape(-1).tolist()

    if len(pids) == 0:
        return ""

    return processor.decode(pids)
# Two-step demo UI: "Recognise" sends the uploaded audio path to recc() and
# shows the result in the first textbox; "Dec" sends that textbox's content
# to dec() and shows the result in the second textbox.
with gr.Blocks() as bl:
    # Components are created in display order.
    audio_file = gr.Audio(type="filepath")
    text_button = gr.Button("Recognise")
    text_output = gr.Textbox()
    text_button2 = gr.Button("Dec")
    text_output2 = gr.Textbox()

    # Event wiring.
    text_button.click(fn=recc, inputs=audio_file, outputs=text_output)
    text_button2.click(fn=dec, inputs=text_output, outputs=text_output2)

bl.launch()
#https://mercury-docs.readthedocs.io/en/latest/deploy/hugging-face-spaces/
#https://huggingface.co/spaces/pplonski/deploy-mercury
#https://discuss.huggingface.co/t/deploy-interactive-jupyter-notebook-on-spaces-with-mercury/17000
#https://huggingface.co/docs/transformers/notebooks