RasmusToivanen
add secrets
ef1a65c
raw
history blame
1.77 kB
import gradio as gr
import librosa
import soundfile as sf
import torch
import warnings
import os
from transformers import Wav2Vec2ProcessorWithLM, Wav2Vec2CTCTokenizer
warnings.filterwarnings("ignore")
# Imports for loading the wav2vec2 speech-recognition pipeline and the seq2seq case-correction tokenizer/model
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from fastapi import FastAPI, HTTPException, File
from transformers import pipeline
# Speech-recognition pipeline for the Finnish wav2vec2 model. The chunk and
# stride arguments are given in seconds and let the pipeline process long
# recordings piecewise.
pipe = pipeline(
    model="Finnish-NLP/wav2vec2-xlsr-1b-finnish-lm-v2",
    chunk_length_s=20,
    stride_length_s=(4, 2),
)

# Hugging Face access token taken from the `hf_token` environment variable
# (None when the variable is unset). os.getenv() requires the variable name;
# calling it without one raises TypeError at import time.
token = os.getenv('hf_token')

# Run the case-correction model on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The tokenizer comes from this checkpoint, while the model weights below are
# loaded from a separate repository.
model_checkpoint = 'Finnish-NLP/t5x-small-nl24-finnish'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_auth_token=token)
model = AutoModelForSeq2SeqLM.from_pretrained(
    'Finnish-NLP/case_correction_model',
    from_flax=False,
    torch_dtype=torch.float32,
    use_auth_token=token,
).to(device)
# define speech-to-text function
def asr_transcript(audio):
    """Transcribe an uploaded audio clip and produce a case-corrected transcript.

    The clip is run through the module-level speech-recognition ``pipe``.
    The recognised text is then fed to the module-level seq2seq ``model``
    (using ``tokenizer``) to generate the case-corrected version.

    Args:
        audio: The upload handed over by the audio input component. Either
            an object exposing the file path as ``.name`` or a plain path
            string. Any falsy value (``None``, ``""``) is treated as a
            missing upload.

    Returns:
        A dict with the raw transcript under ``"text_asr"`` and the
        case-corrected transcript under ``"text_case_corrected"``, or the
        string ``"File not valid"`` when no audio was provided.
    """
    # Guard clause: nothing was uploaded, so there is nothing to transcribe.
    if not audio:
        return "File not valid"

    # Uploaded-file objects carry their path in `.name`; a plain path string
    # is passed through unchanged.
    audio_path = getattr(audio, "name", audio)
    transcript = pipe(audio_path)['text']

    input_ids = tokenizer(transcript, return_tensors="pt").input_ids.to(device)
    # Generation is capped at max_length=128 tokens.
    outputs = model.generate(input_ids, max_length=128)
    case_corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return {"text_asr": transcript, "text_case_corrected": case_corrected_text}
# Web UI: a single audio-upload input wired to asr_transcript, with the
# function's result shown in a text box.
gradio_ui = gr.Interface(
    title="Speech-to-Text with HuggingFace+Wav2Vec2",
    description="Upload an audio clip, and let AI do the hard work of transcribing",
    fn=asr_transcript,
    inputs=gr.inputs.Audio(type="file", label="Upload Audio File"),
    outputs=gr.outputs.Textbox(label="Auto-Transcript"),
)

# Start serving the interface as soon as the script runs.
gradio_ui.launch()