Spaces:
Running
Running
File size: 975 Bytes
17c1345 c39aecf 17c1345 c39aecf 17c1345 fb6eab4 bdf398a fb6eab4 c39aecf 17c1345 c39aecf 17c1345 bdf398a c39aecf 17c1345 bdf398a 17c1345 c39aecf 17c1345 c39aecf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import spaces
from transformers import pipeline
import gradio as gr
import torch
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Speech recognition pipeline. No task name is given, so ``pipeline()`` has to
# work it out from the model itself.
# Pipeline objects do not provide a ``.to()`` method; the target device is
# handed to the ``pipeline()`` factory instead.
asr = pipeline(model="asif00/whisper-bangla", device=device)

# Text-to-text pipeline applied to the transcript produced by ``asr``.
ser = pipeline(
    "text2text-generation",
    model="asif00/mbart_bn_error_correction",
    device=device,
)
@spaces.GPU
def transcribe(audio):
    """Run the ``asr`` pipeline on ``audio`` and return the recognised text.

    The transcript is also printed to stdout before it is returned.
    """
    result = asr(audio)
    transcript = result["text"]
    print(transcript)
    return transcript
@spaces.GPU
def correction(text):
    """Run the ``ser`` pipeline on ``text`` and return the generated string.

    The corrected text is also printed to stdout before it is returned.
    """
    outputs = ser(text)
    # A text2text-generation pipeline answers a single input string with a
    # list holding one dict per generated sequence, so the first entry has to
    # be selected before "generated_text" can be read.
    if isinstance(outputs, list):
        outputs = outputs[0]
    corrected_text = outputs["generated_text"]
    print(corrected_text)
    return corrected_text
def transcribe_and_correct(audio):
    """Transcribe ``audio`` and return the corrected transcript.

    ``audio`` is passed to ``transcribe`` and the resulting text is passed to
    ``correction``. When ``audio`` is ``None`` (nothing was recorded before the
    form was submitted) an empty string is returned and neither model is run.
    """
    if audio is None:
        return ""
    text = transcribe(audio)
    return correction(text)
# Microphone-only audio input; type="filepath" makes Gradio hand the callback
# a path to the recorded audio file.
mic_input = gr.Audio(sources="microphone", type="filepath")

# Single-function UI: recorded audio in, corrected transcript out as text.
iface = gr.Interface(
    fn=transcribe_and_correct,
    inputs=mic_input,
    outputs="text",
    title="Whisper Bangla",
    description="Realtime demo for Bengali speech recognition using a fine-tuned Whisper small model.",
)

# Start the web app.
iface.launch()
|