Spaces:

asif00
/

whisper-bangla

Sleeping

File size: 864 Bytes

17c1345
c39aecf
 
17c1345
c39aecf
fb6eab4
 
7db76c6
c39aecf
17c1345
c39aecf
17c1345
bdf398a
c39aecf
 
 
17c1345
 
cd18f8b
 
 
 
17c1345
 
 
 
 
 
 
 
c39aecf
17c1345
c39aecf

import spaces
from transformers import pipeline
import gradio as gr
import torch

# Pipeline used by `transcribe` to turn audio into text. No task is named
# here, so transformers has to work it out from the model itself.
# NOTE(review): no `device` is passed to either pipeline — confirm they end up
# on the intended device for the @spaces.GPU functions.
asr = pipeline(
    model="asif00/whisper-bangla",
)
# Text-to-text pipeline used by `correction` on the transcript.
ser = pipeline(
    task="text2text-generation",
    model="asif00/mbart_bn_error_correction",
)


@spaces.GPU
def transcribe(audio):
    """Run the ``asr`` pipeline on ``audio`` and return the recognised text.

    Args:
        audio: Input forwarded unchanged to ``asr``; the Gradio interface in
            this file supplies it as a file path.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    result = asr(audio)
    transcript = result["text"]
    print(transcript)  # echo the raw transcript to stdout
    return transcript


@spaces.GPU
def correction(text):
    """Pass ``text`` through the ``ser`` pipeline and return its output.

    Args:
        text: Text forwarded unchanged to ``ser``.

    Returns:
        The ``"generated_text"`` field of the first result from ``ser``.
    """
    candidates = ser(text)
    first_candidate = candidates[0]
    fixed = first_candidate["generated_text"]
    print(fixed)  # echo the corrected text to stdout
    return fixed


def transcribe_and_correct(audio):
    """Transcribe ``audio`` and return the error-corrected transcript.

    Args:
        audio: Path to the recorded audio file, or ``None`` when the
            interface is submitted without a recording.

    Returns:
        The corrected transcript, or an empty string when no audio was given.
    """
    # Gradio hands the callback None when nothing was recorded; return early
    # rather than letting the ASR pipeline fail on a None input.
    if audio is None:
        return ""
    text = transcribe(audio)
    return correction(text)


# Web UI: one microphone recording in, one text box out.
iface = gr.Interface(
    title="Whisper Bangla",
    description="Realtime demo for Bengali speech recognition using a fine-tuned Whisper small model.",
    # The recording is handed to the callback as a path on disk.
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="text",
    fn=transcribe_and_correct,
)

# Start the app as soon as this module is executed.
iface.launch()