File size: 975 Bytes
17c1345
c39aecf
 
17c1345
c39aecf
17c1345
fb6eab4
bdf398a
fb6eab4
 
c39aecf
17c1345
c39aecf
17c1345
bdf398a
c39aecf
 
 
17c1345
 
bdf398a
17c1345
 
 
 
 
 
 
 
 
 
c39aecf
17c1345
c39aecf
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import spaces
from transformers import pipeline
import gradio as gr
import torch

# Prefer CUDA when torch reports it as available; otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# transformers Pipeline objects do not expose a `.to()` method, so device
# placement is requested through the `device=` argument at construction time.

# Speech-to-text pipeline (task is inferred from the model repository).
asr = pipeline(model="asif00/whisper-bangla", device=device)

# Text-to-text pipeline applied to the transcript for error correction.
ser = pipeline(
    "text2text-generation",
    model="asif00/mbart_bn_error_correction",
    device=device,
)

@spaces.GPU
def transcribe(audio):
    """Transcribe ``audio`` with the module-level ``asr`` pipeline.

    Args:
        audio: Input forwarded unchanged to ``asr``. The Gradio interface in
            this file is configured with ``type="filepath"``, so this is
            presumably a path to the recorded audio file.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    result = asr(audio)
    transcript = result["text"]
    # Echo the raw transcript to stdout so it shows up in the app logs.
    print(transcript)
    return transcript


@spaces.GPU
def correction(text):
    """Run the module-level ``ser`` error-correction pipeline on ``text``.

    Args:
        text: The string to correct (in this file, the ASR transcript).

    Returns:
        The ``"generated_text"`` of the first sequence produced by ``ser``.
    """
    # A text2text-generation pipeline returns a list with one dict per
    # generated sequence, so the first element must be selected before the
    # "generated_text" key can be read.
    outputs = ser(text)
    corrected_text = outputs[0]["generated_text"]
    print(corrected_text)
    return corrected_text


def transcribe_and_correct(audio):
    """Transcribe ``audio`` and return the error-corrected transcript.

    Args:
        audio: The audio input supplied by the UI, or ``None`` when the user
            submitted without recording anything.

    Returns:
        The corrected transcript, or an empty string when no audio was given.
    """
    # Gradio passes None when nothing was recorded; skip both models instead
    # of letting the ASR pipeline fail on a missing input.
    if audio is None:
        return ""
    text = transcribe(audio)
    return correction(text)


# Audio input component: microphone source, delivered with type="filepath".
_mic_input = gr.Audio(sources="microphone", type="filepath")

# Single-function UI: audio in, corrected transcript out as plain text.
iface = gr.Interface(
    fn=transcribe_and_correct,
    inputs=_mic_input,
    outputs="text",
    title="Whisper Bangla",
    description="Realtime demo for Bengali speech recognition using a fine-tuned Whisper small model.",
)

# Start the web app when the script runs.
iface.launch()