# xlsr-gradio / app.py
# Hugging Face Space by kmknair — tabbed Gradio app (commit 478bedf)
import gradio as gr
from transformers import pipeline
import time
# Load two wav2vec2 XLS-R ASR pipelines, one per language.
# NOTE: these download the models from the Hugging Face Hub at import time,
# so module import is slow on first run and requires network access.
p_ta = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-tamil")
p_ar = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-arabic")
def transcribe_ta(audio):
    """Transcribe a Tamil audio recording.

    Args:
        audio: Filepath of the recorded audio clip.

    Returns:
        The recognized Tamil text.
    """
    return p_ta(audio)["text"]
def transcribe_ta_stream(audio, state=""):
    """Streaming Tamil transcription: append each chunk's text to the session state.

    Args:
        audio: Filepath of the latest audio chunk from the microphone.
        state: Accumulated transcript from previous chunks.

    Returns:
        A (display_text, new_state) pair — both the updated transcript.
    """
    # Throttle so audio accumulates between model invocations.
    time.sleep(2)
    chunk_text = p_ta(audio)["text"]
    updated = state + chunk_text + " "
    return updated, updated
def transcribe_ar(audio):
    """Transcribe an Arabic audio recording.

    Args:
        audio: Filepath of the recorded audio clip.

    Returns:
        The recognized Arabic text.
    """
    return p_ar(audio)["text"]
def transcribe_ar_stream(audio, state=""):
    """Streaming Arabic transcription: append each chunk's text to the session state.

    Args:
        audio: Filepath of the latest audio chunk from the microphone.
        state: Accumulated transcript from previous chunks.

    Returns:
        A (display_text, new_state) pair — both the updated transcript.
    """
    # Throttle so audio accumulates between model invocations.
    time.sleep(2)
    chunk_text = p_ar(audio)["text"]
    updated = state + chunk_text + " "
    return updated, updated
# Tab: live (streaming) Tamil transcription from the microphone.
ta_tr_stream_tab = gr.Interface(
    fn=transcribe_ta_stream,
    inputs=[gr.Audio(source="microphone", type="filepath", streaming=True), "state"],
    outputs=["textbox", "state"],
    live=True,
)
# Tab: live (streaming) Arabic transcription from the microphone.
ar_tr_stream_tab = gr.Interface(
    fn=transcribe_ar_stream,
    inputs=[gr.Audio(source="microphone", type="filepath", streaming=True), "state"],
    outputs=["textbox", "state"],
    live=True,
)
# Tab: one-shot Tamil transcription of a full microphone recording.
ta_tr_file_tab = gr.Interface(
    fn=transcribe_ta,
    inputs=[gr.Audio(source="microphone", type="filepath")],
    outputs="text",
)
# Tab: one-shot Arabic transcription of a full microphone recording.
ar_tr_file_tab = gr.Interface(
    fn=transcribe_ar,
    inputs=[gr.Audio(source="microphone", type="filepath")],
    outputs="text",
)
# Combine the four demos into a single tabbed UI; tab order matches the
# interface list order.
_tab_titles = [
    "Tamil Live Transcription",
    "Arabic Live Transcription",
    "Tamil File Transcription",
    "Arabic File Transcription",
]
tabs = gr.TabbedInterface(
    [ta_tr_stream_tab, ar_tr_stream_tab, ta_tr_file_tab, ar_tr_file_tab],
    _tab_titles,
)

if __name__ == "__main__":
    tabs.launch()