# xlsr-gradio / app.py
# kmknair: "added examples and labels-3" (commit a7bf230)
import gradio as gr
from transformers import pipeline
import time
# Earlier variant that pointed at a local checkpoint path, kept for reference:
# p = pipeline("automatic-speech-recognition", model="/Users/mkesavan/aidev/speechAI-trials/xlsr-wave2vec/wav2vec2-large-xls-r-300m-tamil-colab/checkpoint-1600")

# One speech-recognition pipeline per supported language: Tamil, Arabic, English.
p_ta = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-tamil")
p_ar = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-arabic")
# The checkpoint has to be passed as `model=`; a misspelled keyword is not
# interpreted as the checkpoint name.
p_en = pipeline("automatic-speech-recognition", model="patrickvonplaten/hubert-xlarge-ls960-ft-4-gram")
def transcribe_ta(audio_u, audio_m):
    """Transcribe an uploaded and/or recorded Tamil audio clip.

    Args:
        audio_u: The uploaded clip, or ``None`` when nothing was uploaded.
        audio_m: The microphone recording, or ``None`` when nothing was
            recorded.

    Returns:
        The transcripts of the clips that were provided (upload first),
        separated by a newline. An empty string when both are ``None``.
    """
    # Collect only the clips that exist, then join: this keeps the result free
    # of a stray leading newline when just the microphone clip is present.
    clips = [clip for clip in (audio_u, audio_m) if clip is not None]
    return "\n".join(p_ta(clip)["text"] for clip in clips)
def transcribe_ta_stream(audio, state=""):
    """Transcribe one streamed Tamil audio chunk and extend the transcript.

    Args:
        audio: The latest audio chunk, or ``None`` when there is nothing to
            transcribe.
        state: The transcript accumulated by earlier calls.

    Returns:
        A 2-tuple holding the updated transcript twice: the first item is the
        displayed text, the second is the state carried into the next call.
    """
    if audio is None:
        # No chunk to process: return the transcript as-is instead of
        # handing ``None`` to the recognizer.
        return state, state
    # NOTE(review): the fixed pause presumably spaces out successive
    # recognizer calls while streaming -- confirm before changing it.
    time.sleep(2)
    state += p_ta(audio)["text"] + " "
    return state, state
def transcribe_ar(audio_u, audio_m):
    """Transcribe an uploaded and/or recorded Arabic audio clip.

    Args:
        audio_u: The uploaded clip, or ``None`` when nothing was uploaded.
        audio_m: The microphone recording, or ``None`` when nothing was
            recorded.

    Returns:
        The transcripts of the clips that were provided (upload first),
        separated by a newline. An empty string when both are ``None``.
    """
    # Collect only the clips that exist, then join: this keeps the result free
    # of a stray leading newline when just the microphone clip is present.
    clips = [clip for clip in (audio_u, audio_m) if clip is not None]
    return "\n".join(p_ar(clip)["text"] for clip in clips)
def transcribe_ar_stream(audio, state=""):
    """Transcribe one streamed Arabic audio chunk and extend the transcript.

    Args:
        audio: The latest audio chunk, or ``None`` when there is nothing to
            transcribe.
        state: The transcript accumulated by earlier calls.

    Returns:
        A 2-tuple holding the updated transcript twice: the first item is the
        displayed text, the second is the state carried into the next call.
    """
    if audio is None:
        # No chunk to process: return the transcript as-is instead of
        # handing ``None`` to the recognizer.
        return state, state
    # NOTE(review): the fixed pause presumably spaces out successive
    # recognizer calls while streaming -- confirm before changing it.
    time.sleep(2)
    state += p_ar(audio)["text"] + " "
    return state, state
def transcribe_en(audio_u, audio_m):
    """Transcribe an uploaded and/or recorded English audio clip.

    Args:
        audio_u: The uploaded clip, or ``None`` when nothing was uploaded.
        audio_m: The microphone recording, or ``None`` when nothing was
            recorded.

    Returns:
        The transcripts of the clips that were provided (upload first),
        separated by a newline. An empty string when both are ``None``.
    """
    # Collect only the clips that exist, then join: this keeps the result free
    # of a stray leading newline when just the microphone clip is present.
    clips = [clip for clip in (audio_u, audio_m) if clip is not None]
    return "\n".join(p_en(clip)["text"] for clip in clips)
def transcribe_en_stream(audio, state=""):
    """Transcribe one streamed English audio chunk and extend the transcript.

    Args:
        audio: The latest audio chunk, or ``None`` when there is nothing to
            transcribe.
        state: The transcript accumulated by earlier calls.

    Returns:
        A 2-tuple holding the updated transcript twice: the first item is the
        displayed text, the second is the state carried into the next call.
    """
    if audio is None:
        # No chunk to process: return the transcript as-is instead of
        # handing ``None`` to the recognizer.
        return state, state
    # NOTE(review): the fixed pause presumably spaces out successive
    # recognizer calls while streaming -- confirm before changing it.
    time.sleep(2)
    state += p_en(audio)["text"] + " "
    return state, state
# Tamil live tab: streamed microphone audio goes to transcribe_ta_stream; the
# "state" input/output pair carries the running transcript between calls.
ta_tr_stream_tab = gr.Interface(
    fn=transcribe_ta_stream,
    live=True,
    description="ரெகாட் பட்டண் அமர்தி பேசவும், பேச்சு சொல் பகிர்ப்பு வலது பக்கதில் அச்சிடபடும்",
    inputs=[
        gr.Audio(
            source="microphone",
            type="filepath",
            streaming=True,
            label="தமிழ் பேச்சு",
        ),
        "state",
    ],
    outputs=["textbox", "state"],
)
# Arabic live tab: streamed microphone audio goes to transcribe_ar_stream; the
# "state" input/output pair carries the running transcript between calls.
ar_tr_stream_tab = gr.Interface(
    fn=transcribe_ar_stream,
    live=True,
    description="Click record from microphone and start talking, transcription shall appear to the right.",
    inputs=[
        gr.Audio(
            source="microphone",
            type="filepath",
            streaming=True,
            label="Arabic speech",
        ),
        "state",
    ],
    outputs=["textbox", "state"],
)
# English live tab: streamed microphone audio goes to transcribe_en_stream; the
# "state" input/output pair carries the running transcript between calls.
en_tr_stream_tab = gr.Interface(
    fn=transcribe_en_stream,
    live=True,
    description="Click record from microphone and start talking, transcription shall appear to the right.",
    inputs=[
        gr.Audio(
            source="microphone",
            type="filepath",
            streaming=True,
            label="English speech",
        ),
        "state",
    ],
    outputs=["textbox", "state"],
)
# Tamil file tab: an uploaded clip and/or a microphone recording are passed to
# transcribe_ta; the example rows fill only the upload slot.
ta_tr_file_tab = gr.Interface(
    fn=transcribe_ta,
    inputs=[
        gr.Audio(type="filepath", label="தமிழ் ஒலி பதிப்பு சமர்ப்பித்தல்"),
        gr.Audio(source="microphone", type="filepath", label="தமிழ் பேச்சு"),
    ],
    outputs="text",
    examples=[
        ["samples/ta/32862591.mp3", None],
        ["samples/ta/32862612.mp3", None],
    ],
    description="ஒலி பதிப்பு சமர்ப்பிக்கவும், அல்லது ரெகாட் பட்டண் அமர்தி பேசவும், பேச்சு சொல் பகிர்ப்பு வலது பக்கதில் அச்சிடபடும்",
)
# Arabic file tab: an uploaded clip and/or a microphone recording are passed to
# transcribe_ar; the example rows fill only the upload slot.
ar_tr_file_tab = gr.Interface(
    fn=transcribe_ar,
    inputs=[
        gr.Audio(type="filepath", label="Arabic file upload"),
        gr.Audio(source="microphone", type="filepath", label="Arabic speech"),
    ],
    outputs="text",
    examples=[
        ["samples/ar/19706399.mp3", None],
        ["samples/ar/19985784.mp3", None],
    ],
    description="Upload a file or, click record from microphone and start talking, transcription shall appear to the right.",
)
# English file tab: an uploaded clip and/or a microphone recording are passed
# to transcribe_en; the example rows fill only the upload slot.
en_tr_file_tab = gr.Interface(
    fn=transcribe_en,
    inputs=[
        gr.Audio(type="filepath", label="English file upload"),
        gr.Audio(source="microphone", type="filepath", label="English speech"),
    ],
    outputs="text",
    examples=[
        ["samples/en/32941920.mp3", None],
        ["samples/en/32941921.mp3", None],
    ],
    description="Upload a file or, click record from microphone and start talking, transcription shall appear to the right.",
)
# Combine the six interfaces into one tabbed app. The two lists are positional
# partners: the n-th title labels the n-th interface (live tabs first, then
# file tabs, each in Arabic / English / Tamil order).
tabs = gr.TabbedInterface(
    [
        ar_tr_stream_tab,
        en_tr_stream_tab,
        ta_tr_stream_tab,
        ar_tr_file_tab,
        en_tr_file_tab,
        ta_tr_file_tab,
    ],
    [
        "Arabic Live Transcription",
        "English Live Transcription",
        "தமிழ் நேரடி சொல் பகிர்ப்பு",
        "Arabic File Transcription",
        "English File Transcription",
        "தமிழ் ஒலி பதிப்பு சொல் பகிர்ப்பு",
    ],
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    tabs.launch()