Spaces:
Runtime error
Runtime error
File size: 3,490 Bytes
2c3f8ff 478bedf 2c3f8ff 478bedf 2904d5d 478bedf 649f719 478bedf 2c3f8ff 478bedf 2c3f8ff 649f719 478bedf 2c3f8ff 2904d5d 478bedf 2c3f8ff 478bedf 2904d5d 478bedf 649f719 478bedf 649f719 478bedf 2904d5d 478bedf 2904d5d 478bedf 2904d5d 478bedf 2904d5d 478bedf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import gradio as gr
from transformers import pipeline
import time
# p = pipeline("automatic-speech-recognition", model="/Users/mkesavan/aidev/speechAI-trials/xlsr-wave2vec/wav2vec2-large-xls-r-300m-tamil-colab/checkpoint-1600")
# Combining the Tamil, Arabic, and English speech-recognition models
# One automatic-speech-recognition pipeline per supported language. They are
# created once, at import time, and shared by every transcription callback.
p_ta = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-tamil")
p_ar = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-arabic")
# The checkpoint must be passed through the ``model`` keyword; any other
# spelling is not recognised as the model selector.
p_en = pipeline(
    "automatic-speech-recognition",
    model="patrickvonplaten/hubert-xlarge-ls960-ft-4-gram",
)
def transcribe_ta(audio_u, audio_m):
    """Transcribe Tamil speech from up to two audio inputs.

    Args:
        audio_u: First audio input accepted by ``p_ta`` (a file path in this
            app), or ``None`` when it was not provided.
        audio_m: Second audio input (the microphone recording in this app),
            or ``None`` when it was not provided.

    Returns:
        The transcript of ``audio_u`` followed by the transcript of
        ``audio_m``, separated by a single space when both are present.
        An empty string when both inputs are ``None``.
    """
    # Join with a space so the last word of the first transcript does not
    # fuse with the first word of the second one.
    return " ".join(
        p_ta(clip)["text"] for clip in (audio_u, audio_m) if clip is not None
    )
def transcribe_ta_stream(audio, state=""):
    """Transcribe one streamed Tamil audio chunk and extend the running text.

    Args:
        audio: The newest audio chunk accepted by ``p_ta``, or ``None`` when
            no chunk is available.
        state: Transcript accumulated by earlier calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(text, state)`` tuple whose two items are the same updated
        transcript: one for display and one to be fed back as ``state``.
    """
    transcript = state or ""
    if audio is None:
        # Nothing to transcribe: skip the delay and the model call and
        # return the transcript unchanged instead of failing.
        return transcript, transcript

    # Fixed 2-second pause before each model call (presumably to throttle
    # live updates).
    time.sleep(2)
    transcript += p_ta(audio)["text"] + " "
    return transcript, transcript
def transcribe_ar(audio_u, audio_m):
    """Transcribe Arabic speech from up to two audio inputs.

    Args:
        audio_u: First audio input accepted by ``p_ar`` (a file path in this
            app), or ``None`` when it was not provided.
        audio_m: Second audio input (the microphone recording in this app),
            or ``None`` when it was not provided.

    Returns:
        The transcript of ``audio_u`` followed by the transcript of
        ``audio_m``, separated by a single space when both are present.
        An empty string when both inputs are ``None``.
    """
    # Join with a space so the last word of the first transcript does not
    # fuse with the first word of the second one.
    return " ".join(
        p_ar(clip)["text"] for clip in (audio_u, audio_m) if clip is not None
    )
def transcribe_ar_stream(audio, state=""):
    """Transcribe one streamed Arabic audio chunk and extend the running text.

    Args:
        audio: The newest audio chunk accepted by ``p_ar``, or ``None`` when
            no chunk is available.
        state: Transcript accumulated by earlier calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(text, state)`` tuple whose two items are the same updated
        transcript: one for display and one to be fed back as ``state``.
    """
    transcript = state or ""
    if audio is None:
        # Nothing to transcribe: skip the delay and the model call and
        # return the transcript unchanged instead of failing.
        return transcript, transcript

    # Fixed 2-second pause before each model call (presumably to throttle
    # live updates).
    time.sleep(2)
    transcript += p_ar(audio)["text"] + " "
    return transcript, transcript
def transcribe_en(audio_u, audio_m):
    """Transcribe English speech from up to two audio inputs.

    Args:
        audio_u: First audio input accepted by ``p_en`` (a file path in this
            app), or ``None`` when it was not provided.
        audio_m: Second audio input (the microphone recording in this app),
            or ``None`` when it was not provided.

    Returns:
        The transcript of ``audio_u`` followed by the transcript of
        ``audio_m``, separated by a single space when both are present.
        An empty string when both inputs are ``None``.
    """
    # Join with a space so the last word of the first transcript does not
    # fuse with the first word of the second one.
    return " ".join(
        p_en(clip)["text"] for clip in (audio_u, audio_m) if clip is not None
    )
def transcribe_en_stream(audio, state=""):
    """Transcribe one streamed English audio chunk and extend the running text.

    Args:
        audio: The newest audio chunk accepted by ``p_en``, or ``None`` when
            no chunk is available.
        state: Transcript accumulated by earlier calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(text, state)`` tuple whose two items are the same updated
        transcript: one for display and one to be fed back as ``state``.
    """
    transcript = state or ""
    if audio is None:
        # Nothing to transcribe: skip the delay and the model call and
        # return the transcript unchanged instead of failing.
        return transcript, transcript

    # Fixed 2-second pause before each model call (presumably to throttle
    # live updates).
    time.sleep(2)
    transcript += p_en(audio)["text"] + " "
    return transcript, transcript
# Tamil live tab: streaming microphone audio is handed to
# transcribe_ta_stream, and the running transcript travels through the
# "state" input/output pair.
ta_tr_stream_tab = gr.Interface(
    live=True,
    fn=transcribe_ta_stream,
    inputs=[
        gr.Audio(type="filepath", source="microphone", streaming=True),
        "state",
    ],
    outputs=["textbox", "state"],
)
# Arabic live tab: streaming microphone audio is handed to
# transcribe_ar_stream, and the running transcript travels through the
# "state" input/output pair.
ar_tr_stream_tab = gr.Interface(
    live=True,
    fn=transcribe_ar_stream,
    inputs=[
        gr.Audio(type="filepath", source="microphone", streaming=True),
        "state",
    ],
    outputs=["textbox", "state"],
)
# English live tab: streaming microphone audio is handed to
# transcribe_en_stream, and the running transcript travels through the
# "state" input/output pair.
en_tr_stream_tab = gr.Interface(
    live=True,
    fn=transcribe_en_stream,
    inputs=[
        gr.Audio(type="filepath", source="microphone", streaming=True),
        "state",
    ],
    outputs=["textbox", "state"],
)
# Tamil file tab: two audio inputs (the first uses Gradio's default source,
# presumably file upload; the second is the microphone) are passed to
# transcribe_ta and the returned text is displayed.
ta_tr_file_tab = gr.Interface(
    fn=transcribe_ta,
    outputs="text",
    inputs=[
        gr.Audio(type="filepath"),
        gr.Audio(type="filepath", source="microphone"),
    ],
)
# Arabic file tab: two audio inputs (the first uses Gradio's default source,
# presumably file upload; the second is the microphone) are passed to
# transcribe_ar and the returned text is displayed.
ar_tr_file_tab = gr.Interface(
    fn=transcribe_ar,
    outputs="text",
    inputs=[
        gr.Audio(type="filepath"),
        gr.Audio(type="filepath", source="microphone"),
    ],
)
# English file tab: two audio inputs (the first uses Gradio's default source,
# presumably file upload; the second is the microphone) are passed to
# transcribe_en and the returned text is displayed.
en_tr_file_tab = gr.Interface(
    fn=transcribe_en,
    outputs="text",
    inputs=[
        gr.Audio(type="filepath"),
        gr.Audio(type="filepath", source="microphone"),
    ],
)
# Top-level app: one tab per interface. The titles in ``tab_names`` are
# matched to ``interface_list`` by position, so both lists keep the same
# order (live tabs first, then file tabs; Arabic, English, Tamil within each).
tabs = gr.TabbedInterface(
    interface_list=[
        ar_tr_stream_tab,
        en_tr_stream_tab,
        ta_tr_stream_tab,
        ar_tr_file_tab,
        en_tr_file_tab,
        ta_tr_file_tab,
    ],
    tab_names=[
        "Arabic Live Transcription",
        "English Live Transcription",
        "Tamil Live Transcription",
        "Arabic File Transcription",
        "English File Transcription",
        "Tamil File Transcription",
    ],
)
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script.
    tabs.launch()