Spaces:
Runtime error
Runtime error
File size: 5,242 Bytes
2c3f8ff 478bedf 2c3f8ff 478bedf 2904d5d 478bedf 649f719 a7bf230 649f719 478bedf 2c3f8ff 478bedf 2c3f8ff 649f719 a7bf230 478bedf 2c3f8ff 2904d5d a7bf230 2904d5d 478bedf 2c3f8ff dec6760 2c3f8ff e60cafb 478bedf dec6760 478bedf dec6760 478bedf 2904d5d dec6760 2904d5d dec6760 2904d5d 478bedf 6a9d77a dec6760 478bedf dec6760 e60cafb 478bedf dec6760 478bedf dec6760 478bedf 2904d5d dec6760 2904d5d dec6760 2904d5d 478bedf 2904d5d 478bedf 2904d5d 6a9d77a 478bedf 2904d5d 6a9d77a 478bedf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import gradio as gr
from transformers import pipeline
import time
# p = pipeline("automatic-speech-recognition", model="/Users/mkesavan/aidev/speechAI-trials/xlsr-wave2vec/wav2vec2-large-xls-r-300m-tamil-colab/checkpoint-1600")
# One automatic-speech-recognition pipeline per supported language
# (Tamil, Arabic, English), each created once at import time and reused
# by the transcribe_* callbacks.
p_ta = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-tamil")
p_ar = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-arabic")
# The keyword must be spelled `model`; the previous `mdoel=` typo meant the
# English checkpoint was never passed to pipeline() as its model.
p_en = pipeline("automatic-speech-recognition", model="patrickvonplaten/hubert-xlarge-ls960-ft-4-gram")
def transcribe_ta(audio_u, audio_m):
    """Transcribe up to two Tamil audio inputs with the ``p_ta`` pipeline.

    Args:
        audio_u: First audio input, or ``None`` when it was not provided.
        audio_m: Second audio input, or ``None`` when it was not provided.

    Returns:
        The transcripts of the inputs that were provided, in the order
        (``audio_u``, ``audio_m``), separated by a newline. An empty string
        when both inputs are ``None``.
    """
    # Joining only the transcripts that exist avoids the stray leading
    # newline the result carried when just the second input was supplied.
    transcripts = [
        p_ta(audio)["text"]
        for audio in (audio_u, audio_m)
        if audio is not None
    ]
    return "\n".join(transcripts)
def transcribe_ta_stream(audio, state=""):
    """Append the Tamil transcript of one audio chunk to the running text.

    Args:
        audio: Audio chunk passed straight to the ``p_ta`` pipeline.
        state: Text accumulated by earlier calls; defaults to ``""``.

    Returns:
        A 2-tuple containing the updated text twice.
    """
    # Wait two seconds before recognising the chunk.
    # NOTE(review): presumably this lets more audio accumulate - confirm.
    time.sleep(2)
    chunk_text = p_ta(audio)["text"]
    # Every chunk is followed by a single space so consecutive chunks
    # do not run together.
    transcript = state + (chunk_text + " ")
    return transcript, transcript
def transcribe_ar(audio_u, audio_m):
    """Transcribe up to two Arabic audio inputs with the ``p_ar`` pipeline.

    Args:
        audio_u: First audio input, or ``None`` when it was not provided.
        audio_m: Second audio input, or ``None`` when it was not provided.

    Returns:
        The transcripts of the inputs that were provided, in the order
        (``audio_u``, ``audio_m``), separated by a newline. An empty string
        when both inputs are ``None``.
    """
    # Joining only the transcripts that exist avoids the stray leading
    # newline the result carried when just the second input was supplied.
    transcripts = [
        p_ar(audio)["text"]
        for audio in (audio_u, audio_m)
        if audio is not None
    ]
    return "\n".join(transcripts)
def transcribe_ar_stream(audio, state=""):
    """Append the Arabic transcript of one audio chunk to the running text.

    Args:
        audio: Audio chunk passed straight to the ``p_ar`` pipeline.
        state: Text accumulated by earlier calls; defaults to ``""``.

    Returns:
        A 2-tuple containing the updated text twice.
    """
    # Wait two seconds before recognising the chunk.
    # NOTE(review): presumably this lets more audio accumulate - confirm.
    time.sleep(2)
    chunk_text = p_ar(audio)["text"]
    # Every chunk is followed by a single space so consecutive chunks
    # do not run together.
    transcript = state + (chunk_text + " ")
    return transcript, transcript
def transcribe_en(audio_u, audio_m):
    """Transcribe up to two English audio inputs with the ``p_en`` pipeline.

    Args:
        audio_u: First audio input, or ``None`` when it was not provided.
        audio_m: Second audio input, or ``None`` when it was not provided.

    Returns:
        The transcripts of the inputs that were provided, in the order
        (``audio_u``, ``audio_m``), separated by a newline. An empty string
        when both inputs are ``None``.
    """
    # Joining only the transcripts that exist avoids the stray leading
    # newline the result carried when just the second input was supplied.
    transcripts = [
        p_en(audio)["text"]
        for audio in (audio_u, audio_m)
        if audio is not None
    ]
    return "\n".join(transcripts)
def transcribe_en_stream(audio, state=""):
    """Append the English transcript of one audio chunk to the running text.

    Args:
        audio: Audio chunk passed straight to the ``p_en`` pipeline.
        state: Text accumulated by earlier calls; defaults to ``""``.

    Returns:
        A 2-tuple containing the updated text twice.
    """
    # Wait two seconds before recognising the chunk.
    # NOTE(review): presumably this lets more audio accumulate - confirm.
    time.sleep(2)
    chunk_text = p_en(audio)["text"]
    # Every chunk is followed by a single space so consecutive chunks
    # do not run together.
    transcript = state + (chunk_text + " ")
    return transcript, transcript
# Live Tamil transcription tab: streaming microphone audio is passed to
# transcribe_ta_stream, and the accumulated text travels through "state".
ta_tr_stream_tab = gr.Interface(
    fn=transcribe_ta_stream,
    live=True,
    description="ரெகாட் பட்டண் அமர்தி பேசவும், பேச்சு சொல் பகிர்ப்பு வலது பக்கதில் அச்சிடபடும்",
    inputs=[
        gr.Audio(
            label="தமிழ் பேச்சு",
            streaming=True,
            type="filepath",
            source="microphone",
        ),
        "state",
    ],
    outputs=["textbox", "state"],
)
# Live Arabic transcription tab: streaming microphone audio is passed to
# transcribe_ar_stream, and the accumulated text travels through "state".
ar_tr_stream_tab = gr.Interface(
    fn=transcribe_ar_stream,
    live=True,
    description="Click record from microphone and start talking, transcription shall appear to the right.",
    inputs=[
        gr.Audio(
            label="Arabic speech",
            streaming=True,
            type="filepath",
            source="microphone",
        ),
        "state",
    ],
    outputs=["textbox", "state"],
)
# Live English transcription tab: streaming microphone audio is passed to
# transcribe_en_stream, and the accumulated text travels through "state".
en_tr_stream_tab = gr.Interface(
    fn=transcribe_en_stream,
    live=True,
    description="Click record from microphone and start talking, transcription shall appear to the right.",
    inputs=[
        gr.Audio(
            label="English speech",
            streaming=True,
            type="filepath",
            source="microphone",
        ),
        "state",
    ],
    outputs=["textbox", "state"],
)
# Tamil file transcription tab: takes an audio file and/or a microphone
# recording and shows the text returned by transcribe_ta.
ta_tr_file_tab = gr.Interface(
    fn=transcribe_ta,
    outputs="text",
    description="ஒலி பதிப்பு சமர்ப்பிக்கவும், அல்லது ரெகாட் பட்டண் அமர்தி பேசவும், பேச்சு சொல் பகிர்ப்பு வலது பக்கதில் அச்சிடபடும்",
    inputs=[
        gr.Audio(label="தமிழ் ஒலி பதிப்பு சமர்ப்பித்தல்", type="filepath"),
        gr.Audio(label="தமிழ் பேச்சு", type="filepath", source="microphone"),
    ],
    # Each example fills the first input only; the second is left empty.
    examples=[
        ["samples/ta/32862591.mp3", None],
        ["samples/ta/32862612.mp3", None],
    ],
)
# Arabic file transcription tab: takes an audio file and/or a microphone
# recording and shows the text returned by transcribe_ar.
ar_tr_file_tab = gr.Interface(
    fn=transcribe_ar,
    outputs="text",
    description="Upload a file or, click record from microphone and start talking, transcription shall appear to the right.",
    inputs=[
        gr.Audio(label="Arabic file upload", type="filepath"),
        gr.Audio(label="Arabic speech", type="filepath", source="microphone"),
    ],
    # Each example fills the first input only; the second is left empty.
    examples=[
        ["samples/ar/19706399.mp3", None],
        ["samples/ar/19985784.mp3", None],
    ],
)
# English file transcription tab: takes an audio file and/or a microphone
# recording and shows the text returned by transcribe_en.
en_tr_file_tab = gr.Interface(
    fn=transcribe_en,
    outputs="text",
    description="Upload a file or, click record from microphone and start talking, transcription shall appear to the right.",
    inputs=[
        gr.Audio(label="English file upload", type="filepath"),
        gr.Audio(label="English speech", type="filepath", source="microphone"),
    ],
    # Each example fills the first input only; the second is left empty.
    examples=[
        ["samples/en/32941920.mp3", None],
        ["samples/en/32941921.mp3", None],
    ],
)
# Combine the six interfaces into one tabbed app. Each interface is written
# next to its tab title; zip() then splits the pairs back into the two
# parallel lists (interfaces, titles) that TabbedInterface receives.
tabs = gr.TabbedInterface(
    *map(
        list,
        zip(
            (ar_tr_stream_tab, "Arabic Live Transcription"),
            (en_tr_stream_tab, "English Live Transcription"),
            (ta_tr_stream_tab, "தமிழ் நேரடி சொல் பகிர்ப்பு"),
            (ar_tr_file_tab, "Arabic File Transcription"),
            (en_tr_file_tab, "English File Transcription"),
            (ta_tr_file_tab, "தமிழ் ஒலி பதிப்பு சொல் பகிர்ப்பு"),
        ),
    )
)
# Start the tabbed app only when this file is run as a script, not when it
# is imported. The stray trailing "|" after launch() was a syntax error and
# has been removed.
if __name__ == "__main__":
    tabs.launch()