# xlsr-gradio / app.py
# (added English transcription tabs — commit 2904d5d)
import gradio as gr
from transformers import pipeline
import time
# ASR pipelines for Tamil, Arabic, and English. Each model is downloaded from
# the Hugging Face Hub on first run and loaded once at import time so that all
# Gradio tabs share the same pipeline instances.
p_ta = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-tamil")
p_ar = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-arabic")
# BUG FIX: the keyword was misspelled "mdoel", so the intended English model was
# never passed as `model=` and pipeline construction misbehaved at import time.
p_en = pipeline("automatic-speech-recognition", model="patrickvonplaten/hubert-xlarge-ls960-ft-4-gram")
def transcribe_ta(audio_u, audio_m):
    """Transcribe Tamil speech from an uploaded file and/or a mic recording.

    Either argument may be None (component left empty in the UI); the
    transcripts of the provided clips are concatenated, upload first.
    Returns the combined transcript string (empty if no audio given).
    """
    clips = (audio_u, audio_m)
    return "".join(p_ta(clip)["text"] for clip in clips if clip is not None)
def transcribe_ta_stream(audio, state=""):
    """Streaming Tamil transcription step for the live-microphone tab.

    `state` accumulates the transcript across chunks; the same string is
    returned twice: once for the textbox output and once as the new state.
    """
    # Throttle so the model isn't invoked on every tiny audio chunk.
    time.sleep(2)
    chunk_text = p_ta(audio)["text"]
    updated = state + chunk_text + " "
    return updated, updated
def transcribe_ar(audio_u, audio_m):
    """Transcribe Arabic speech from an uploaded file and/or a mic recording.

    Either argument may be None (component left empty in the UI); the
    transcripts of the provided clips are concatenated, upload first.
    Returns the combined transcript string (empty if no audio given).
    """
    clips = (audio_u, audio_m)
    return "".join(p_ar(clip)["text"] for clip in clips if clip is not None)
def transcribe_ar_stream(audio, state=""):
    """Streaming Arabic transcription step for the live-microphone tab.

    `state` accumulates the transcript across chunks; the same string is
    returned twice: once for the textbox output and once as the new state.
    """
    # Throttle so the model isn't invoked on every tiny audio chunk.
    time.sleep(2)
    chunk_text = p_ar(audio)["text"]
    updated = state + chunk_text + " "
    return updated, updated
def transcribe_en(audio_u, audio_m):
    """Transcribe English speech from an uploaded file and/or a mic recording.

    Either argument may be None (component left empty in the UI); the
    transcripts of the provided clips are concatenated, upload first.
    Returns the combined transcript string (empty if no audio given).
    """
    clips = (audio_u, audio_m)
    return "".join(p_en(clip)["text"] for clip in clips if clip is not None)
def transcribe_en_stream(audio, state=""):
    """Streaming English transcription step for the live-microphone tab.

    `state` accumulates the transcript across chunks; the same string is
    returned twice: once for the textbox output and once as the new state.
    """
    # Throttle so the model isn't invoked on every tiny audio chunk.
    time.sleep(2)
    chunk_text = p_en(audio)["text"]
    updated = state + chunk_text + " "
    return updated, updated
# Live (streaming) Tamil transcription tab: microphone chunks are fed to
# transcribe_ta_stream, with a "state" slot carrying the running transcript.
# NOTE(review): `source=` is the Gradio 3.x kwarg; newer Gradio uses
# `sources=[...]` — confirm against the pinned gradio version.
ta_tr_stream_tab = gr.Interface(
    fn=transcribe_ta_stream,
    inputs=[
        gr.Audio(source="microphone", type="filepath", streaming=True),
        "state"
    ],
    outputs=[
        "textbox",
        "state"
    ],
    live=True)
# Live (streaming) Arabic transcription tab: microphone chunks are fed to
# transcribe_ar_stream, with a "state" slot carrying the running transcript.
# NOTE(review): `source=` is the Gradio 3.x kwarg; newer Gradio uses
# `sources=[...]` — confirm against the pinned gradio version.
ar_tr_stream_tab = gr.Interface(
    fn=transcribe_ar_stream,
    inputs=[
        gr.Audio(source="microphone", type="filepath", streaming=True),
        "state"
    ],
    outputs=[
        "textbox",
        "state"
    ],
    live=True)
# Live (streaming) English transcription tab: microphone chunks are fed to
# transcribe_en_stream, with a "state" slot carrying the running transcript.
# NOTE(review): `source=` is the Gradio 3.x kwarg; newer Gradio uses
# `sources=[...]` — confirm against the pinned gradio version.
en_tr_stream_tab = gr.Interface(
    fn=transcribe_en_stream,
    inputs=[
        gr.Audio(source="microphone", type="filepath", streaming=True),
        "state"
    ],
    outputs=[
        "textbox",
        "state"
    ],
    live=True)
# Whole-file Tamil transcription tab: one uploaded clip and/or one mic
# recording, transcribed together by transcribe_ta into a single textbox.
ta_tr_file_tab = gr.Interface(
    fn=transcribe_ta,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Audio(source="microphone", type="filepath")
    ],
    outputs="text")
# Whole-file Arabic transcription tab: one uploaded clip and/or one mic
# recording, transcribed together by transcribe_ar into a single textbox.
ar_tr_file_tab = gr.Interface(
    fn=transcribe_ar,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Audio(source="microphone", type="filepath")
    ],
    outputs="text")
# Whole-file English transcription tab: one uploaded clip and/or one mic
# recording, transcribed together by transcribe_en into a single textbox.
en_tr_file_tab = gr.Interface(
    fn=transcribe_en,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Audio(source="microphone", type="filepath")
    ],
    outputs="text")
# Top-level app: one tab per (language x mode) combination. The two lists are
# positional — interface_list first, then the matching tab titles in the same
# order (live tabs first, then file tabs).
tabs = gr.TabbedInterface(
    [
        ar_tr_stream_tab,
        en_tr_stream_tab,
        ta_tr_stream_tab,
        ar_tr_file_tab,
        en_tr_file_tab,
        ta_tr_file_tab
    ],
    [
        "Arabic Live Transcription",
        "English Live Transcription",
        "Tamil Live Transcription",
        "Arabic File Transcription",
        "English File Transcription",
        "Tamil File Transcription"
    ]
)
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    tabs.launch()