# Page-header residue from the file viewer this source was copied from: Irpan / asr / 81e83c9 / raw / history blame / 3.42 kB
import gradio as gr
import asr
import tts
import util
# Define the Speech-to-Text tab
def create_stt_tab():
    """Build the Speech-To-Text page and return it as a ``gr.Blocks``.

    The page holds an audio input (microphone or upload, passed on as a
    file path), a model dropdown populated from ``asr.models_info``, two
    read-only textboxes for the Arabic-script and Latin-script
    transcriptions, Submit/Clear buttons and a set of examples taken from
    ``util.asr_examples``.

    Submit calls ``asr.transcribe`` with the audio and the selected model
    and routes its outputs to the two textboxes. Clear resets the audio
    input and both textboxes; the model selection is left as it is.
    """
    with gr.Blocks() as stt_page:
        gr.Markdown("### Speech-To-Text")

        # Inputs: the audio clip and the model that should transcribe it.
        with gr.Row():
            audio_in = gr.Audio(
                label="Record or Upload Uyghur Audio",
                sources=["microphone", "upload"],
                type="filepath",
            )
            model_picker = gr.Dropdown(
                choices=list(asr.models_info),
                label="Select a Model",
                value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
                interactive=True,
            )

        # Outputs: one read-only textbox per script.
        with gr.Row():
            arabic_box = gr.Textbox(
                label="Uyghur Arabic Transcription",
                interactive=False,
            )
            latin_box = gr.Textbox(
                label="Uyghur Latin Transcription",
                interactive=False,
            )

        with gr.Row():
            submit_button = gr.Button("Submit")
            clear_button = gr.Button("Clear")

        # The examples and the Submit handler share the same wiring.
        request_fields = [audio_in, model_picker]
        transcript_fields = [arabic_box, latin_box]

        # Clickable examples that pre-fill the inputs.
        with gr.Row():
            gr.Examples(
                examples=util.asr_examples,
                inputs=request_fields,
                outputs=transcript_fields,
                label="Examples",
            )

        # Button behaviour.
        submit_button.click(
            asr.transcribe,
            inputs=request_fields,
            outputs=transcript_fields,
        )
        clear_button.click(
            lambda: (None,) * 3,  # one None per component being cleared
            inputs=[],
            outputs=[audio_in, arabic_box, latin_box],
        )

    return stt_page
# Define the Text-to-Speech tab
def create_tts_tab():
    """Build the Text-To-Speech page and return it as a ``gr.Blocks``.

    The page holds a text input, a model dropdown populated from
    ``tts.models_info``, a read-only audio player for the result,
    Submit/Clear buttons and a set of examples taken from
    ``util.tts_examples``.

    Submit calls ``tts.synthesize`` with the text and the selected model
    and routes its output to the audio player. Clear resets the text input
    and the audio player; the model selection is left as it is.
    """
    with gr.Blocks() as tts_page:
        gr.Markdown("### Text-To-Speech")

        # Inputs: the text to speak and the model that should speak it.
        with gr.Row():
            text_in = gr.Text(label="Input text")
            model_picker = gr.Dropdown(
                choices=list(tts.models_info),
                label="Select a Model",
                value="Meta-MMS",
                interactive=True,
            )

        # Output: the synthesized audio, not editable by the user.
        with gr.Row():
            audio_out = gr.Audio(label="Generated Audio", interactive=False)

        with gr.Row():
            submit_button = gr.Button("Submit")
            clear_button = gr.Button("Clear")

        # The examples and the Submit handler share the same wiring.
        request_fields = [text_in, model_picker]
        result_fields = [audio_out]

        # Clickable examples that pre-fill the inputs.
        with gr.Row():
            gr.Examples(
                examples=util.tts_examples,
                inputs=request_fields,
                outputs=result_fields,
                label="Examples",
            )

        # Button behaviour.
        submit_button.click(
            tts.synthesize,
            inputs=request_fields,
            outputs=result_fields,
        )
        clear_button.click(
            lambda: (None,) * 2,  # one None per component being cleared
            inputs=[],
            outputs=[text_in, audio_out],
        )

    return tts_page
# Combine tabs into a Tabbed Interface
with gr.Blocks() as demo:
    gr.Markdown("### Uyghur Language Tools: STT and TTS")

    # The pages are built while `demo` is the active context, then handed
    # to the tabbed container together with their tab titles.
    tab_pages = [create_stt_tab(), create_tts_tab()]
    tab_titles = ["Speech-To-Text", "Text-To-Speech"]
    with gr.TabbedInterface(tab_pages, tab_titles):
        pass

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.queue()
    demo.launch()