"""Gradio front-end that exposes ``asr.transcribe`` as a speech-to-text tab."""

import gradio as gr
import asr
# from tts import synthesize

# Model identifiers offered in the dropdown, taken from the "id" entry of
# each record in ``asr.models_info``.
_model_ids = [info["id"] for info in asr.models_info]

# Input widgets are created in the same order they are passed to
# ``asr.transcribe``: the selected model id first, then the audio.
_model_picker = gr.Dropdown(
    choices=_model_ids,
    label="Select Model for ASR",
    value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
    interactive=True,
)
_audio_input = gr.Audio()

# NOTE(review): newer Gradio releases appear to rename ``allow_flagging`` to
# ``flagging_mode`` -- confirm against the pinned Gradio version.
mms_transcribe = gr.Interface(
    fn=asr.transcribe,
    inputs=[_model_picker, _audio_input],
    outputs="text",
    # examples=ASR_EXAMPLES,
    title="Speech-to-text",
    description="Transcribe audio from a microphone or input file.",
    # article=ASR_NOTE,
    allow_flagging="never",
)

# TODO: text-to-speech tab is currently disabled; re-enable together with the
# ``from tts import synthesize`` import above.
# mms_synthesize = gr.Interface(
#     fn=synthesize,
#     inputs=[
#         gr.Text(label="Input text"),
#     ],
#     outputs=[
#         gr.Audio(label="Generated Audio", type="numpy"),
#         gr.Text(label="Filtered text after removing OOVs"),
#     ],
#     # examples=TTS_EXAMPLES,
#     title="Text-to-speech",
#     description=("Generate audio from input text."),
#     allow_flagging="never",
# )

# Only the speech-to-text interface is registered as a tab for now.
_tabs = [mms_transcribe]
_tab_titles = ["Speech-to-text"]
tabbed_interface = gr.TabbedInterface(_tabs, _tab_titles)

# Two-tab variant, for when text-to-speech is re-enabled:
# tabbed_interface = gr.TabbedInterface(
#     [mms_transcribe, mms_synthesize],
#     ["Speech-to-text", "Text-to-speech"],
# )

# Top-level app: render the tabbed interface inside a Blocks container.
with gr.Blocks() as demo:
    tabbed_interface.render()


if __name__ == "__main__":
    # Enable the request queue before starting the server.
    demo.queue()
    demo.launch()