MultiMed

Runtime error

App Files Files Community

not-lain committed on Nov 9, 2023

Commit

1ffa7d4

1 Parent(s): c540f1a

audio to text

Browse files

Files changed (1) hide show

app.py +5 -6

app.py CHANGED Viewed

@@ -19,7 +19,7 @@ seamless_client = Client("facebook/seamless_m4t")
-def process_speech(audio_input,input_language,target_language):
     """
     processing sound using seamless_m4t
     """
@@ -42,7 +42,7 @@ def process_speech(audio_input,input_language,target_language):
         audio_input, #audio_name
         "",
         input_language,# source language
-        target_language,# target language
         api_name="/run",
     )
     out = out[1] # get the text
@@ -419,16 +419,15 @@ with gr.Blocks(theme='ParityError/Anime') as iface :
         image_input = gr.Image(label="upload image")
         image_output = gr.Markdown(label="output text")
         image_button = gr.Button("process image")
-    with gr.Tab("speech to text translation"):
         with gr.Row():
-            input_language = gr.Dropdown(languages, label="input language",value="French",interactive=True)
-            target_language = gr.Dropdown(languages, label="target language",value="English",interactive=True)
         audio_input = gr.Audio(label="speak",type="filepath",sources="microphone")
         audio_output = gr.Markdown(label="output text")
         audio_button = gr.Button("process audio")
     text_button.click(process_and_query, inputs=text_input, outputs=text_output)
     image_button.click(process_image, inputs=image_input, outputs=image_output)
-    audio_button.click(process_speech, inputs=[audio_input,input_language,target_language], outputs=audio_output)
 iface.queue().launch(show_error=True,debug=True)

+def process_speech(audio_input,input_language):
     """
     processing sound using seamless_m4t
     """
         audio_input, #audio_name
         "",
         input_language,# source language
+        input_language,# target language
         api_name="/run",
     )
     out = out[1] # get the text
         image_input = gr.Image(label="upload image")
         image_output = gr.Markdown(label="output text")
         image_button = gr.Button("process image")
+    with gr.Tab("speech to text"):
         with gr.Row():
+            input_language = gr.Dropdown(languages, label="select the language",value="French",interactive=True)
         audio_input = gr.Audio(label="speak",type="filepath",sources="microphone")
         audio_output = gr.Markdown(label="output text")
         audio_button = gr.Button("process audio")
     text_button.click(process_and_query, inputs=text_input, outputs=text_output)
     image_button.click(process_image, inputs=image_input, outputs=image_output)
+    audio_button.click(process_speech, inputs=[audio_input,input_language], outputs=audio_output)
 iface.queue().launch(show_error=True,debug=True)