MultiMed

Runtime error

App Files Files Community

not-lain committed on Nov 8, 2023

Commit

d22abe6

1 Parent(s): f0b4737

return of the king

Browse files

Files changed (1) hide show

app.py +18 -18

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ import json
 import dotenv
 from scipy.io.wavfile import write
 import PIL
-# from openai import OpenAI
 dotenv.load_dotenv()
 seamless_client = Client("facebook/seamless_m4t")
@@ -22,15 +22,15 @@ def process_speech(audio):
     """
     processing sound using seamless_m4t
     """
-    audio_name = f"{np.random.randint(0, 100)}.wav"
-    sr, data = audio
-    write(audio_name, sr, data.astype(np.int16))
     out = seamless_client.predict(
         "S2TT",
         "file",
         None,
-        audio_name,
         "",
         "French",# source language
         "English",# target language
@@ -236,18 +236,18 @@ def process_and_query(text=None):
         # Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
         vectara_response_json = query_vectara(text)
         markdown_output = convert_to_markdown(vectara_response_json)
-        # client = OpenAI()
-        # prompt ="Answer in the same language, write it better, more understandable and shorter:"
-        # markdown_output_final = markdown_output
-        # completion = client.chat.completions.create(
-        #   model="gpt-3.5-turbo",
-        #   messages=[
-        #     {"role": "system", "content": prompt},
-        #     {"role": "user", "content": markdown_output_final}
-        #   ]
-        # )
-        # final_response= completion.choices[0].message.content
         return markdown_output
     except Exception as e:
         return str(e)
@@ -305,7 +305,7 @@ with gr.Blocks(theme='ParityError/Anime') as iface :
         image_button = gr.Button("process image")
     with gr.Tab("speech to text translation"):
         audio_input = gr.Audio(label="talk in french",
-                 sources=["microphone"],type="numpy")
         audio_output = gr.Markdown(label="output text")
         audio_button = gr.Button("process audio")
     text_button.click(process_and_query, inputs=text_input, outputs=text_output)

 import dotenv
 from scipy.io.wavfile import write
 import PIL
+from openai import OpenAI
 dotenv.load_dotenv()
 seamless_client = Client("facebook/seamless_m4t")
     """
     processing sound using seamless_m4t
     """
+    # audio_name = f"{np.random.randint(0, 100)}.wav"
+    # sr, data = audio
+    # write(audio_name, sr, data.astype(np.int16))
     out = seamless_client.predict(
         "S2TT",
         "file",
         None,
+        audio, #audio_name
         "",
         "French",# source language
         "English",# target language
         # Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
         vectara_response_json = query_vectara(text)
         markdown_output = convert_to_markdown(vectara_response_json)
+        client = OpenAI()
+        prompt ="Answer in the same language, write it better, more understandable and shorter:"
+        markdown_output_final = markdown_output
+        completion = client.chat.completions.create(
+          model="gpt-3.5-turbo",
+          messages=[
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": markdown_output_final}
+          ]
+        )
+        final_response= completion.choices[0].message.content
         return markdown_output
     except Exception as e:
         return str(e)
         image_button = gr.Button("process image")
     with gr.Tab("speech to text translation"):
         audio_input = gr.Audio(label="talk in french",
+                 sources=["microphone"],type="filepath",)
         audio_output = gr.Markdown(label="output text")
         audio_button = gr.Button("process audio")
     text_button.click(process_and_query, inputs=text_input, outputs=text_output)