Update app.py
Browse files
app.py
CHANGED
@@ -10,8 +10,6 @@ import torch
|
|
10 |
import random
|
11 |
from openai import OpenAI
|
12 |
import subprocess
|
13 |
-
import speech_recognition as sr
|
14 |
-
from gradio_client import Client
|
15 |
|
16 |
default_lang = "en"
|
17 |
|
@@ -54,6 +52,7 @@ Keep conversation friendly, short, clear, and concise.
|
|
54 |
Avoid unnecessary introductions and answer the user's questions directly.
|
55 |
Respond in a normal, conversational manner while being friendly and helpful.
|
56 |
Remember previous parts of the conversation and use that context in your responses.
|
|
|
57 |
[USER]
|
58 |
"""
|
59 |
|
@@ -113,10 +112,13 @@ def models(text, model="Llama 3B Service", seed=42):
|
|
113 |
|
114 |
return output
|
115 |
|
116 |
-
async def respond(
|
117 |
-
if
|
|
|
|
|
|
|
118 |
return None
|
119 |
-
reply = models(
|
120 |
communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
|
121 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
|
122 |
tmp_path = tmp_file.name
|
@@ -173,22 +175,6 @@ def voice_assistant_tab():
|
|
173 |
def speech_translation_tab():
    """Return the markdown heading shown on the speech-translation tab."""
    heading = "# <center><b>Hear how you sound in another language</b></center>"
    return heading
|
175 |
|
176 |
-
def speech_to_text():
    """Capture one utterance from the default microphone and transcribe it.

    Blocks while listening, then sends the captured audio to the Google
    Web Speech API via ``speech_recognition``.

    Returns:
        str: The recognized text, or "" when the audio could not be
        understood or the recognition service could not be reached.
    """
    recognizer = sr.Recognizer()
    # NOTE(review): assumes a working default microphone; sr.Microphone()
    # raises if PyAudio / a capture device is unavailable — confirm deployment
    # environment provides one.
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
    try:
        text = recognizer.recognize_google(audio)
        print("You said:", text)
        return text
    except sr.UnknownValueError:
        # Speech was captured but not intelligible; treat as empty input.
        print("Could not understand audio")
        return ""
    except sr.RequestError as e:
        # Network/API failure talking to the recognition service.
        print(f"Could not request results; {e}")
        return ""
|
191 |
-
|
192 |
with gr.Blocks(css="style.css") as demo:
|
193 |
description = gr.Markdown("# <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>")
|
194 |
|
@@ -212,27 +198,15 @@ with gr.Blocks(css="style.css") as demo:
|
|
212 |
value=0,
|
213 |
visible=False
|
214 |
)
|
215 |
-
|
216 |
-
start_button = gr.Button("Start Listening")
|
217 |
-
stop_button = gr.Button("Stop Listening")
|
218 |
output = gr.Audio(label="AI", type="filepath",
|
219 |
interactive=False,
|
220 |
autoplay=True,
|
221 |
elem_classes="audio")
|
222 |
|
223 |
-
def start_listening():
    """Toggle the buttons for listening mode: reveal Stop, hide Start."""
    show_stop = gr.update(visible=True)
    hide_start = gr.update(visible=False)
    return show_stop, hide_start
|
225 |
-
|
226 |
-
def stop_listening():
    """Leave listening mode: transcribe the mic input, hide Stop, show Start.

    Returns the two button visibility updates plus the recognized text,
    matching the outputs wired up by ``stop_button.click``.
    """
    recognized = speech_to_text()
    hide_stop = gr.update(visible=False)
    show_start = gr.update(visible=True)
    return hide_stop, show_start, recognized
|
229 |
-
|
230 |
-
start_button.click(start_listening, outputs=[stop_button, start_button])
|
231 |
-
stop_button.click(stop_listening, outputs=[stop_button, start_button, input_text])
|
232 |
-
|
233 |
gr.Interface(
|
234 |
fn=respond,
|
235 |
-
inputs=[
|
236 |
outputs=[output],
|
237 |
live=True
|
238 |
)
|
@@ -260,7 +234,7 @@ with gr.Blocks(css="style.css") as demo:
|
|
260 |
clear_button.click(
|
261 |
fn=clear_history,
|
262 |
inputs=[],
|
263 |
-
outputs=[
|
264 |
api_name="clear"
|
265 |
)
|
266 |
|
|
|
10 |
import random
|
11 |
from openai import OpenAI
|
12 |
import subprocess
|
|
|
|
|
13 |
|
14 |
default_lang = "en"
|
15 |
|
|
|
52 |
Avoid unnecessary introductions and answer the user's questions directly.
|
53 |
Respond in a normal, conversational manner while being friendly and helpful.
|
54 |
Remember previous parts of the conversation and use that context in your responses.
|
55 |
+
Your creator Jaward is an AI/ML Research Engineer at Linksoul AI. He is currently specializing in Artificial Intelligence (AI) research more specifically training and optimizing advance AI systems. He aspires to build not just human-like intelligence but AI Systems that augment human intelligence. He has contributed greatly to the opensource community with first-principles code implementations of AI/ML research papers. He did his first internship at Beijing Academy of Artificial Intelligence as an AI Researher where he contributed in cutting-edge AI research leading to him contributing to an insightful paper (AUTOAGENTS - A FRAMEWORK FOR AUTOMATIC AGENT GENERATION). The paper got accepted this year at IJCAI(International Joint Conference On AI). He is currently doing internship at LinkSoul AI - a small opensource AI Research startup in Beijing.
|
56 |
[USER]
|
57 |
"""
|
58 |
|
|
|
112 |
|
113 |
return output
|
114 |
|
115 |
+
async def respond(audio, model, seed):
|
116 |
+
if audio is None:
|
117 |
+
return None
|
118 |
+
user = transcribe(audio)
|
119 |
+
if not user:
|
120 |
return None
|
121 |
+
reply = models(user, model, seed)
|
122 |
communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
|
123 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
|
124 |
tmp_path = tmp_file.name
|
|
|
175 |
def speech_translation_tab():
    """Return the markdown heading shown on the speech-translation tab."""
    heading = "# <center><b>Hear how you sound in another language</b></center>"
    return heading
|
177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
with gr.Blocks(css="style.css") as demo:
|
179 |
description = gr.Markdown("# <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>")
|
180 |
|
|
|
198 |
value=0,
|
199 |
visible=False
|
200 |
)
|
201 |
+
input = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
|
|
|
|
|
202 |
output = gr.Audio(label="AI", type="filepath",
|
203 |
interactive=False,
|
204 |
autoplay=True,
|
205 |
elem_classes="audio")
|
206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
gr.Interface(
|
208 |
fn=respond,
|
209 |
+
inputs=[input, select, seed],
|
210 |
outputs=[output],
|
211 |
live=True
|
212 |
)
|
|
|
234 |
clear_button.click(
|
235 |
fn=clear_history,
|
236 |
inputs=[],
|
237 |
+
outputs=[input, output, input_audio, output_audio],
|
238 |
api_name="clear"
|
239 |
)
|
240 |
|