Update app.py
Browse files
app.py
CHANGED
@@ -10,8 +10,6 @@ import torch
|
|
10 |
import random
|
11 |
from openai import OpenAI
|
12 |
import subprocess
|
13 |
-
import speech_recognition as sr
|
14 |
-
from gradio_client import Client
|
15 |
|
16 |
default_lang = "en"
|
17 |
|
@@ -54,6 +52,7 @@ Keep conversation friendly, short, clear, and concise.
|
|
54 |
Avoid unnecessary introductions and answer the user's questions directly.
|
55 |
Respond in a normal, conversational manner while being friendly and helpful.
|
56 |
Remember previous parts of the conversation and use that context in your responses.
|
|
|
57 |
[USER]
|
58 |
"""
|
59 |
|
@@ -113,10 +112,13 @@ def models(text, model="Llama 3B Service", seed=42):
|
|
113 |
|
114 |
return output
|
115 |
|
116 |
-
async def respond(
|
117 |
-
if
|
|
|
|
|
|
|
118 |
return None
|
119 |
-
reply = models(
|
120 |
communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
|
121 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
|
122 |
tmp_path = tmp_file.name
|
@@ -173,22 +175,6 @@ def voice_assistant_tab():
|
|
173 |
def speech_translation_tab():
    """Return the markdown heading shown on the speech-translation tab."""
    heading = "# <center><b>Hear how you sound in another language</b></center>"
    return heading
|
175 |
|
176 |
-
def speech_to_text():
    """Capture one utterance from the default microphone and transcribe it.

    Blocks while listening, then sends the captured audio to the Google
    Web Speech API via ``speech_recognition``.

    Returns:
        str: The recognized text, or "" when the audio could not be
        understood or the recognition service could not be reached.
    """
    recognizer = sr.Recognizer()
    # NOTE(review): assumes a working default microphone; sr.Microphone()
    # raises if PyAudio / a capture device is unavailable — confirm deployment
    # environment provides one.
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
    try:
        text = recognizer.recognize_google(audio)
        print("You said:", text)
        return text
    except sr.UnknownValueError:
        # Speech was captured but not intelligible; treat as empty input.
        print("Could not understand audio")
        return ""
    except sr.RequestError as e:
        # Network/API failure talking to the recognition service.
        print(f"Could not request results; {e}")
        return ""
|
191 |
-
|
192 |
with gr.Blocks(css="style.css") as demo:
|
193 |
description = gr.Markdown("# <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>")
|
194 |
|
@@ -212,27 +198,15 @@ with gr.Blocks(css="style.css") as demo:
|
|
212 |
value=0,
|
213 |
visible=False
|
214 |
)
|
215 |
-
|
216 |
-
start_button = gr.Button("Start Listening")
|
217 |
-
stop_button = gr.Button("Stop Listening")
|
218 |
output = gr.Audio(label="AI", type="filepath",
|
219 |
interactive=False,
|
220 |
autoplay=True,
|
221 |
elem_classes="audio")
|
222 |
|
223 |
-
def start_listening():
    """Toggle the buttons for listening mode: reveal Stop, hide Start."""
    show_stop = gr.update(visible=True)
    hide_start = gr.update(visible=False)
    return show_stop, hide_start
|
225 |
-
|
226 |
-
def stop_listening():
    """Leave listening mode: transcribe the mic input, hide Stop, show Start.

    Returns the two button visibility updates plus the recognized text,
    matching the outputs wired up by ``stop_button.click``.
    """
    recognized = speech_to_text()
    hide_stop = gr.update(visible=False)
    show_start = gr.update(visible=True)
    return hide_stop, show_start, recognized
|
229 |
-
|
230 |
-
start_button.click(start_listening, outputs=[stop_button, start_button])
|
231 |
-
stop_button.click(stop_listening, outputs=[stop_button, start_button, input_text])
|
232 |
-
|
233 |
gr.Interface(
|
234 |
fn=respond,
|
235 |
-
inputs=[
|
236 |
outputs=[output],
|
237 |
live=True
|
238 |
)
|
@@ -260,7 +234,7 @@ with gr.Blocks(css="style.css") as demo:
|
|
260 |
clear_button.click(
|
261 |
fn=clear_history,
|
262 |
inputs=[],
|
263 |
-
outputs=[
|
264 |
api_name="clear"
|
265 |
)
|
266 |
|
|
|
10 |
import random
|
11 |
from openai import OpenAI
|
12 |
import subprocess
|
|
|
|
|
13 |
|
14 |
default_lang = "en"
|
15 |
|
|
|
52 |
Avoid unnecessary introductions and answer the user's questions directly.
|
53 |
Respond in a normal, conversational manner while being friendly and helpful.
|
54 |
Remember previous parts of the conversation and use that context in your responses.
|
55 |
+
Your creator Jaward is an AI/ML Research Engineer at Linksoul AI. He is currently specializing in Artificial Intelligence (AI) research more specifically training and optimizing advance AI systems. He aspires to build not just human-like intelligence but AI Systems that augment human intelligence. He has contributed greatly to the opensource community with first-principles code implementations of AI/ML research papers. He did his first internship at Beijing Academy of Artificial Intelligence as an AI Researher where he contributed in cutting-edge AI research leading to him contributing to an insightful paper (AUTOAGENTS - A FRAMEWORK FOR AUTOMATIC AGENT GENERATION). The paper got accepted this year at IJCAI(International Joint Conference On AI). He is currently doing internship at LinkSoul AI - a small opensource AI Research startup in Beijing.
|
56 |
[USER]
|
57 |
"""
|
58 |
|
|
|
112 |
|
113 |
return output
|
114 |
|
115 |
+
async def respond(audio, model, seed):
|
116 |
+
if audio is None:
|
117 |
+
return None
|
118 |
+
user = transcribe(audio)
|
119 |
+
if not user:
|
120 |
return None
|
121 |
+
reply = models(user, model, seed)
|
122 |
communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
|
123 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
|
124 |
tmp_path = tmp_file.name
|
|
|
175 |
def speech_translation_tab():
    """Return the markdown heading shown on the speech-translation tab."""
    heading = "# <center><b>Hear how you sound in another language</b></center>"
    return heading
|
177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
with gr.Blocks(css="style.css") as demo:
|
179 |
description = gr.Markdown("# <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>")
|
180 |
|
|
|
198 |
value=0,
|
199 |
visible=False
|
200 |
)
|
201 |
+
input = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
|
|
|
|
|
202 |
output = gr.Audio(label="AI", type="filepath",
|
203 |
interactive=False,
|
204 |
autoplay=True,
|
205 |
elem_classes="audio")
|
206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
gr.Interface(
|
208 |
fn=respond,
|
209 |
+
inputs=[input, select, seed],
|
210 |
outputs=[output],
|
211 |
live=True
|
212 |
)
|
|
|
234 |
clear_button.click(
|
235 |
fn=clear_history,
|
236 |
inputs=[],
|
237 |
+
outputs=[input, output, input_audio, output_audio],
|
238 |
api_name="clear"
|
239 |
)
|
240 |
|