Jaward committed on
Commit
2b78489
·
verified ·
1 Parent(s): a8f539b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -36
app.py CHANGED
@@ -10,8 +10,6 @@ import torch
10
  import random
11
  from openai import OpenAI
12
  import subprocess
13
- import speech_recognition as sr
14
- from gradio_client import Client
15
 
16
  default_lang = "en"
17
 
@@ -54,6 +52,7 @@ Keep conversation friendly, short, clear, and concise.
54
  Avoid unnecessary introductions and answer the user's questions directly.
55
  Respond in a normal, conversational manner while being friendly and helpful.
56
  Remember previous parts of the conversation and use that context in your responses.
 
57
  [USER]
58
  """
59
 
@@ -113,10 +112,13 @@ def models(text, model="Llama 3B Service", seed=42):
113
 
114
  return output
115
 
116
- async def respond(text, model, seed):
117
- if not text:
 
 
 
118
  return None
119
- reply = models(text, model, seed)
120
  communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
121
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
122
  tmp_path = tmp_file.name
@@ -173,22 +175,6 @@ def voice_assistant_tab():
173
  def speech_translation_tab():
174
  return "# <center><b>Hear how you sound in another language</b></center>"
175
 
176
- def speech_to_text():
177
- r = sr.Recognizer()
178
- with sr.Microphone() as source:
179
- print("Listening...")
180
- audio = r.listen(source)
181
- try:
182
- text = r.recognize_google(audio)
183
- print("You said:", text)
184
- return text
185
- except sr.UnknownValueError:
186
- print("Could not understand audio")
187
- return ""
188
- except sr.RequestError as e:
189
- print("Could not request results; {0}".format(e))
190
- return ""
191
-
192
  with gr.Blocks(css="style.css") as demo:
193
  description = gr.Markdown("# <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>")
194
 
@@ -212,27 +198,15 @@ with gr.Blocks(css="style.css") as demo:
212
  value=0,
213
  visible=False
214
  )
215
- input_text = gr.Textbox(label="User Input (Speak or Type)", placeholder="Speak or type your message here...")
216
- start_button = gr.Button("Start Listening")
217
- stop_button = gr.Button("Stop Listening")
218
  output = gr.Audio(label="AI", type="filepath",
219
  interactive=False,
220
  autoplay=True,
221
  elem_classes="audio")
222
 
223
- def start_listening():
224
- return gr.update(visible=True), gr.update(visible=False)
225
-
226
- def stop_listening():
227
- text = speech_to_text()
228
- return gr.update(visible=False), gr.update(visible=True), text
229
-
230
- start_button.click(start_listening, outputs=[stop_button, start_button])
231
- stop_button.click(stop_listening, outputs=[stop_button, start_button, input_text])
232
-
233
  gr.Interface(
234
  fn=respond,
235
- inputs=[input_text, select, seed],
236
  outputs=[output],
237
  live=True
238
  )
@@ -260,7 +234,7 @@ with gr.Blocks(css="style.css") as demo:
260
  clear_button.click(
261
  fn=clear_history,
262
  inputs=[],
263
- outputs=[input_text, output, input_audio, output_audio],
264
  api_name="clear"
265
  )
266
 
 
10
  import random
11
  from openai import OpenAI
12
  import subprocess
 
 
13
 
14
  default_lang = "en"
15
 
 
52
  Avoid unnecessary introductions and answer the user's questions directly.
53
  Respond in a normal, conversational manner while being friendly and helpful.
54
  Remember previous parts of the conversation and use that context in your responses.
55
+ Your creator Jaward is an AI/ML Research Engineer at Linksoul AI. He is currently specializing in Artificial Intelligence (AI) research more specifically training and optimizing advance AI systems. He aspires to build not just human-like intelligence but AI Systems that augment human intelligence. He has contributed greatly to the opensource community with first-principles code implementations of AI/ML research papers. He did his first internship at Beijing Academy of Artificial Intelligence as an AI Researher where he contributed in cutting-edge AI research leading to him contributing to an insightful paper (AUTOAGENTS - A FRAMEWORK FOR AUTOMATIC AGENT GENERATION). The paper got accepted this year at IJCAI(International Joint Conference On AI). He is currently doing internship at LinkSoul AI - a small opensource AI Research startup in Beijing.
56
  [USER]
57
  """
58
 
 
112
 
113
  return output
114
 
115
+ async def respond(audio, model, seed):
116
+ if audio is None:
117
+ return None
118
+ user = transcribe(audio)
119
+ if not user:
120
  return None
121
+ reply = models(user, model, seed)
122
  communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
123
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
124
  tmp_path = tmp_file.name
 
175
  def speech_translation_tab():
176
  return "# <center><b>Hear how you sound in another language</b></center>"
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  with gr.Blocks(css="style.css") as demo:
179
  description = gr.Markdown("# <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>")
180
 
 
198
  value=0,
199
  visible=False
200
  )
201
+ input = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
 
 
202
  output = gr.Audio(label="AI", type="filepath",
203
  interactive=False,
204
  autoplay=True,
205
  elem_classes="audio")
206
 
 
 
 
 
 
 
 
 
 
 
207
  gr.Interface(
208
  fn=respond,
209
+ inputs=[input, select, seed],
210
  outputs=[output],
211
  live=True
212
  )
 
234
  clear_button.click(
235
  fn=clear_history,
236
  inputs=[],
237
+ outputs=[input, output, input_audio, output_audio],
238
  api_name="clear"
239
  )
240