ar08 committed
Commit 285990a · verified · 1 Parent(s): dbfd7f5

Update app.py

Files changed (1)
  1. app.py +17 -57
app.py CHANGED
@@ -8,8 +8,6 @@ import tempfile
 import logging
 import io
 from pydub import AudioSegment
-import json
-from datetime import datetime
 
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -29,21 +27,9 @@ headers = {"Authorization": f"Bearer {hf_token}"}
 # Initialize an empty chat history
 chat_history = []
 
-# New feature: Available voices
-AVAILABLE_VOICES = [
-    "en-US-BrianMultilingualNeural",
-    "en-US-JennyMultilingualNeural",
-    "en-GB-RyanMultilingualNeural",
-    "en-AU-NatashaNeural",
-    "en-IN-PrabhatNeural"
-]
-
-# New feature: Conversation log
-conversation_log = []
-
-async def text_to_speech_stream(text, voice, voice_volume=1.0):
+async def text_to_speech_stream(text, voice_volume=1.0):
     """Convert text to speech using edge_tts and return the audio file path."""
-    communicate = edge_tts.Communicate(text, voice)
+    communicate = edge_tts.Communicate(text, "en-US-BrianMultilingualNeural")
     audio_data = b""
 
     async for chunk in communicate.stream():
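
For context while reading this hunk (not part of the commit): the streaming loop is cut off at the hunk boundary. A minimal sketch of how such a function typically completes with edge_tts and pydub, assuming the volume is applied as a pydub gain and the result lands in a temp file; the real loop body sits outside the shown lines:

    import io
    import math
    import tempfile

    import edge_tts
    from pydub import AudioSegment

    async def text_to_speech_stream(text, voice_volume=1.0):
        """Sketch: collect MP3 chunks from edge_tts, apply a gain, write a temp file."""
        communicate = edge_tts.Communicate(text, "en-US-BrianMultilingualNeural")
        audio_data = b""
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":  # stream() also yields "WordBoundary" metadata
                audio_data += chunk["data"]
        segment = AudioSegment.from_file(io.BytesIO(audio_data), format="mp3")
        if voice_volume > 0:
            # pydub gains are in dB; converting the slider's linear factor is an assumption
            segment = segment.apply_gain(20 * math.log10(voice_volume))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            segment.export(tmp.name, format="mp3")
            return tmp.name
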
@@ -84,14 +70,14 @@ def whisper_speech_to_text(audio_path):
         logging.error(f"Unexpected error in whisper_speech_to_text: {e}")
         return ""
 
-async def chat_with_ai(message, system_prompt):
+async def chat_with_ai(message):
     global chat_history
 
     chat_history.append({"role": "user", "content": message})
 
     try:
         response = chat_client.chat_completion(
-            messages=[{"role": "system", "content": system_prompt}] + chat_history,
+            messages=[{"role": "system", "content": "You are a helpful voice assistant. Provide concise and clear responses to user queries."}] + chat_history,
             max_tokens=800,
             temperature=0.7
         )
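
The `chat_client` used here is defined outside the shown hunks. For orientation, a plausible setup with huggingface_hub's `InferenceClient`; the model id below is a placeholder, not taken from the commit:

    from huggingface_hub import InferenceClient

    # Hypothetical setup; the real model id lives elsewhere in app.py.
    chat_client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=hf_token)

    response = chat_client.chat_completion(
        messages=[{"role": "system", "content": "You are a helpful voice assistant."},
                  {"role": "user", "content": "Hello!"}],
        max_tokens=800,
        temperature=0.7,
    )
    print(response.choices[0].message["content"])  # outputs allow dict-style access
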
@@ -99,19 +85,14 @@ async def chat_with_ai(message, system_prompt):
         response_text = response.choices[0].message['content']
         chat_history.append({"role": "assistant", "content": response_text})
 
-        # New feature: Add to conversation log
-        conversation_log.append({
-            "timestamp": datetime.now().isoformat(),
-            "user": message,
-            "assistant": response_text
-        })
+        audio_path = await text_to_speech_stream(response_text)
 
-        return response_text
+        return response_text, audio_path
     except Exception as e:
         logging.error(f"Error in chat_with_ai: {e}")
-        return str(e)
+        return str(e), None
 
-def transcribe_and_chat(audio, system_prompt, selected_voice, voice_volume):
+def transcribe_and_chat(audio):
     if audio is None:
         return "Sorry, no audio was provided. Please try recording again.", None
 
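A consequence of this hunk worth noting: `chat_with_ai` now returns a `(text, audio_path)` pair on both the success and error paths, so callers can unpack unconditionally and treat `audio_path is None` as the failure signal. Usage sketch (the prompt string is illustrative):

    text, audio_path = asyncio.run(chat_with_ai("What's the weather like?"))
    if audio_path is None:
        logging.warning(f"chat_with_ai failed: {text}")
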
@@ -119,15 +100,14 @@ def transcribe_and_chat(audio, system_prompt, selected_voice, voice_volume):
     if not text:
         return "Sorry, I couldn't understand the audio or there was an error in transcription. Please try again.", None
 
-    response = asyncio.run(chat_with_ai(text, system_prompt))
-    audio_path = asyncio.run(text_to_speech_stream(response, selected_voice, voice_volume))
+    response, audio_path = asyncio.run(chat_with_ai(text))
     return response, audio_path
 
 def create_demo():
     with gr.Blocks() as demo:
         gr.Markdown(
             """
-            # 🗣️ Enhanced AI Voice Assistant
+            # 🗣️ AI Voice Assistant
             Welcome to your personal voice assistant! Simply record your voice, and I will respond with both text and speech. The assistant will automatically start listening after playing its response. Powered by advanced AI models.
             """
         )
@@ -137,17 +117,6 @@ def create_demo():
                 audio_input = gr.Audio(type="filepath", label="🎤 Record your voice", elem_id="audio-input")
                 clear_button = gr.Button("Clear", variant="secondary", elem_id="clear-button")
                 voice_volume = gr.Slider(minimum=0, maximum=2, value=1, step=0.1, label="Voice Volume", elem_id="voice-volume")
-
-                # New feature: Voice selection
-                voice_dropdown = gr.Dropdown(choices=AVAILABLE_VOICES, value=AVAILABLE_VOICES[0], label="Select Voice", elem_id="voice-dropdown")
-
-                # New feature: System prompt input
-                system_prompt = gr.Textbox(
-                    label="System Prompt",
-                    placeholder="Enter a system prompt to guide the AI's behavior...",
-                    value="You are a helpful voice assistant. Provide concise and clear responses to user queries.",
-                    elem_id="system-prompt"
-                )
 
             with gr.Column(scale=1):
                 chat_output = gr.Textbox(label="💬 AI Response", elem_id="chat-output", lines=5, interactive=False)
@@ -156,29 +125,20 @@ def create_demo():
         # Add some spacing and a divider
         gr.Markdown("---")
 
-        # New feature: Export conversation log
-        export_button = gr.Button("Export Conversation Log", elem_id="export-button")
-
         # Processing the audio input
-        def process_audio(audio, system_prompt, selected_voice, volume):
+        def process_audio(audio, volume):
             logging.info(f"Received audio: {audio}")
             if audio is None:
                 return "No audio detected. Please try recording again.", None
-            response, audio_path = transcribe_and_chat(audio, system_prompt, selected_voice, volume)
-            logging.info(f"Response: {response}, Audio path: {audio_path}")
-            return response, audio_path
+            response, audio_path = transcribe_and_chat(audio)
+            # Adjust volume for the response audio
+            adjusted_audio_path = asyncio.run(text_to_speech_stream(response, volume))
+            logging.info(f"Response: {response}, Audio path: {adjusted_audio_path}")
+            return response, adjusted_audio_path
 
-        audio_input.change(process_audio, inputs=[audio_input, system_prompt, voice_dropdown, voice_volume], outputs=[chat_output, audio_output])
+        audio_input.change(process_audio, inputs=[audio_input, voice_volume], outputs=[chat_output, audio_output])
         clear_button.click(lambda: (None, None), None, [chat_output, audio_output])
 
-        # New feature: Export conversation log function
-        def export_log():
-            with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
-                json.dump(conversation_log, temp_file, indent=2)
-            return temp_file.name
-
-        export_button.click(export_log, inputs=None, outputs=gr.File(label="Download Conversation Log"))
-
         # JavaScript to handle autoplay, automatic submission, and auto-listen
         demo.load(None, js="""
             function() {
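
One thing to note about the rewritten handler: `transcribe_and_chat` already returns synthesized speech (produced inside `chat_with_ai`), and `process_audio` then synthesizes the same text a second time just to apply the volume. A hypothetical refactor that threads the volume through and synthesizes once, assuming `chat_with_ai` forwards a `voice_volume` argument to `text_to_speech_stream` (it does not in this commit):

    # Hypothetical refactor, not part of this commit: synthesize the reply once.
    def transcribe_and_chat(audio, voice_volume=1.0):
        if audio is None:
            return "Sorry, no audio was provided. Please try recording again.", None
        text = whisper_speech_to_text(audio)
        if not text:
            return "Sorry, I couldn't understand the audio. Please try again.", None
        # Assumes chat_with_ai(text, voice_volume) passes the gain to TTS.
        return asyncio.run(chat_with_ai(text, voice_volume))

    def process_audio(audio, volume):
        return transcribe_and_chat(audio, voice_volume=volume)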