Pijush2023 committed
Commit 193ef9a · verified · 1 Parent(s): 2c773ca

Update app.py

Files changed (1)
  1. app.py +42 -271
app.py CHANGED
@@ -1,41 +1,19 @@
  import gradio as gr
  import os
  import logging
- from langchain_core.prompts import ChatPromptTemplate
- from langchain_core.output_parsers import StrOutputParser
- from langchain_openai import ChatOpenAI
- from langchain_community.graphs import Neo4jGraph
- from typing import List, Tuple
- from pydantic import BaseModel, Field
- from langchain_core.messages import AIMessage, HumanMessage
- from langchain_core.runnables import (
-     RunnableBranch,
-     RunnableLambda,
-     RunnablePassthrough,
-     RunnableParallel,
- )
- from langchain_core.prompts.prompt import PromptTemplate
  import requests
  import tempfile
- from langchain.memory import ConversationBufferWindowMemory
- import time
- import logging
- from langchain.chains import ConversationChain
  import torch
- import torchaudio
- from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
  import numpy as np
- import threading
-
-
- #code for history
- conversational_memory = ConversationBufferWindowMemory(
-     memory_key='chat_history',
-     k=10,
-     return_messages=True
- )

- # Setup Neo4j
  graph = Neo4jGraph(
      url="neo4j+s://6457770f.databases.neo4j.io",
      username="neo4j",
@@ -72,9 +50,6 @@ def generate_full_text_query(input: str) -> str:
      full_text_query += f" {words[-1]}~2"
      return full_text_query.strip()

- # Setup logging to a file to capture debug information
- logging.basicConfig(filename='neo4j_retrieval.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
-
  def structured_retriever(question: str) -> str:
      result = ""
      entities = entity_chain.invoke({"question": question})
@@ -98,77 +73,6 @@ def structured_retriever(question: str) -> str:
          result += "\n".join([el['output'] for el in response])
      return result

- def retriever_neo4j(question: str):
-     structured_data = structured_retriever(question)
-     logging.debug(f"Structured data: {structured_data}")
-     return structured_data
-
- # Setup for condensing the follow-up questions
- _template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question,
- in its original language.
- Chat History:
- {chat_history}
- Follow Up Input: {question}
- Standalone question:"""
-
- CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-
- def _format_chat_history(chat_history: list[tuple[str, str]]) -> list:
-     buffer = []
-     for human, ai in chat_history:
-         buffer.append(HumanMessage(content=human))
-         buffer.append(AIMessage(content=ai))
-     return buffer
-
- _search_query = RunnableBranch(
-     (
-         RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
-             run_name="HasChatHistoryCheck"
-         ),
-         RunnablePassthrough.assign(
-             chat_history=lambda x: _format_chat_history(x["chat_history"])
-         )
-         | CONDENSE_QUESTION_PROMPT
-         | ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
-         | StrOutputParser(),
-     ),
-     RunnableLambda(lambda x: x["question"]),
- )
-
-
- template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
- Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational way without any Greet.
- {context}
- Question: {question}
- Answer:"""
-
-
- qa_prompt = ChatPromptTemplate.from_template(template)
-
- # Define the chain for Neo4j-based retrieval and response generation
- chain_neo4j = (
-     RunnableParallel(
-         {
-             "context": _search_query | retriever_neo4j,
-             "question": RunnablePassthrough(),
-         }
-     )
-     | qa_prompt
-     | chat_model
-     | StrOutputParser()
- )
-
- # Define the function to get the response
- def get_response(question):
-     try:
-         return chain_neo4j.invoke({"question": question})
-     except Exception as e:
-         return f"Error: {str(e)}"
-
- # Define the function to clear input and output
- def clear_fields():
-     return [],"",None
-
  # Function to generate audio with Eleven Labs TTS
  def generate_audio_elevenlabs(text):
      XI_API_KEY = os.environ['ELEVENLABS_API']
@@ -195,79 +99,11 @@ def generate_audio_elevenlabs(text):
                  if chunk:
                      f.write(chunk)
              audio_path = f.name
-             logging.debug(f"Audio saved to {audio_path}")
          return audio_path # Return audio path for automatic playback
      else:
          logging.error(f"Error generating audio: {response.text}")
          return None

- def handle_mode_selection(mode, chat_history, question):
-     if mode == "Normal Chatbot":
-         # Normal chatbot mode: Show the response in the chatbot output
-         response = get_response(question)
-         chat_history.append((question, response))
-         return chat_history, "", None
-
-     elif mode == "Voice to Voice Conversation":
-         # Voice to Voice mode: Generate the response using Eleven Labs and return audio without showing text
-         response = get_response(question) # Get the response text (can be omitted if not needed for debugging)
-         audio_path = generate_audio_elevenlabs(response) # Convert the response to audio
-         #chat_history.append((question, "[Voice Response]")) # Log that a voice response was generated (optional)
-         chat_history.append(("[Voice Input]", "[Voice Response]"))
-         return chat_history, "", audio_path
-
-
- # Function to add a user's message to the chat history and clear the input box
- def add_message(history, message):
-     if message.strip():
-         history.append((message, "")) # Add the user's message to the chat history only if it's not empty
-     return history, "" # Clear the input box
-
- # Define function to generate a streaming response
- def chat_with_bot(messages):
-     user_message = messages[-1][0] # Get the last user message (input)
-     messages[-1] = (user_message, "") # Prepare the placeholder for the bot's response
-
-     response = get_response(user_message)
-
-     # Simulate streaming response by iterating over each character in the response
-     for character in response:
-         messages[-1] = (user_message, messages[-1][1] + character)
-         yield messages # Stream each character
-         time.sleep(0.05) # Adjust delay as needed for real-time effect
-
-     yield messages # Final yield to ensure the full response is displayed
-
-
- # Function to generate audio with Eleven Labs TTS from the last bot response
- def generate_audio_from_last_response(history):
-     # Get the most recent bot response from the chat history
-     if history and len(history) > 0:
-         recent_response = history[-1][1] # The second item in the tuple is the bot response text
-         if recent_response:
-             return generate_audio_elevenlabs(recent_response)
-     return None
-
- # Define example prompts
- examples = [
-     ["What are some popular events in Birmingham?"],
-     ["Who are the top players of the Crimson Tide?"],
-     ["Where can I find a hamburger?"],
-     ["What are some popular tourist attractions in Birmingham?"],
-     ["What are some good clubs in Birmingham?"],
-     ["Is there a farmer's market or craft fair in Birmingham, Alabama?"],
-     ["Are there any special holiday events or parades in Birmingham, Alabama, during December?"],
-     ["What are the best places to enjoy live music in Birmingham, Alabama?"]
- ]
-
- # Function to insert the prompt into the textbox when clicked
- def insert_prompt(current_text, prompt):
-     return prompt[0] if prompt else current_text
-
-
  # Define the ASR model with Whisper
  model_id = 'openai/whisper-large-v3'
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -288,137 +124,72 @@ pipe_asr = pipeline(
      return_timestamps=True
  )

- # Define the function to reset the state after 10 seconds
- def auto_reset_state():
-     time.sleep(5)
-     return None, "" # Reset the state and clear input text
-
-
- def transcribe_function(stream, new_chunk):
      try:
          sr, y = new_chunk[0], new_chunk[1]
      except TypeError:
          print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
          return stream, "", None

-     # Ensure y is not empty and is at least 1-dimensional
-     if y is None or len(y) == 0:
-         return stream, "", None
-
      y = y.astype(np.float32)
      max_abs_y = np.max(np.abs(y))
      if max_abs_y > 0:
          y = y / max_abs_y

-     # Ensure stream is also at least 1-dimensional before concatenation
      if stream is not None and len(stream) > 0:
          stream = np.concatenate([stream, y])
      else:
          stream = y

-     # Process the audio data for transcription
      result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-     full_text = result.get("text", "")
-
-     # Start a thread to reset the state after 10 seconds
-     threading.Thread(target=auto_reset_state).start()
-
-     return stream, full_text, full_text

- # Define the function to clear the state and input text
  def clear_transcription_state():
-     return None, ""
-

  with gr.Blocks(theme="rawrsor1/Everforest") as demo:
-     chatbot = gr.Chatbot([], elem_id="RADAR", bubble_full_width=False)
-     with gr.Row():
-         with gr.Column():
-             mode_selection = gr.Radio(
-                 choices=["Normal Chatbot", "Voice to Voice Conversation"],
-                 label="Mode Selection",
-                 value="Normal Chatbot"
-             )
-     with gr.Row():
-         with gr.Column():
-             question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
-             audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1, label="Speak to Ask")
-             submit_voice_btn = gr.Button("Submit Voice")
-
-         with gr.Column():
-             audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
-
-     with gr.Row():
-         with gr.Column():
-             get_response_btn = gr.Button("Get Response")
-         with gr.Column():
-             clear_state_btn = gr.Button("Clear State")
-         with gr.Column():
-             generate_audio_btn = gr.Button("Generate Audio")
-         with gr.Column():
-             clean_btn = gr.Button("Clean")
-
      with gr.Row():
-         with gr.Column():
-             gr.Markdown("<h1 style='color: red;'>Example Prompts</h1>", elem_id="Example-Prompts")
-             gr.Examples(examples=examples, fn=insert_prompt, inputs=question_input, outputs=question_input, api_name="api_insert_example")
-
-
-     # Define interactions for the Get Response button
-     get_response_btn.click(
-         fn=handle_mode_selection,
-         inputs=[mode_selection, chatbot, question_input],
-         outputs=[chatbot, question_input, audio_output],
-         api_name="api_add_message_on_button_click"
-     ).then(fn=chat_with_bot, inputs=[chatbot], outputs=chatbot,api_name="api_ask_retriever_on_button_click")
-
-     question_input.submit(
-         fn=handle_mode_selection,
-         inputs=[mode_selection, chatbot, question_input],
-         outputs=[chatbot, question_input, audio_output],
-         api_name="api_add_message_on_enter"
-     ).then(fn=chat_with_bot, inputs=[chatbot], outputs=chatbot,api_name="api_ask_retriever_on_enter")
-

-     submit_voice_btn.click(
-         fn=handle_mode_selection,
-         inputs=[mode_selection, chatbot, question_input],
-         outputs=[chatbot, question_input, audio_output],
-         api_name="api_voice_to_voice_translation"
-     )
-
-     # Speech-to-Text functionality
      state = gr.State()
      audio_input.stream(
-         transcribe_function,
          inputs=[state, audio_input],
-         outputs=[state, question_input],
-         api_name="api_voice_to_text"
-     )
-
-     generate_audio_btn.click(
-         fn=generate_audio_from_last_response,
-         inputs=chatbot,
-         outputs=audio_output,
-         api_name="api_generate_text_to_audio"
-     )
-
-     clean_btn.click(
-         fn=clear_fields,
-         inputs=[],
-         outputs=[chatbot, question_input, audio_output],
-         api_name="api_clear_textbox"
      )

      # Clear state interaction
-     clear_state_btn.click(
          fn=clear_transcription_state,
-         outputs=[question_input, state],
-         api_name="api_clean_state_transcription"
      )

  # Launch the Gradio interface
- demo.launch(show_error=True,share=True)
 
app.py (resulting file)

  import gradio as gr
  import os
  import logging
  import requests
  import tempfile
  import torch
  import numpy as np
+ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+ from langchain_community.graphs import Neo4jGraph
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_openai import ChatOpenAI
+ from pydantic import BaseModel, Field
+ from typing import List
+ import time

+ # Neo4j Setup
  graph = Neo4jGraph(
      url="neo4j+s://6457770f.databases.neo4j.io",
      username="neo4j",

      full_text_query += f" {words[-1]}~2"
      return full_text_query.strip()

  def structured_retriever(question: str) -> str:
      result = ""
      entities = entity_chain.invoke({"question": question})

          result += "\n".join([el['output'] for el in response])
      return result

  # Function to generate audio with Eleven Labs TTS
  def generate_audio_elevenlabs(text):
      XI_API_KEY = os.environ['ELEVENLABS_API']

                  if chunk:
                      f.write(chunk)
              audio_path = f.name
          return audio_path # Return audio path for automatic playback
      else:
          logging.error(f"Error generating audio: {response.text}")
          return None

  # Define the ASR model with Whisper
  model_id = 'openai/whisper-large-v3'
  device = "cuda:0" if torch.cuda.is_available() else "cpu"

      return_timestamps=True
  )

+ # Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
+ def transcribe_and_respond(stream, new_chunk):
      try:
          sr, y = new_chunk[0], new_chunk[1]
      except TypeError:
          print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
          return stream, "", None

      y = y.astype(np.float32)
      max_abs_y = np.max(np.abs(y))
      if max_abs_y > 0:
          y = y / max_abs_y

      if stream is not None and len(stream) > 0:
          stream = np.concatenate([stream, y])
      else:
          stream = y

+     # Transcribe the audio using Whisper
      result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+     question = result.get("text", "")

+     # Retrieve information from Neo4j
+     response_text = structured_retriever(question) if question else "I didn't understand the question."
+
+     # Convert the response to audio using Eleven Labs TTS
+     audio_path = generate_audio_elevenlabs(response_text) if response_text else None

+     return stream, question, audio_path

+ # Function to clear the transcription state
  def clear_transcription_state():
+     return None, "", None

+ # Define the Gradio interface with only audio input and output
  with gr.Blocks(theme="rawrsor1/Everforest") as demo:
      with gr.Row():
+         audio_input = gr.Audio(
+             sources=["microphone"],
+             streaming=True,
+             type='numpy',
+             every=0.1,
+             label="Speak to Ask"
+         )
+         audio_output = gr.Audio(
+             label="Audio Response",
+             type="filepath",
+             autoplay=True,
+             interactive=False
+         )

+     # Speech-to-Text to TTS functionality with Neo4j retrieval
      state = gr.State()
      audio_input.stream(
+         transcribe_and_respond,
          inputs=[state, audio_input],
+         outputs=[state, audio_output],
+         api_name="api_voice_to_neo4j_response"
      )

      # Clear state interaction
+     gr.Button("Clear State").click(
          fn=clear_transcription_state,
+         outputs=[state, audio_output],
+         api_name="api_clean_state"
      )

  # Launch the Gradio interface
+ demo.launch(show_error=True, share=True)
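After this commit the Space has a single path: streamed microphone audio is transcribed with Whisper, the transcript is answered by structured_retriever against Neo4j, and the answer is spoken back via ElevenLabs. A quick way to exercise that path is to call the new callback directly, outside Gradio. The sketch below is hypothetical and not part of the commit: it assumes the definitions from app.py (pipe_asr, structured_retriever, generate_audio_elevenlabs, transcribe_and_respond) are already loaded in the current session and that the Neo4j, OPENAI_API_KEY and ELEVENLABS_API credentials used by app.py are valid. Note also that transcribe_and_respond returns three values while the audio_input.stream handler registers only two outputs ([state, audio_output]); calling the function directly sidesteps that wiring.

# Hypothetical smoke test for the new voice pipeline (not part of the commit).
# Assumes app.py's definitions are already in scope and its API keys are set.
import numpy as np

sample_rate = 16000
# One second of silence as the (sample_rate, samples) tuple Gradio streams.
silent_chunk = (sample_rate, np.zeros(sample_rate, dtype=np.float32))

stream, question, audio_path = transcribe_and_respond(None, silent_chunk)
print("Transcribed text:", repr(question))   # likely empty for silence
print("ElevenLabs audio file:", audio_path)  # temp .mp3 path, or None if the TTS call failed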