Pijush2023 committed
Commit 4f1f18a · verified · 1 Parent(s): 7327bc7

Update app.py

Files changed (1)
  1. app.py +36 -286
app.py CHANGED
@@ -5,37 +5,18 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_openai import ChatOpenAI
 from langchain_community.graphs import Neo4jGraph
-from typing import List, Tuple
+from typing import List
 from pydantic import BaseModel, Field
-from langchain_core.messages import AIMessage, HumanMessage
-from langchain_core.runnables import (
-    RunnableBranch,
-    RunnableLambda,
-    RunnablePassthrough,
-    RunnableParallel,
-)
-from langchain_core.prompts.prompt import PromptTemplate
+from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 import requests
 import tempfile
-from langchain.memory import ConversationBufferWindowMemory
-import time
-import logging
-from langchain.chains import ConversationChain
 import torch
-import torchaudio
-from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 import numpy as np
-import threading
 
+# Setup logging to a file to capture debug information
+logging.basicConfig(filename='neo4j_retrieval.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 
-#code for history
-conversational_memory = ConversationBufferWindowMemory(
-    memory_key='chat_history',
-    k=10,
-    return_messages=True
-)
-
-# Setup Neo4j
+# Setup Neo4j connection
 graph = Neo4jGraph(
     url="neo4j+s://c62d0d35.databases.neo4j.io",
     username="neo4j",
@@ -48,12 +29,12 @@ class Entities(BaseModel):
         ..., description="All the person, organization, or business entities that appear in the text"
     )
 
+# Define prompt and model for entity extraction
+chat_model = ChatOpenAI(temperature=0, model_name="gpt-4", api_key=os.environ['OPENAI_API_KEY'])
 entity_prompt = ChatPromptTemplate.from_messages([
     ("system", "You are extracting organization and person entities from the text."),
     ("human", "Use the given format to extract information from the following input: {question}"),
 ])
-
-chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])
 entity_chain = entity_prompt | chat_model.with_structured_output(Entities)
 
 def remove_lucene_chars(input: str) -> str:
@@ -72,10 +53,7 @@ def generate_full_text_query(input: str) -> str:
         full_text_query += f" {words[-1]}~2"
     return full_text_query.strip()
 
-# Setup logging to a file to capture debug information
-logging.basicConfig(filename='neo4j_retrieval.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
-
-def structured_retriever(question: str) -> str:
+def retrieve_data_from_neo4j(question: str) -> str:
     result = ""
     entities = entity_chain.invoke({"question": question})
     for entity in entities.names:
@@ -86,10 +64,6 @@ def structured_retriever(question: str) -> str:
               WITH node
               MATCH (node)-[r:!MENTIONS]->(neighbor)
               RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
-              UNION ALL
-              WITH node
-              MATCH (node)<-[r:!MENTIONS]-(neighbor)
-              RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
             }
             RETURN output LIMIT 50
             """,
@@ -98,164 +72,23 @@ def structured_retriever(question: str) -> str:
         result += "\n".join([el['output'] for el in response])
     return result
 
-def retriever_neo4j(question: str):
-    structured_data = structured_retriever(question)
-    logging.debug(f"Structured data: {structured_data}")
-    return structured_data
-
-# Setup for condensing the follow-up questions
-_template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question,
-in its original language.
-Chat History:
-{chat_history}
-Follow Up Input: {question}
-Standalone question:"""
-
-CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-
-def _format_chat_history(chat_history: list[tuple[str, str]]) -> list:
-    buffer = []
-    for human, ai in chat_history:
-        buffer.append(HumanMessage(content=human))
-        buffer.append(AIMessage(content=ai))
-    return buffer
-
-_search_query = RunnableBranch(
-    (
-        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
-            run_name="HasChatHistoryCheck"
-        ),
-        RunnablePassthrough.assign(
-            chat_history=lambda x: _format_chat_history(x["chat_history"])
-        )
-        | CONDENSE_QUESTION_PROMPT
-        | ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
-        | StrOutputParser(),
-    ),
-    RunnableLambda(lambda x: x["question"]),
-)
-
-
-template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
-Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational way without any Greet.
-{context}
-Question: {question}
-Answer:"""
-
-
-qa_prompt = ChatPromptTemplate.from_template(template)
-
-# Define the chain for Neo4j-based retrieval and response generation
-chain_neo4j = (
-    RunnableParallel(
-        {
-            "context": _search_query | retriever_neo4j,
-            "question": RunnablePassthrough(),
-        }
-    )
-    | qa_prompt
-    | chat_model
-    | StrOutputParser()
-)
-
-# Define the function to get the response
-def get_response(question):
-    try:
-        return chain_neo4j.invoke({"question": question})
-    except Exception as e:
-        return f"Error: {str(e)}"
-
-# Define the function to clear input and output
-def clear_fields():
-    return [],"",None
-
 # Function to generate audio with Eleven Labs TTS
 def generate_audio_elevenlabs(text):
     XI_API_KEY = os.environ['ELEVENLABS_API']
     VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
     tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
-    headers = {
-        "Accept": "application/json",
-        "xi-api-key": XI_API_KEY
-    }
-    data = {
-        "text": str(text),
-        "model_id": "eleven_multilingual_v2",
-        "voice_settings": {
-            "stability": 1.0,
-            "similarity_boost": 0.0,
-            "style": 0.60,
-            "use_speaker_boost": False
-        }
-    }
+    headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}
+    data = {"text": str(text), "model_id": "eleven_multilingual_v2", "voice_settings": {"stability": 1.0}}
     response = requests.post(tts_url, headers=headers, json=data, stream=True)
     if response.ok:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
             for chunk in response.iter_content(chunk_size=1024):
                 if chunk:
                     f.write(chunk)
-        audio_path = f.name
-        logging.debug(f"Audio saved to {audio_path}")
-        return audio_path  # Return audio path for automatic playback
-    else:
-        logging.error(f"Error generating audio: {response.text}")
-        return None
-
-
-
-def handle_mode_selection(mode, chat_history, question):
-    if mode == "Normal Chatbot":
-        # Append the user's question to chat history first
-        chat_history.append((question, ""))  # Placeholder for the bot's response
-
-        # Stream the response and update chat history with each chunk
-        for response_chunk in chat_with_bot(chat_history):
-            chat_history[-1] = (question, response_chunk[-1][1])  # Update last entry with streamed response
-            yield chat_history, "", None  # Stream each chunk to display in the chatbot
-        yield chat_history, "", None  # Final yield to complete the response
-
-    elif mode == "Voice to Voice Conversation":
-        # Voice to Voice mode: Stream the response text and then convert it to audio
-        response_text = get_response(question)  # Retrieve response text
-        audio_path = generate_audio_elevenlabs(response_text)  # Convert response to audio
-        yield [], "", audio_path  # Only output the audio response without updating chatbot history
-
-
-# Function to add a user's message to the chat history and clear the input box
-def add_message(history, message):
-    if message.strip():
-        history.append((message, ""))  # Add the user's message to the chat history only if it's not empty
-    return history, ""  # Clear the input box
-
-# Define function to generate a streaming response
-def chat_with_bot(messages):
-    user_message = messages[-1][0]  # Get the last user message (input)
-    messages[-1] = (user_message, "")  # Prepare a placeholder for the bot's response
-
-    response = get_response(user_message)  # Assume `get_response` is a generator function
-
-    # Stream each character in the response and update the history progressively
-    for character in response:
-        messages[-1] = (user_message, messages[-1][1] + character)
-        yield messages  # Stream each updated chunk
-        time.sleep(0.05)  # Adjust delay as needed for real-time effect
-
-    yield messages  # Final yield to complete the response
-
-
-
-# Function to generate audio with Eleven Labs TTS from the last bot response
-def generate_audio_from_last_response(history):
-    # Get the most recent bot response from the chat history
-    if history and len(history) > 0:
-        recent_response = history[-1][1]  # The second item in the tuple is the bot response text
-        if recent_response:
-            return generate_audio_elevenlabs(recent_response)
+        return f.name
     return None
 
-
-
-# Define the ASR model with Whisper
+# ASR model setup using Whisper
 model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
@@ -271,120 +104,37 @@ pipe_asr = pipeline(
     chunk_length_s=15,
     batch_size=16,
     torch_dtype=torch_dtype,
-    device=device,
-    return_timestamps=True
+    device=device
 )
 
-# Define the function to reset the state after 10 seconds
-def auto_reset_state():
-    time.sleep(5)
-    return None, ""  # Reset the state and clear input text
-
-
-def transcribe_function(stream, new_chunk):
-    try:
-        sr, y = new_chunk[0], new_chunk[1]
-    except TypeError:
-        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
-        return stream, "", None
-
-    # Ensure y is not empty and is at least 1-dimensional
-    if y is None or len(y) == 0:
-        return stream, "", None
-
-    y = y.astype(np.float32)
-    max_abs_y = np.max(np.abs(y))
-    if max_abs_y > 0:
-        y = y / max_abs_y
-
-    # Ensure stream is also at least 1-dimensional before concatenation
-    if stream is not None and len(stream) > 0:
-        stream = np.concatenate([stream, y])
-    else:
-        stream = y
+# Function to handle audio input, transcription, and Neo4j response generation
+def transcribe_and_respond(audio):
+    # Transcribe audio input
+    audio_data = {"array": audio["data"], "sampling_rate": audio["sample_rate"]}
+    transcription = pipe_asr(audio_data)["text"]
+    logging.debug(f"Transcription: {transcription}")
 
-    # Process the audio data for transcription
-    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-    full_text = result.get("text", "")
+    # Retrieve data from Neo4j based on transcription
+    response_text = retrieve_data_from_neo4j(transcription)
+    logging.debug(f"Neo4j Response: {response_text}")
 
-    # Start a thread to reset the state after 10 seconds
-    threading.Thread(target=auto_reset_state).start()
+    # Convert response to audio
+    return generate_audio_elevenlabs(response_text)
 
-    return stream, full_text, full_text
-
-
-
-# Define the function to clear the state and input text
-def clear_transcription_state():
-    return None, ""
-
-
-
-with gr.Blocks(theme="rawrsor1/Everforest") as demo:
-    # Hide the chatbot component by setting `visible=False`
-    chatbot = gr.Chatbot([], elem_id="RADAR", bubble_full_width=False, visible=False)
-
-    with gr.Row():
-        with gr.Column():
-            # Hide the "Normal Chatbot" radio button by removing it or setting `visible=False`
-            mode_selection = gr.Radio(
-                choices=["Voice to Voice Conversation"],  # Removed "Normal Chatbot" option
-                label="Mode Selection",
-                value="Voice to Voice Conversation",
-                visible=False  # Hide the mode selection entirely
-            )
-    # Remaining code unchanged
-    with gr.Row():
-        with gr.Column():
-            question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...",visible=False)
-            audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1, label="Speak to Ask")
-            submit_voice_btn = gr.Button("Submit Voice")
-
-        with gr.Column():
-            audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
+# Define Gradio interface
+with gr.Blocks() as demo:
+    audio_input = gr.Audio(source="microphone", type="numpy", label="Speak to Ask")  # Removed streaming mode for manual submission
+    audio_output = gr.Audio(label="Response", type="filepath", autoplay=True, interactive=False)
 
-    with gr.Row():
-
-        with gr.Column():
-            clear_state_btn = gr.Button("Clear State")
-
-        with gr.Column():
-            clean_btn = gr.Button("Clean")
-
-
-
-    # Adjust the interactions for the Get Response button
-    submit_voice_btn.click(
-        fn=handle_mode_selection,
-        inputs=[mode_selection, chatbot, question_input],
-        outputs=[chatbot, question_input, audio_output],
-        api_name="api_voice_to_voice_translation"
-    )
-
-    # Speech-to-Text functionality
-    state = gr.State()
-    audio_input.stream(
-        transcribe_function,
-        inputs=[state, audio_input],
-        outputs=[state, question_input],
-        api_name="api_voice_to_text"
-    )
+    # "Submit Audio" button
+    submit_button = gr.Button("Submit Audio")
 
-
-
-    clean_btn.click(
-        fn=clear_fields,
-        inputs=[],
-        outputs=[chatbot, question_input, audio_output],
-        api_name="api_clear_textbox"
-    )
-
-    # Clear state interaction
-    clear_state_btn.click(
-        fn=clear_transcription_state,
-        outputs=[question_input, state],
-        api_name="api_clean_state_transcription"
+    # Link the button to trigger response generation after clicking
+    submit_button.click(
+        fn=transcribe_and_respond,
+        inputs=audio_input,
+        outputs=audio_output
     )
 
-# Launch the Gradio interface
+# Launch Gradio interface
 demo.launch(show_error=True, share=True)
 