Pijush2023 committed
Commit 7f2393e · verified · 1 Parent(s): 17afe2d

Update app.py

Files changed (1):
app.py +42 -121
app.py CHANGED
@@ -8,10 +8,21 @@ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain_community.vectorstores import Neo4jVector
 from langchain_community.graphs import Neo4jGraph
-from langchain_experimental.graph_transformers import LLMGraphTransformer
 from langchain_core.prompts import ChatPromptTemplate
 import time
 import os
+import io
+from pydub import AudioSegment
+from dataclasses import dataclass
+
+# Define AppState dataclass for managing the application's state
+@dataclass
+class AppState:
+    stream: np.ndarray | None = None
+    sampling_rate: int = 0
+    pause_detected: bool = False
+    stopped: bool = False
+    conversation: list = []

 # Neo4j setup
 graph = Neo4jGraph(
@@ -53,136 +64,63 @@ pipe_asr = pipeline(
 # Function to reset the state after 10 seconds
 def auto_reset_state():
     time.sleep(2)
-    return None, ""  # Reset the state and clear input text
-
+    return AppState()  # Reset the state

 # Function to process audio input and transcribe it
-def transcribe_function(stream, new_chunk):
+def transcribe_function(state: AppState, new_chunk):
     try:
         sr, y = new_chunk[0], new_chunk[1]
     except TypeError:
         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
-        return stream, "", None
+        return state, ""

-    # Ensure y is not empty and is at least 1-dimensional
     if y is None or len(y) == 0:
-        return stream, "", None
+        return state, ""

     y = y.astype(np.float32)
     max_abs_y = np.max(np.abs(y))
     if max_abs_y > 0:
         y = y / max_abs_y

-    # Ensure stream is also at least 1-dimensional before concatenation
-    if stream is not None and len(stream) > 0:
-        stream = np.concatenate([stream, y])
+    if state.stream is not None and len(state.stream) > 0:
+        state.stream = np.concatenate([state.stream, y])
     else:
-        stream = y
+        state.stream = y

-    # Process the audio data for transcription
-    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+    result = pipe_asr({"array": state.stream, "sampling_rate": sr}, return_timestamps=False)
     full_text = result.get("text", "")

-    # Start a thread to reset the state after 10 seconds
     threading.Thread(target=auto_reset_state).start()
+    return state, full_text

-    return stream, full_text, full_text
-
-
-
-# Function to generate a full-text search query for Neo4j
-#def generate_full_text_query(input: str) -> str:
-    #full_text_query = ""
-    #words = [el for el in input.split() if el]
-    #for word in words[:-1]:
-        #full_text_query += f" {word}~2 AND"
-    #full_text_query += f" {words[-1]}~2"
-    #return full_text_query.strip()
-
-
-# Function to generate a full-text search query for Neo4j
-def generate_full_text_query(input: str) -> str:
-    # Split the input into words, ignoring any empty strings
-    words = [el for el in input.split() if el]
-
-    # Check if there are no words
-    if not words:
-        return ""  # Return an empty string or a default query if desired
-
-    # Create the full-text query with fuzziness (~2 for proximity search)
-    full_text_query = ""
-    for word in words[:-1]:
-        full_text_query += f" {word}~2 AND"
-    full_text_query += f" {words[-1]}~2"
-    return full_text_query.strip()
-
-
+# Function to generate a response using the prompt and the context
+def generate_response_with_prompt(context, question):
+    formatted_prompt = prompt.format(context=context, question=question)
+    llm = ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
+    response = llm(formatted_prompt)
+    return response.content.strip()

 # Function to generate audio with Eleven Labs TTS
 def generate_audio_elevenlabs(text):
     XI_API_KEY = os.environ['ELEVENLABS_API']
     VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
     tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
-    headers = {
-        "Accept": "application/json",
-        "xi-api-key": XI_API_KEY
-    }
-    data = {
-        "text": str(text),
-        "model_id": "eleven_multilingual_v2",
-        "voice_settings": {
-            "stability": 1.0,
-            "similarity_boost": 0.0,
-            "style": 0.60,
-            "use_speaker_boost": False
-        }
-    }
+    headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}
+    data = {"text": text, "model_id": "eleven_multilingual_v2", "voice_settings": {"stability": 1.0}}
     response = requests.post(tts_url, headers=headers, json=data, stream=True)
     if response.ok:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
             for chunk in response.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-            audio_path = f.name
-        return audio_path  # Return audio path for automatic playback
+                f.write(chunk)
+            return f.name
     else:
         print(f"Error generating audio: {response.text}")
         return None

-# Define the template for generating responses based on context
-template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
-Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational and straight-foreward way without any Greet.
-Context:
-{context}
-
-Question: {question}
-Answer concisely:"""
-
-# Create a prompt object using the template
-prompt = ChatPromptTemplate.from_template(template)
-
-# Function to generate a response using the prompt and the context
-def generate_response_with_prompt(context, question):
-    formatted_prompt = prompt.format(
-        context=context,
-        question=question
-    )
-    # Use the ChatOpenAI instance to generate a response directly from the formatted prompt
-    llm = ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
-    response = llm(formatted_prompt)
-    return response.content.strip()
-
-
-
-# Function to reset the state
-def reset_state():
-    return None, ""  # Reset the state and clear input text
-
-# Define the function to generate a hybrid response using Neo4j and other retrieval methods
+# Define the function to retrieve information using Neo4j and the vector store
 def retriever(question: str):
-    # Structured data retrieval from Neo4j
-    structured_query = f"""
-    CALL db.index.fulltext.queryNodes('entity', $query, {{limit: 2}})
+    structured_query = """
+    CALL db.index.fulltext.queryNodes('entity', $query, {limit: 2})
     YIELD node, score
     RETURN node.id AS entity, node.text AS context, score
     ORDER BY score DESC
@@ -191,44 +129,27 @@ def retriever(question: str):
     structured_data = graph.query(structured_query, {"query": generate_full_text_query(question)})
     structured_response = "\n".join([f"{record['entity']}: {record['context']}" for record in structured_data])

-    # Unstructured data retrieval from vector store
     unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
     unstructured_response = "\n".join(unstructured_data)

-    # Combine structured and unstructured responses
     combined_context = f"Structured data:\n{structured_response}\n\nUnstructured data:\n{unstructured_response}"
-
-    # Generate the final response using the prompt template
-    final_response = generate_response_with_prompt(combined_context, question)
-    return final_response
-
+    return generate_response_with_prompt(combined_context, question)

 # Function to handle the entire audio query and response process
-def process_audio_query(audio_input):
-    stream = None
-    _, transcription, _ = transcribe_function(stream, audio_input)
-    print(f"Transcription: {transcription}")
-
-    # Retrieve hybrid response using Neo4j and other methods
+def process_audio_query(state: AppState, audio_input):
+    state, transcription = transcribe_function(state, audio_input)
     response_text = retriever(transcription)
-    print(f"Response: {response_text}")
-
-    # Generate audio from the response text
     audio_path = generate_audio_elevenlabs(response_text)
-    return audio_path
-
-# Function to handle submit button click
-def on_submit(audio_input):
-    return process_audio_query(audio_input)
+    return audio_path, state

-# Create Gradio interface for audio input, submit button, and output
+# Create Gradio interface for audio input and output
 with gr.Blocks() as interface:
-    audio_input = gr.Audio(sources="microphone", type="numpy", streaming=True,every=0.1)
+    audio_input = gr.Audio(sources="microphone", type="numpy", streaming=True, every=0.1)
     submit_button = gr.Button("Submit")
     audio_output = gr.Audio(type="filepath", autoplay=True)
+    state = gr.State(AppState())

-    submit_button.click(fn=on_submit, inputs=audio_input, outputs=audio_output)
+    submit_button.click(fn=process_audio_query, inputs=[state, audio_input], outputs=[audio_output, state])

 # Launch the Gradio app
 interface.launch()
-
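
For reference, below is a minimal, self-contained sketch of the AppState / gr.State wiring this commit introduces, reduced to the state handling alone (the ASR, Neo4j retrieval, and ElevenLabs calls are stubbed out). Two details are assumptions rather than committed code: dataclasses reject a bare mutable default such as conversation: list = [], so the sketch uses field(default_factory=list), and the microphone source is passed in the list form sources=["microphone"].

import numpy as np
import gradio as gr
from dataclasses import dataclass, field

# Sketch of the commit's AppState container (assumes Python 3.10+ for "np.ndarray | None";
# field(default_factory=list) replaces the bare [] default, which dataclasses reject).
@dataclass
class AppState:
    stream: np.ndarray | None = None
    sampling_rate: int = 0
    pause_detected: bool = False
    stopped: bool = False
    conversation: list = field(default_factory=list)

# Stand-in for the commit's process_audio_query: accumulate audio into the state and
# return (output audio path, updated state); the transcription and TTS steps are omitted here.
def process_audio_query(state: AppState, audio_input):
    if audio_input is None:
        return None, state
    sr, y = audio_input
    y = y.astype(np.float32)
    state.stream = y if state.stream is None else np.concatenate([state.stream, y])
    state.sampling_rate = sr
    return None, state

with gr.Blocks() as demo:
    audio_input = gr.Audio(sources=["microphone"], type="numpy", streaming=True)
    submit_button = gr.Button("Submit")
    audio_output = gr.Audio(type="filepath", autoplay=True)
    state = gr.State(AppState())  # per-session state object, as in the commit
    submit_button.click(fn=process_audio_query, inputs=[state, audio_input], outputs=[audio_output, state])

demo.launch()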