Spaces: Runtime error
Update app.py
app.py
CHANGED
@@ -1,41 +1,19 @@
 import gradio as gr
 import os
 import logging
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_openai import ChatOpenAI
-from langchain_community.graphs import Neo4jGraph
-from typing import List, Tuple
-from pydantic import BaseModel, Field
-from langchain_core.messages import AIMessage, HumanMessage
-from langchain_core.runnables import (
-    RunnableBranch,
-    RunnableLambda,
-    RunnablePassthrough,
-    RunnableParallel,
-)
-from langchain_core.prompts.prompt import PromptTemplate
 import requests
 import tempfile
-from langchain.memory import ConversationBufferWindowMemory
-import time
-import logging
-from langchain.chains import ConversationChain
 import torch
-import torchaudio
-from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 import numpy as np
-import
-
-
-
-
-
-
-    return_messages=True
-)

-# Setup
 graph = Neo4jGraph(
     url="neo4j+s://6457770f.databases.neo4j.io",
     username="neo4j",
@@ -72,9 +50,6 @@ def generate_full_text_query(input: str) -> str:
     full_text_query += f" {words[-1]}~2"
     return full_text_query.strip()

-# Setup logging to a file to capture debug information
-logging.basicConfig(filename='neo4j_retrieval.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
-
 def structured_retriever(question: str) -> str:
     result = ""
     entities = entity_chain.invoke({"question": question})
@@ -98,77 +73,6 @@ def structured_retriever(question: str) -> str:
     result += "\n".join([el['output'] for el in response])
     return result

-def retriever_neo4j(question: str):
-    structured_data = structured_retriever(question)
-    logging.debug(f"Structured data: {structured_data}")
-    return structured_data
-
-# Setup for condensing the follow-up questions
-_template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question,
-in its original language.
-Chat History:
-{chat_history}
-Follow Up Input: {question}
-Standalone question:"""
-
-CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-
-def _format_chat_history(chat_history: list[tuple[str, str]]) -> list:
-    buffer = []
-    for human, ai in chat_history:
-        buffer.append(HumanMessage(content=human))
-        buffer.append(AIMessage(content=ai))
-    return buffer
-
-_search_query = RunnableBranch(
-    (
-        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
-            run_name="HasChatHistoryCheck"
-        ),
-        RunnablePassthrough.assign(
-            chat_history=lambda x: _format_chat_history(x["chat_history"])
-        )
-        | CONDENSE_QUESTION_PROMPT
-        | ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
-        | StrOutputParser(),
-    ),
-    RunnableLambda(lambda x: x["question"]),
-)
-
-
-template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
-Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational way without any Greet.
-{context}
-Question: {question}
-Answer:"""
-
-
-qa_prompt = ChatPromptTemplate.from_template(template)
-
-# Define the chain for Neo4j-based retrieval and response generation
-chain_neo4j = (
-    RunnableParallel(
-        {
-            "context": _search_query | retriever_neo4j,
-            "question": RunnablePassthrough(),
-        }
-    )
-    | qa_prompt
-    | chat_model
-    | StrOutputParser()
-)
-
-# Define the function to get the response
-def get_response(question):
-    try:
-        return chain_neo4j.invoke({"question": question})
-    except Exception as e:
-        return f"Error: {str(e)}"
-
-# Define the function to clear input and output
-def clear_fields():
-    return [],"",None
-
 # Function to generate audio with Eleven Labs TTS
 def generate_audio_elevenlabs(text):
     XI_API_KEY = os.environ['ELEVENLABS_API']
@@ -195,79 +99,11 @@ def generate_audio_elevenlabs(text):
             if chunk:
                 f.write(chunk)
         audio_path = f.name
-        logging.debug(f"Audio saved to {audio_path}")
         return audio_path  # Return audio path for automatic playback
     else:
         logging.error(f"Error generating audio: {response.text}")
         return None

-def handle_mode_selection(mode, chat_history, question):
-    if mode == "Normal Chatbot":
-        # Normal chatbot mode: Show the response in the chatbot output
-        response = get_response(question)
-        chat_history.append((question, response))
-        return chat_history, "", None
-
-    elif mode == "Voice to Voice Conversation":
-        # Voice to Voice mode: Generate the response using Eleven Labs and return audio without showing text
-        response = get_response(question)  # Get the response text (can be omitted if not needed for debugging)
-        audio_path = generate_audio_elevenlabs(response)  # Convert the response to audio
-        #chat_history.append((question, "[Voice Response]"))  # Log that a voice response was generated (optional)
-        chat_history.append(("[Voice Input]", "[Voice Response]"))
-        return chat_history, "", audio_path
-
-
-# Function to add a user's message to the chat history and clear the input box
-def add_message(history, message):
-    if message.strip():
-        history.append((message, ""))  # Add the user's message to the chat history only if it's not empty
-    return history, ""  # Clear the input box
-
-# Define function to generate a streaming response
-def chat_with_bot(messages):
-    user_message = messages[-1][0]  # Get the last user message (input)
-    messages[-1] = (user_message, "")  # Prepare the placeholder for the bot's response
-
-    response = get_response(user_message)
-
-
-
-    # Simulate streaming response by iterating over each character in the response
-    for character in response:
-        messages[-1] = (user_message, messages[-1][1] + character)
-        yield messages  # Stream each character
-        time.sleep(0.05)  # Adjust delay as needed for real-time effect
-
-    yield messages  # Final yield to ensure the full response is displayed
-
-
-# Function to generate audio with Eleven Labs TTS from the last bot response
-def generate_audio_from_last_response(history):
-    # Get the most recent bot response from the chat history
-    if history and len(history) > 0:
-        recent_response = history[-1][1]  # The second item in the tuple is the bot response text
-        if recent_response:
-            return generate_audio_elevenlabs(recent_response)
-    return None
-
-# Define example prompts
-examples = [
-    ["What are some popular events in Birmingham?"],
-    ["Who are the top players of the Crimson Tide?"],
-    ["Where can I find a hamburger?"],
-    ["What are some popular tourist attractions in Birmingham?"],
-    ["What are some good clubs in Birmingham?"],
-    ["Is there a farmer's market or craft fair in Birmingham, Alabama?"],
-    ["Are there any special holiday events or parades in Birmingham, Alabama, during December?"],
-    ["What are the best places to enjoy live music in Birmingham, Alabama?"]
-
-]
-
-# Function to insert the prompt into the textbox when clicked
-def insert_prompt(current_text, prompt):
-    return prompt[0] if prompt else current_text
-
-
 # Define the ASR model with Whisper
 model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -288,137 +124,72 @@ pipe_asr = pipeline(
     return_timestamps=True
 )

-#
-def
-    time.sleep(5)
-    return None, ""  # Reset the state and clear input text
-
-
-def transcribe_function(stream, new_chunk):
     try:
         sr, y = new_chunk[0], new_chunk[1]
     except TypeError:
         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
         return stream, "", None

-    # Ensure y is not empty and is at least 1-dimensional
-    if y is None or len(y) == 0:
-        return stream, "", None
-
     y = y.astype(np.float32)
     max_abs_y = np.max(np.abs(y))
     if max_abs_y > 0:
         y = y / max_abs_y

-    # Ensure stream is also at least 1-dimensional before concatenation
     if stream is not None and len(stream) > 0:
         stream = np.concatenate([stream, y])
     else:
         stream = y

-    #
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-
-
-    # Start a thread to reset the state after 10 seconds
-    threading.Thread(target=auto_reset_state).start()
-
-    return stream, full_text, full_text


-#
 def clear_transcription_state():
-    return None, ""
-
-

 with gr.Blocks(theme="rawrsor1/Everforest") as demo:
-    chatbot = gr.Chatbot([], elem_id="RADAR", bubble_full_width=False)
-    with gr.Row():
-        with gr.Column():
-            mode_selection = gr.Radio(
-                choices=["Normal Chatbot", "Voice to Voice Conversation"],
-                label="Mode Selection",
-                value="Normal Chatbot"
-            )
-    with gr.Row():
-        with gr.Column():
-            question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
-            audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1, label="Speak to Ask")
-            submit_voice_btn = gr.Button("Submit Voice")
-
-        with gr.Column():
-            audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
-
-    with gr.Row():
-        with gr.Column():
-            get_response_btn = gr.Button("Get Response")
-        with gr.Column():
-            clear_state_btn = gr.Button("Clear State")
-        with gr.Column():
-            generate_audio_btn = gr.Button("Generate Audio")
-        with gr.Column():
-            clean_btn = gr.Button("Clean")
-
     with gr.Row():
-
-
-
-
-
-
-
-
-
-
-
-
-
-        question_input.submit(
-            fn=handle_mode_selection,
-            inputs=[mode_selection, chatbot, question_input],
-            outputs=[chatbot, question_input, audio_output],
-            api_name="api_add_message_on_enter"
-        ).then(fn=chat_with_bot, inputs=[chatbot], outputs=chatbot,api_name="api_ask_retriever_on_enter")
-

-
-        fn=handle_mode_selection,
-        inputs=[mode_selection, chatbot, question_input],
-        outputs=[chatbot, question_input, audio_output],
-        api_name="api_voice_to_voice_translation"
-    )
-
-    # Speech-to-Text functionality
     state = gr.State()
     audio_input.stream(
-
         inputs=[state, audio_input],
-        outputs=[state,
-        api_name="
-    )
-
-    generate_audio_btn.click(
-        fn=generate_audio_from_last_response,
-        inputs=chatbot,
-        outputs=audio_output,
-        api_name="api_generate_text_to_audio"
-    )
-
-    clean_btn.click(
-        fn=clear_fields,
-        inputs=[],
-        outputs=[chatbot, question_input, audio_output],
-        api_name="api_clear_textbox"
     )

     # Clear state interaction
-
         fn=clear_transcription_state,
-        outputs=[
-        api_name="
     )

     # Launch the Gradio interface
-demo.launch(show_error=True,share=True)

app.py (updated version):

 import gradio as gr
 import os
 import logging
 import requests
 import tempfile
 import torch
 import numpy as np
+from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+from langchain_community.graphs import Neo4jGraph
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel, Field
+from typing import List
+import time

+# Neo4j Setup
 graph = Neo4jGraph(
     url="neo4j+s://6457770f.databases.neo4j.io",
     username="neo4j",
...
     full_text_query += f" {words[-1]}~2"
     return full_text_query.strip()

 def structured_retriever(question: str) -> str:
     result = ""
     entities = entity_chain.invoke({"question": question})
...
     result += "\n".join([el['output'] for el in response])
     return result

 # Function to generate audio with Eleven Labs TTS
 def generate_audio_elevenlabs(text):
     XI_API_KEY = os.environ['ELEVENLABS_API']
...
             if chunk:
                 f.write(chunk)
         audio_path = f.name
         return audio_path  # Return audio path for automatic playback
     else:
         logging.error(f"Error generating audio: {response.text}")
         return None

 # Define the ASR model with Whisper
 model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
...
     return_timestamps=True
 )

+# Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
+def transcribe_and_respond(stream, new_chunk):
     try:
         sr, y = new_chunk[0], new_chunk[1]
     except TypeError:
         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
         return stream, "", None

     y = y.astype(np.float32)
     max_abs_y = np.max(np.abs(y))
     if max_abs_y > 0:
         y = y / max_abs_y

     if stream is not None and len(stream) > 0:
         stream = np.concatenate([stream, y])
     else:
         stream = y

+    # Transcribe the audio using Whisper
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+    question = result.get("text", "")

+    # Retrieve information from Neo4j
+    response_text = structured_retriever(question) if question else "I didn't understand the question."
+
+    # Convert the response to audio using Eleven Labs TTS
+    audio_path = generate_audio_elevenlabs(response_text) if response_text else None

+    return stream, question, audio_path

+# Function to clear the transcription state
 def clear_transcription_state():
+    return None, "", None

+# Define the Gradio interface with only audio input and output
 with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     with gr.Row():
+        audio_input = gr.Audio(
+            sources=["microphone"],
+            streaming=True,
+            type='numpy',
+            every=0.1,
+            label="Speak to Ask"
+        )
+        audio_output = gr.Audio(
+            label="Audio Response",
+            type="filepath",
+            autoplay=True,
+            interactive=False
+        )

+    # Speech-to-Text to TTS functionality with Neo4j retrieval
     state = gr.State()
     audio_input.stream(
+        transcribe_and_respond,
         inputs=[state, audio_input],
+        outputs=[state, audio_output],
+        api_name="api_voice_to_neo4j_response"
     )

     # Clear state interaction
+    gr.Button("Clear State").click(
         fn=clear_transcription_state,
+        outputs=[state, audio_output],
+        api_name="api_clean_state"
     )

     # Launch the Gradio interface
+    demo.launch(show_error=True, share=True)
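
For orientation, here is a minimal, self-contained sketch of the voice-only flow this commit leaves in place (streaming microphone audio, transcription, Neo4j lookup, spoken reply). It is not the Space's code: fake_retriever and fake_tts are hypothetical stand-ins for structured_retriever and generate_audio_elevenlabs, and the Whisper call is stubbed out.

import numpy as np
import gradio as gr

def fake_retriever(question: str) -> str:
    # Hypothetical stand-in for the Neo4j-backed structured_retriever
    return f"You asked: {question}"

def fake_tts(text: str):
    # Hypothetical stand-in for generate_audio_elevenlabs; the real function
    # returns a path to a temporary MP3 file
    return None

def respond_to_chunk(stream, new_chunk):
    # Accumulate streamed microphone chunks, then answer the transcribed question
    sr, y = new_chunk
    y = y.astype(np.float32)
    stream = y if stream is None else np.concatenate([stream, y])
    question = "placeholder transcription"  # the pipe_asr Whisper call would go here
    answer = fake_retriever(question)
    audio_path = fake_tts(answer)
    return stream, audio_path  # one return value per component listed in outputs

with gr.Blocks() as sketch:
    state = gr.State()
    mic = gr.Audio(sources=["microphone"], streaming=True, type="numpy", label="Speak to Ask")
    reply = gr.Audio(type="filepath", autoplay=True, label="Audio Response")
    mic.stream(respond_to_chunk, inputs=[state, mic], outputs=[state, reply])

sketch.launch()

Note that Gradio expects the streaming callback's return values to match the outputs list one for one; the committed transcribe_and_respond returns three values (stream, question, audio_path) while only [state, audio_output] are wired up, which may be worth checking given the Space's Runtime error status.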