Pijush2023 committed on
Commit 14b2825 · verified · 1 Parent(s): 4b4e9f4

Update app.py

Files changed (1)
  1. app.py +59 -167
app.py CHANGED
@@ -4,9 +4,6 @@ import logging
  from langchain_core.prompts import ChatPromptTemplate
  from langchain_core.output_parsers import StrOutputParser
  from langchain_openai import ChatOpenAI
- from langchain_community.graphs import Neo4jGraph
- from typing import List, Tuple
- from pydantic import BaseModel, Field
  from langchain_core.messages import AIMessage, HumanMessage
  from langchain_core.runnables import (
      RunnableBranch,
@@ -26,168 +23,59 @@ import torchaudio
  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
  import numpy as np
  import threading
- from langchain_community.vectorstores import Neo4jVector
  from langchain_openai import OpenAIEmbeddings
+ from langchain_pinecone import PineconeVectorStore
+ from langchain.chains import RetrievalQA
 
- #code for history
- conversational_memory = ConversationBufferWindowMemory(
-     memory_key='chat_history',
-     k=10,
-     return_messages=True
- )
-
- # Setup Neo4j
- graph = Neo4jGraph(
-     url="neo4j+s://c62d0d35.databases.neo4j.io",
-     username="neo4j",
-     password="_x8f-_aAQvs2NB0x6s0ZHSh3W_y-HrENDbgStvsUCM0"
- )
-
- # directly show the graph resulting from the given Cypher query
- default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"
-
- vector_index = Neo4jVector.from_existing_graph(
-     OpenAIEmbeddings(openai_api_key="sk-PV6RlpmTifrWo_olwL1IR69J9v2e5AKe-Xfxs_Yf9VT3BlbkFJm-UJQx5RNyGpok9MM_DYSTmayn7y-lKLSBqXecEoYA"),
-     graph=graph,
-     search_type="hybrid",
-     node_label="Document",
-     text_node_properties=["text"],
-     embedding_node_property="embedding",
- )
-
- # Define entity extraction and retrieval functions
- class Entities(BaseModel):
-     names: List[str] = Field(
-         ..., description="All the person, organization, or business entities that appear in the text"
-     )
-
- prompt = ChatPromptTemplate.from_messages([
-     ("system", "You are extracting organization and person entities from the text."),
-     ("human", "Use the given format to extract information from the following input: {question}"),
- ])
-
- chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])
- entity_chain = prompt | chat_model.with_structured_output(Entities)
-
- def remove_lucene_chars(input: str) -> str:
-     return input.translate(str.maketrans({
-         "\\": r"\\", "+": r"\+", "-": r"\-", "&": r"\&", "|": r"\|", "!": r"\!",
-         "(": r"\(", ")": r"\)", "{": r"\{", "}": r"\}", "[": r"\[", "]": r"\]",
-         "^": r"\^", "~": r"\~", "*": r"\*", "?": r"\?", ":": r"\:", '"': r'\"',
-         ";": r"\;", " ": r"\ "
-     }))
-
- def generate_full_text_query(input: str) -> str:
-     full_text_query = ""
-     words = [el for el in remove_lucene_chars(input).split() if el]
-     for word in words[:-1]:
-         full_text_query += f" {word}~2 AND"
-     full_text_query += f" {words[-1]}~2"
-     return full_text_query.strip()
-
- # Setup logging to a file to capture debug information
- logging.basicConfig(filename='neo4j_retrieval.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
-
- def structured_retriever(question: str) -> str:
-     result = ""
-     entities = entity_chain.invoke({"question": question})
-     for entity in entities.names:
-         response = graph.query(
-             """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
-             YIELD node,score
-             CALL {
-                 WITH node
-                 MATCH (node)-[r:!MENTIONS]->(neighbor)
-                 RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
-                 UNION ALL
-                 WITH node
-                 MATCH (node)<-[r:!MENTIONS]-(neighbor)
-                 RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
-             }
-             RETURN output LIMIT 50
-             """,
-             {"query": generate_full_text_query(entity)},
-         )
-         result += "\n".join([el['output'] for el in response])
-     return result
-
- def retriever_neo4j(question: str):
-     print(f"Search query: {question}")
-     structured_data = structured_retriever(question)
-     unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
-     final_data = f"""Structured data:
- {structured_data}
- Unstructured data:
- {"#Document ".join(unstructured_data)}
- """
-     return final_data
-
- # Setup for condensing the follow-up questions
- _template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question,
- in its original language.
- Chat History:
- {chat_history}
- Follow Up Input: {question}
- Standalone question:"""
-
- CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-
- def _format_chat_history(chat_history: list[tuple[str, str]]) -> list:
-     buffer = []
-     for human, ai in chat_history:
-         buffer.append(HumanMessage(content=human))
-         buffer.append(AIMessage(content=ai))
-     return buffer
-
- _search_query = RunnableBranch(
-     # If input includes chat_history, we condense it with the follow-up question
-     (
-         RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
-             run_name="HasChatHistoryCheck"
-         ),  # Condense follow-up question and chat into a standalone_question
-         RunnablePassthrough.assign(
-             chat_history=lambda x: _format_chat_history(x["chat_history"])
-         )
-         | CONDENSE_QUESTION_PROMPT
-         | ChatOpenAI(temperature=0, openai_api_key="sk-PV6RlpmTifrWo_olwL1IR69J9v2e5AKe-Xfxs_Yf9VT3BlbkFJm-UJQx5RNyGpok9MM_DYSTmayn7y-lKLSBqXecEoYA")
-         | StrOutputParser(),
-     ),
-     # Else, we have no chat history, so just pass through the question
-     RunnableLambda(lambda x: x["question"]),
- )
-
- template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
- Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational way without any Greet.
- {context}
- Question: {question}
- Answer:"""
-
- prompt = ChatPromptTemplate.from_template(template)
-
- # Define the chain for Neo4j-based retrieval and response generation
- chain_neo4j = (
-     RunnableParallel(
-         {
-             "context": _search_query | retriever_neo4j,
-             "question": RunnablePassthrough(),
-         }
-     )
-     | prompt
-     | chat_model
-     | StrOutputParser()
- )
-
- # Define the function to get the response
- def get_response(question):
-     try:
-         return chain_neo4j.invoke({"question": question})
-     except Exception as e:
-         return f"Error: {str(e)}"
+ embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
+
+ def initialize_gpt_model():
+     return ChatOpenAI(api_key=os.environ['OPENAI_API_KEY'], temperature=0, model='gpt-4o')
+
+ gpt_model = initialize_gpt_model()
+
+ gpt_embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
+ gpt_vectorstore = PineconeVectorStore(index_name="radardata10312024", embedding=gpt_embeddings)
+ gpt_retriever = gpt_vectorstore.as_retriever(search_kwargs={'k': 5})
+
+ # Pinecone setup
+ from pinecone import Pinecone
+ pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
+
+ index_name = "radardata10312024"
+ vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
+ retriever = vectorstore.as_retriever(search_kwargs={'k': 5})
+
+ chat_model = ChatOpenAI(api_key=os.environ['OPENAI_API_KEY'], temperature=0, model='gpt-4o')
+
+ # Code for conversation history
+ conversational_memory = ConversationBufferWindowMemory(
+     memory_key='chat_history',
+     k=10,
+     return_messages=True
+ )
+
+ template = """Hello there! As your friendly and knowledgeable guide to Birmingham, Alabama, give a short, precise, crisp, and straightforward response of at most two sentences, and don't greet.
+ {context}
+ Question: {question}
+ Helpful Answer:"""
+
+ QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)
+
+ def build_qa_chain(prompt_template):
+     qa_chain = RetrievalQA.from_chain_type(
+         llm=chat_model,
+         chain_type="stuff",
+         retriever=retriever,
+         chain_type_kwargs={"prompt": prompt_template}
+     )
+     return qa_chain  # Return the qa_chain object
+
+ # Instantiate the QA chain using the defined prompt template
+ qa_chain = build_qa_chain(QA_CHAIN_PROMPT)
 
  # Define the function to clear input and output
  def clear_fields():
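
Taken together, this hunk swaps the Neo4j graph-plus-vector retrieval for a plain Pinecone similarity search wrapped in a RetrievalQA chain; unlike the removed chain_neo4j pipeline, there is no entity extraction or question-condensing step, so every question goes straight to vector search. A minimal, self-contained sketch of the new query path (assuming the same radardata10312024 index, the OPENAI_API_KEY and PINECONE_API_KEY environment variables, and a langchain version in which RetrievalQA is still available):

    import os
    from langchain_openai import ChatOpenAI, OpenAIEmbeddings
    from langchain_pinecone import PineconeVectorStore
    from langchain.chains import RetrievalQA
    from langchain_core.prompts import PromptTemplate

    # Embed the query and pull the 5 nearest chunks from the Pinecone index.
    embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
    vectorstore = PineconeVectorStore(index_name="radardata10312024", embedding=embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={'k': 5})

    # "stuff" concatenates every retrieved chunk into {context} of a single prompt.
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template="{context}\nQuestion: {question}\nHelpful Answer:",
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(api_key=os.environ['OPENAI_API_KEY'], temperature=0, model='gpt-4o'),
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt},
    )

    print(qa_chain({"query": "What tree care services are available in Alabama?"})['result'])
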
@@ -227,24 +115,31 @@ def generate_audio_elevenlabs(text):
 
 
+ import time
+
+ # Main function to handle mode selection with character-by-character streaming
  def handle_mode_selection(mode, chat_history, question):
      if mode == "Normal Chatbot":
-         # Append the user's question to chat history first
-         chat_history.append((question, ""))  # Placeholder for the bot's response
-
-         # Stream the response and update chat history with each chunk
-         for response_chunk in chat_with_bot(chat_history):
-             chat_history[-1] = (question, response_chunk[-1][1])  # Update last entry with streamed response
-             yield chat_history, "", None  # Stream each chunk to display in the chatbot
-         yield chat_history, "", None  # Final yield to complete the response
+         chat_history.append((question, ""))  # Append the user question with an empty response initially
+
+         # Get the response from Pinecone using the qa_chain
+         response = qa_chain({"query": question})
+         response_text = response['result']
+
+         # Stream each character of the response text into the chat history
+         for char in response_text:
+             chat_history[-1] = (question, chat_history[-1][1] + char)  # Update the last message
+             yield chat_history, "", None  # Yield the updated chat history
+             time.sleep(0.05)  # Small delay to simulate streaming
 
      elif mode == "Voice to Voice Conversation":
-         # Voice to Voice mode: Stream the response text and then convert it to audio
-         response_text = get_response(question)  # Retrieve response text
-         audio_path = generate_audio_elevenlabs(response_text)  # Convert response to audio
+         response_text = qa_chain({"query": question})['result']
+         audio_path = generate_audio_elevenlabs(response_text)
          yield [], "", audio_path  # Only output the audio response without updating chatbot history
 
  # Function to add a user's message to the chat history and clear the input box
  def add_message(history, message):
      if message.strip():
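
The reworked handler is a generator: it yields the whole chat history after every character, with a 0.05 s pause, to simulate token streaming (so a 200-character answer takes about ten seconds to render; chunked yields would be cheaper). How it plugs into the UI is outside this hunk; a hypothetical Gradio wiring, with component names that are illustrative rather than taken from this commit, would look like:

    import gradio as gr

    with gr.Blocks() as demo:
        # Hypothetical components; the app's real layout is not shown in this diff.
        mode = gr.Radio(["Normal Chatbot", "Voice to Voice Conversation"], value="Normal Chatbot")
        chatbot = gr.Chatbot()
        question = gr.Textbox(label="Ask about Birmingham")
        audio = gr.Audio()

        # Because handle_mode_selection yields per character, Gradio re-renders
        # the Chatbot on every yield, producing the typewriter effect.
        question.submit(handle_mode_selection, [mode, chatbot, question], [chatbot, question, audio])

    demo.launch()
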
@@ -279,14 +174,11 @@ def generate_audio_from_last_response(history):
 
  # Define example prompts
  examples = [
-     ["What are some popular events in Birmingham?"],
-     ["Who are the top players of the Crimson Tide?"],
-     ["Where can I find a hamburger?"],
-     ["What are some popular tourist attractions in Birmingham?"],
-     ["What are some good clubs in Birmingham?"],
-     ["Is there a farmer's market or craft fair in Birmingham, Alabama?"],
-     ["Are there any special holiday events or parades in Birmingham, Alabama, during December?"],
-     ["What are the best places to enjoy live music in Birmingham, Alabama?"]
+     ["What tree care services are available in Alabama?"],
+     ["Where can I study undergraduate marketing in Alabama?"],
+     ["What can I find at a tourism recreation center?"],
+     ["Where can I get a retail loan, and from which institution?"],
+     ["Where can I find a good dentist in Alabama?"]
  ]
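
The rewritten example prompts are presumably surfaced as clickable suggestions; in Gradio that is typically done with gr.Examples (a hypothetical wiring, not shown in this diff):

    import gradio as gr

    with gr.Blocks() as demo:
        question = gr.Textbox(label="Ask about Birmingham")
        gr.Examples(examples=examples, inputs=question)  # clicking an example fills the textbox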