Hammad712 committed on
Commit
9fa59c9
·
verified ·
1 Parent(s): f14a605

Create chatbot.py

Browse files
Files changed (1) hide show
  1. chatbot.py +168 -0
chatbot.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from datetime import datetime
3
+ from urllib.parse import quote_plus
4
+
5
+ from pymongo import MongoClient
6
+ from langchain.prompts import ChatPromptTemplate
7
+ from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
8
+ from langchain.chains import ConversationalRetrievalChain
9
+
10
+ from llm_provider import llm
11
+ from vectorstore_manager import get_user_retriever
12
+
13
# === Prompt Template ===
# System prompt for the quiz-solving assistant; {context} and {question}
# are filled in by the retrieval chain at run time.
quiz_solving_prompt = '''
You are an assistant specialized in solving quizzes. Your goal is to provide accurate, concise, and contextually relevant answers.
Use the following retrieved context to answer the user's question.
If the context lacks sufficient information, respond with "I don't know." Do not make up answers or provide unverified information.

Guidelines:
1. Extract key information from the context to form a coherent response.
2. Maintain a clear and professional tone.
3. If the question requires clarification, specify it politely.

Retrieved context:
{context}

User's question:
{question}

Your response:
'''

user_prompt = ChatPromptTemplate.from_messages([
    ("system", quiz_solving_prompt),
    ("human", "{question}")
])

# === MongoDB Configuration ===
import os

# SECURITY FIX: the database password and URI were hard-coded in source.
# They are now read from the environment, falling back to the original
# values so existing deployments keep working unchanged. Rotate the exposed
# password and set MONGO_PASSWORD / MONGO_URI in the environment.
PASSWORD = quote_plus(os.environ.get("MONGO_PASSWORD", "momimaad@123"))
MONGO_URI = os.environ.get(
    "MONGO_URI",
    f"mongodb+srv://hammad:{PASSWORD}@cluster0.2a9yu.mongodb.net/",
)
DB_NAME = "Education_chatbot"
HISTORY_COLLECTION = "chat_histories"   # used by MongoDBChatMessageHistory
SESSIONS_COLLECTION = "chat_sessions"   # per-chat metadata (chat_id, user_id, created_at)
CHAINS_COLLECTION = "user_chains"       # per-user vectorstore path registration

# Shared MongoDB client and collection handles, created at import time.
# NOTE(review): MongoClient does not connect eagerly, so import itself does
# no network I/O, but all functions below share this single client.
client = MongoClient(MONGO_URI)
db = client[DB_NAME]
sessions_collection = db[SESSIONS_COLLECTION]
chains_collection = db[CHAINS_COLLECTION]
51
+
52
+ # === Core Functions ===
53
+
54
def create_new_chat(user_id: str) -> str:
    """
    Create a new chat session for the given user, persist its metadata in
    MongoDB, and ensure a vectorstore path is registered for that user.

    Args:
        user_id: Identifier of the user the chat belongs to.

    Returns:
        The newly generated chat_id, formatted as ``"<user_id>-<uuid4>"``.
    """
    from datetime import timezone  # local import: module only imports datetime

    chat_id = f"{user_id}-{uuid.uuid4()}"
    # FIX: datetime.utcnow() is deprecated and returns a naive datetime;
    # store a timezone-aware UTC timestamp instead.
    created_at = datetime.now(timezone.utc)

    # Persist chat session metadata.
    sessions_collection.insert_one({
        "chat_id": chat_id,
        "user_id": user_id,
        "created_at": created_at,
    })

    # Instantiate the LangChain history helper for this session.
    # NOTE(review): the instance is discarded; MongoDBChatMessageHistory only
    # writes to Mongo when a message is added, so this looks like a no-op
    # kept for parity with the original intent — confirm it is needed.
    MongoDBChatMessageHistory(
        session_id=chat_id,
        connection_string=MONGO_URI,
        database_name=DB_NAME,
        collection_name=HISTORY_COLLECTION,
    )

    # Register the user's vectorstore path on first use. The vectorstore
    # itself must already have been created (via vectorstore_manager's
    # ingest flow) before chatting.
    if chains_collection.count_documents({"user_id": user_id}, limit=1) == 0:
        chains_collection.insert_one({
            "user_id": user_id,
            "vectorstore_path": f"user_vectorstores/{user_id}_faiss",
        })

    return chat_id
88
+
89
def get_chain_for_user(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
    """
    Reconstruct the user's ConversationalRetrievalChain using their
    vectorstore and the chat-specific Mongo-backed message history.

    Args:
        user_id: Owner of the registered vectorstore.
        chat_id: Session whose message history backs the chain's memory.

    Returns:
        A ConversationalRetrievalChain wired to the user's retriever.

    Raises:
        ValueError: If no vectorstore is registered for the user.
    """
    # Local import so this fix is self-contained within the function.
    from langchain.memory import ConversationBufferMemory

    # Mongo-backed message history for this chat session.
    chat_history = MongoDBChatMessageHistory(
        session_id=chat_id,
        connection_string=MONGO_URI,
        database_name=DB_NAME,
        collection_name=HISTORY_COLLECTION,
    )

    # BUGFIX: ConversationalRetrievalChain expects a BaseMemory, not a raw
    # chat message history; wrap the history in ConversationBufferMemory.
    # output_key="answer" is required because return_source_documents=True
    # makes the chain emit multiple output keys, and the memory must know
    # which one to persist.
    memory = ConversationBufferMemory(
        chat_memory=chat_history,
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )

    # Fail fast if the user never registered a vectorstore.
    chain_doc = chains_collection.find_one({"user_id": user_id})
    if not chain_doc:
        raise ValueError(f"No vectorstore registered for user {user_id}")

    # Initialize retriever from the user's vectorstore.
    retriever = get_user_retriever(user_id)

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
        chain_type="stuff",
        combine_docs_chain_kwargs={"prompt": user_prompt},
        memory=memory,
        output_key="answer",
        verbose=False,
    )
120
+
121
def summarize_messages(chat_history: MongoDBChatMessageHistory, max_messages: int = 10) -> bool:
    """
    Summarize the chat history when it grows too long, replacing the full
    transcript with a single condensed AI message to keep memory concise.

    Args:
        chat_history: Mongo-backed message history to (possibly) compact.
        max_messages: Summarize only when the history holds more than this
            many messages.

    Returns:
        True if a summary was performed, False otherwise.
    """
    messages = chat_history.messages
    # BUGFIX: the original summarized on every non-empty call, wiping the
    # history after each turn despite its docstring saying "if the chat
    # history grows too long". Only compact past the threshold.
    if len(messages) <= max_messages:
        return False

    summarization_prompt = ChatPromptTemplate.from_messages([
        ("system", "Summarize the following conversation into a concise message:"),
        ("human", "{chat_history}")
    ])
    summary = (summarization_prompt | llm).invoke({"chat_history": messages})

    # Replace the full transcript with the single summary message.
    chat_history.clear()
    chat_history.add_ai_message(summary.content)
    return True
140
+
141
def stream_chat_response(user_id: str, chat_id: str, query: str):
    """
    Stream the AI response for *query* in the given chat session, printing
    tokens as they arrive and persisting both the user message and the final
    AI message to MongoDB.

    Args:
        user_id: Owner of the vectorstore/chain.
        chat_id: Chat session whose history is read and updated.
        query: The user's question.
    """
    # Ensure the chain is set up for this user/session.
    chain = get_chain_for_user(user_id, chat_id)

    # FIX: build the history handle directly instead of reaching into
    # chain.memory, so this function does not depend on the concrete memory
    # type the chain was constructed with.
    chat_history = MongoDBChatMessageHistory(
        session_id=chat_id,
        connection_string=MONGO_URI,
        database_name=DB_NAME,
        collection_name=HISTORY_COLLECTION,
    )

    # Compact the history if it has grown too long.
    summarize_messages(chat_history)

    # BUGFIX: snapshot the prior conversation *before* adding the new user
    # message; the original passed a history that already contained the
    # current question to the chain.
    prior_messages = chat_history.messages
    chat_history.add_user_message(query)

    # Stream the response, accumulating the final answer text.
    response_accum = ""
    for chunk in chain.stream({"question": query, "chat_history": prior_messages}):
        if "answer" in chunk:
            print(chunk["answer"], end="", flush=True)
            response_accum += chunk["answer"]
        else:
            # Unexpected chunk format — surface it rather than dropping it.
            print(f"[Unexpected chunk]: {chunk}")

    # Persist the AI's final message.
    if response_accum:
        chat_history.add_ai_message(response_accum)