Create chatbot.py
Browse files- chatbot.py +168 -0
chatbot.py
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import uuid
from datetime import datetime, timezone
from urllib.parse import quote_plus

from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import ChatPromptTemplate
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
from pymongo import MongoClient

from llm_provider import llm
from vectorstore_manager import get_user_retriever
|
12 |
+
|
13 |
+
# === Prompt Template ===
quiz_solving_prompt = '''
You are an assistant specialized in solving quizzes. Your goal is to provide accurate, concise, and contextually relevant answers.
Use the following retrieved context to answer the user's question.
If the context lacks sufficient information, respond with "I don't know." Do not make up answers or provide unverified information.

Guidelines:
1. Extract key information from the context to form a coherent response.
2. Maintain a clear and professional tone.
3. If the question requires clarification, specify it politely.

Retrieved context:
{context}

User's question:
{question}

Your response:
'''

# The system turn carries the retrieved {context}; the human turn repeats
# {question} so the model sees it as the latest user message.
user_prompt = ChatPromptTemplate.from_messages([
    ("system", quiz_solving_prompt),
    ("human", "{question}")
])

# === MongoDB Configuration ===
# SECURITY: the database password was hard-coded in source. It is now read
# from the MONGO_PASSWORD environment variable; the old literal is kept only
# as a backward-compatible fallback. Rotate this credential and remove the
# fallback as soon as possible.
PASSWORD = quote_plus(os.environ.get("MONGO_PASSWORD", "momimaad@123"))
MONGO_URI = f"mongodb+srv://hammad:{PASSWORD}@cluster0.2a9yu.mongodb.net/"
DB_NAME = "Education_chatbot"
HISTORY_COLLECTION = "chat_histories"   # used by MongoDBChatMessageHistory
SESSIONS_COLLECTION = "chat_sessions"   # to track chat metadata
CHAINS_COLLECTION = "user_chains"       # to track per-user vectorstore paths

# Initialize MongoDB client and collections (pymongo connects lazily, so this
# does not hit the network at import time).
client = MongoClient(MONGO_URI)
db = client[DB_NAME]
sessions_collection = db[SESSIONS_COLLECTION]
chains_collection = db[CHAINS_COLLECTION]
51 |
+
|
52 |
+
# === Core Functions ===
|
53 |
+
|
54 |
+
def create_new_chat(user_id: str) -> str:
    """
    Create a new chat session for the given user, persist metadata in MongoDB,
    and ensure a vectorstore path is registered for that user.

    Args:
        user_id: Stable identifier of the user who owns the chat.

    Returns:
        The new chat_id, formatted as ``"<user_id>-<uuid4>"``.
    """
    chat_id = f"{user_id}-{uuid.uuid4()}"
    # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated and
    # returns a naive datetime, which is easy to misinterpret downstream.
    created_at = datetime.now(timezone.utc)

    # Persist chat session metadata
    sessions_collection.insert_one({
        "chat_id": chat_id,
        "user_id": user_id,
        "created_at": created_at,
    })

    # Instantiating the LangChain helper initializes history storage for this
    # session in its own collection; the object itself is not needed here.
    MongoDBChatMessageHistory(
        session_id=chat_id,
        connection_string=MONGO_URI,
        database_name=DB_NAME,
        collection_name=HISTORY_COLLECTION,
    )

    # If the user has no chain/vectorstore registered yet, register it.
    # NOTE: this only records the path -- vectorstore_manager.ingest_report
    # must be called elsewhere before chatting to build the store on disk.
    if chains_collection.count_documents({"user_id": user_id}, limit=1) == 0:
        chains_collection.insert_one({
            "user_id": user_id,
            "vectorstore_path": f"user_vectorstores/{user_id}_faiss",
        })

    return chat_id
88 |
+
|
89 |
+
def get_chain_for_user(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
    """
    Reconstructs (or creates) the user's ConversationalRetrievalChain
    using their vectorstore and the chat-specific memory object.

    Raises:
        ValueError: if no vectorstore is registered for `user_id` in
            `chains_collection`.
    """
    # Load chat history memory
    chat_history = MongoDBChatMessageHistory(
        session_id=chat_id,
        connection_string=MONGO_URI,
        database_name=DB_NAME,
        collection_name=HISTORY_COLLECTION,
    )

    # Look up vectorstore path (the document itself is only used as an
    # existence check; the retriever is rebuilt from user_id below)
    chain_doc = chains_collection.find_one({"user_id": user_id})
    if not chain_doc:
        raise ValueError(f"No vectorstore registered for user {user_id}")

    # Initialize retriever from vectorstore
    retriever = get_user_retriever(user_id)

    # Create and return the chain
    # NOTE(review): `memory=` here receives a MongoDBChatMessageHistory, which
    # is a BaseChatMessageHistory, not a BaseMemory -- LangChain's chains
    # normally expect something like ConversationBufferMemory(chat_memory=...).
    # Verify this works with the pinned LangChain version. Also,
    # return_source_documents=True combined with a memory object usually
    # requires an explicit output_key="answer" so memory knows which output
    # to store -- confirm before relying on chain-managed persistence.
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
        chain_type="stuff",
        combine_docs_chain_kwargs={"prompt": user_prompt},
        memory=chat_history,
        verbose=False,
    )
120 |
+
|
121 |
+
def summarize_messages(chat_history: MongoDBChatMessageHistory,
                       max_messages: int = 10) -> bool:
    """
    If the chat history grows too long, summarize it to keep the memory concise.

    The previous implementation summarized on *every* call with a non-empty
    history, collapsing the entire conversation into a single AI message after
    each turn. The `max_messages` threshold implements the documented
    "too long" condition while keeping the call signature backward-compatible.

    Args:
        chat_history: Persistent message store for one chat session.
        max_messages: Only summarize when the history holds more than this
            many messages.

    Returns:
        True if a summary was performed, False otherwise.
    """
    messages = chat_history.messages
    if not messages or len(messages) <= max_messages:
        return False

    summarization_prompt = ChatPromptTemplate.from_messages([
        ("system", "Summarize the following conversation into a concise message:"),
        ("human", "{chat_history}")
    ])
    summarization_chain = summarization_prompt | llm
    summary = summarization_chain.invoke({"chat_history": messages})

    # Replace the full history with the single summary message.
    chat_history.clear()
    chat_history.add_ai_message(summary.content)
    return True
140 |
+
|
141 |
+
def stream_chat_response(user_id: str, chat_id: str, query: str):
    """
    Given a user_id, chat_id, and a query string, streams back the AI response
    while persisting both user and AI messages to MongoDB.

    The answer chunks are printed to stdout as they arrive; the accumulated
    final answer is appended to the chat history at the end.
    """
    # Ensure the chain and memory are set up
    chain = get_chain_for_user(user_id, chat_id)
    # NOTE(review): assumes chain.memory is the MongoDBChatMessageHistory
    # passed in get_chain_for_user and exposes .messages /
    # .add_user_message / .add_ai_message -- verify against the pinned
    # LangChain version, where chain.memory may be a BaseMemory wrapper.
    chat_history = chain.memory  # the MongoDBChatMessageHistory instance

    # Optionally summarize if too many messages
    summarize_messages(chat_history)

    # Add the user message to history
    # NOTE(review): if the chain's memory also saves the exchange after each
    # run, messages may be persisted twice (once here, once by the chain) --
    # confirm which side owns persistence.
    chat_history.add_user_message(query)

    # Stream the response, echoing answer chunks to stdout as they arrive
    response_accum = ""
    for chunk in chain.stream({"question": query, "chat_history": chat_history.messages}):
        if "answer" in chunk:
            print(chunk["answer"], end="", flush=True)
            response_accum += chunk["answer"]
        else:
            # Unexpected chunk format (e.g. source-document-only chunks)
            print(f"[Unexpected chunk]: {chunk}")

    # Persist the AI's final message (skipped if nothing was streamed)
    if response_accum:
        chat_history.add_ai_message(response_accum)