# chatbot.py — MAAS education chatbot module
# (original upload note: Hammad712, "Update chatbot.py", commit 5fb4fa6)
import os
import uuid
from datetime import datetime, timezone
from urllib.parse import quote_plus

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
from pymongo import MongoClient

from llm_provider import llm
from vectorstore_manager import get_user_retriever
# === Prompt Template ===
quiz_solving_prompt = '''
You are an assistant specialized in solving quizzes. Your goal is to provide accurate, concise, and contextually relevant answers.
Use the following retrieved context to answer the user's question.
If the context lacks sufficient information, respond with "I don't know." Do not make up answers or provide unverified information.
Guidelines:
1. Extract key information from the context to form a coherent response.
2. Maintain a clear and professional tone.
3. If the question requires clarification, specify it politely.
Retrieved context:
{context}
User's question:
{question}
Your response:
'''
user_prompt = ChatPromptTemplate.from_messages([
("system", quiz_solving_prompt),
("human", "{question}")
])
# === MongoDB Configuration ===
PASSWORD = quote_plus("momimaad@123")
MONGO_URI = f"mongodb+srv://hammad:{PASSWORD}@cluster0.2a9yu.mongodb.net/"
DB_NAME = "Education_chatbot"
HISTORY_COLLECTION = "chat_histories" # used by MongoDBChatMessageHistory
SESSIONS_COLLECTION = "chat_sessions" # to track chat metadata
CHAINS_COLLECTION = "user_chains" # to track per-user vectorstore paths
# Initialize MongoDB client and collections
client = MongoClient(MONGO_URI)
db = client[DB_NAME]
sessions_collection = db[SESSIONS_COLLECTION]
chains_collection = db[CHAINS_COLLECTION]
# === Core Functions ===
def create_new_chat(user_id: str) -> str:
"""
Create a new chat session for the given user, persist metadata in MongoDB,
and ensure a vectorstore path is registered for that user.
Returns the new chat_id.
"""
chat_id = f"{user_id}-{uuid.uuid4()}"
created_at = datetime.utcnow()
# Persist chat session metadata
sessions_collection.insert_one({
"chat_id": chat_id,
"user_id": user_id,
"created_at": created_at
})
# Initialize chat history storage in its own collection via LangChain helper
MongoDBChatMessageHistory(
session_id=chat_id,
connection_string=MONGO_URI,
database_name=DB_NAME,
collection_name=HISTORY_COLLECTION,
)
# If the user has no chain/vectorstore registered yet, register it
if chains_collection.count_documents({"user_id": user_id}, limit=1) == 0:
# This also creates the vectorstore on disk via vectorstore_manager.ingest_report
# You should call ingest_report first elsewhere before chat
chains_collection.insert_one({
"user_id": user_id,
"vectorstore_path": f"user_vectorstores/{user_id}_faiss"
})
return chat_id
def get_chain_for_user(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
"""
Reconstructs (or creates) the user's ConversationalRetrievalChain
using their vectorstore and the chat-specific memory object.
"""
# Step 1: Load raw MongoDB-backed chat history
mongo_history = MongoDBChatMessageHistory(
session_id=chat_id,
connection_string=MONGO_URI,
database_name=DB_NAME,
collection_name=HISTORY_COLLECTION,
)
# Step 2: Wrap it in a ConversationBufferMemory so that LangChain accepts it
memory = ConversationBufferMemory(
memory_key="chat_history",
chat_memory=mongo_history,
return_messages=True
)
# Step 3: Look up vectorstore path for this user
chain_doc = chains_collection.find_one({"user_id": user_id})
if not chain_doc:
raise ValueError(f"No vectorstore registered for user {user_id}")
# Step 4: Initialize retriever from vectorstore
retriever = get_user_retriever(user_id)
# Step 5: Create and return the chain with a valid Memory instance
return ConversationalRetrievalChain.from_llm(
llm=llm,
retriever=retriever,
return_source_documents=True,
chain_type="stuff",
combine_docs_chain_kwargs={"prompt": user_prompt},
memory=memory,
verbose=False,
)
def summarize_messages(chat_history: MongoDBChatMessageHistory) -> bool:
"""
If the chat history grows too long, summarize it to keep the memory concise.
Returns True if a summary was performed.
"""
messages = chat_history.messages
if not messages:
return False
summarization_prompt = ChatPromptTemplate.from_messages([
("system", "Summarize the following conversation into a concise message:"),
("human", "{chat_history}")
])
summarization_chain = summarization_prompt | llm
summary = summarization_chain.invoke({"chat_history": messages})
chat_history.clear()
chat_history.add_ai_message(summary.content)
return True
def stream_chat_response(user_id: str, chat_id: str, query: str):
"""
Given a user_id, chat_id, and a query string, streams back the AI response
while persisting both user and AI messages to MongoDB.
"""
# Ensure the chain and memory are set up
chain = get_chain_for_user(user_id, chat_id)
# Since we used ConversationBufferMemory, the underlying MongoDBChatMessageHistory is accessible at:
chat_memory_wrapper = chain.memory # type: ConversationBufferMemory
mongo_history = chat_memory_wrapper.chat_memory # type: MongoDBChatMessageHistory
# Optionally summarize if too many messages
summarize_messages(mongo_history)
# Add the user message to history
mongo_history.add_user_message(query)
# Stream the response
response_accum = ""
for chunk in chain.stream({"question": query, "chat_history": mongo_history.messages}):
if "answer" in chunk:
print(chunk["answer"], end="", flush=True)
response_accum += chunk["answer"]
else:
# Unexpected chunk format
print(f"[Unexpected chunk]: {chunk}")
# Persist the AI's final message
if response_accum:
mongo_history.add_ai_message(response_accum)