# chatbot.py — MAAS education chatbot module
# (original upload note: Hammad712, "Update chatbot.py", commit 5fb4fa6)
import os
import uuid
from datetime import datetime, timezone
from urllib.parse import quote_plus

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
from pymongo import MongoClient

from llm_provider import llm
from vectorstore_manager import get_user_retriever
# === Prompt Template ===
quiz_solving_prompt = '''
You are an assistant specialized in solving quizzes. Your goal is to provide accurate, concise, and contextually relevant answers.
Use the following retrieved context to answer the user's question.
If the context lacks sufficient information, respond with "I don't know." Do not make up answers or provide unverified information.
Guidelines:
1. Extract key information from the context to form a coherent response.
2. Maintain a clear and professional tone.
3. If the question requires clarification, specify it politely.
Retrieved context:
{context}
User's question:
{question}
Your response:
'''
user_prompt = ChatPromptTemplate.from_messages([
("system", quiz_solving_prompt),
("human", "{question}")
])
# === MongoDB Configuration ===
PASSWORD = quote_plus("momimaad@123")
MONGO_URI = f"mongodb+srv://hammad:{PASSWORD}@cluster0.2a9yu.mongodb.net/"
DB_NAME = "Education_chatbot"
HISTORY_COLLECTION = "chat_histories" # used by MongoDBChatMessageHistory
SESSIONS_COLLECTION = "chat_sessions" # to track chat metadata
CHAINS_COLLECTION = "user_chains" # to track per-user vectorstore paths
# Initialize MongoDB client and collections
client = MongoClient(MONGO_URI)
db = client[DB_NAME]
sessions_collection = db[SESSIONS_COLLECTION]
chains_collection = db[CHAINS_COLLECTION]
# === Core Functions ===
def create_new_chat(user_id: str) -> str:
"""
Create a new chat session for the given user, persist metadata in MongoDB,
and ensure a vectorstore path is registered for that user.
Returns the new chat_id.
"""
chat_id = f"{user_id}-{uuid.uuid4()}"
created_at = datetime.utcnow()
# Persist chat session metadata
sessions_collection.insert_one({
"chat_id": chat_id,
"user_id": user_id,
"created_at": created_at
})
# Initialize chat history storage in its own collection via LangChain helper
MongoDBChatMessageHistory(
session_id=chat_id,
connection_string=MONGO_URI,
database_name=DB_NAME,
collection_name=HISTORY_COLLECTION,
)
# If the user has no chain/vectorstore registered yet, register it
if chains_collection.count_documents({"user_id": user_id}, limit=1) == 0:
# This also creates the vectorstore on disk via vectorstore_manager.ingest_report
# You should call ingest_report first elsewhere before chat
chains_collection.insert_one({
"user_id": user_id,
"vectorstore_path": f"user_vectorstores/{user_id}_faiss"
})
return chat_id
def get_chain_for_user(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
"""
Reconstructs (or creates) the user's ConversationalRetrievalChain
using their vectorstore and the chat-specific memory object.
"""
# Step 1: Load raw MongoDB-backed chat history
mongo_history = MongoDBChatMessageHistory(
session_id=chat_id,
connection_string=MONGO_URI,
database_name=DB_NAME,
collection_name=HISTORY_COLLECTION,
)
# Step 2: Wrap it in a ConversationBufferMemory so that LangChain accepts it
memory = ConversationBufferMemory(
memory_key="chat_history",
chat_memory=mongo_history,
return_messages=True
)
# Step 3: Look up vectorstore path for this user
chain_doc = chains_collection.find_one({"user_id": user_id})
if not chain_doc:
raise ValueError(f"No vectorstore registered for user {user_id}")
# Step 4: Initialize retriever from vectorstore
retriever = get_user_retriever(user_id)
# Step 5: Create and return the chain with a valid Memory instance
return ConversationalRetrievalChain.from_llm(
llm=llm,
retriever=retriever,
return_source_documents=True,
chain_type="stuff",
combine_docs_chain_kwargs={"prompt": user_prompt},
memory=memory,
verbose=False,
)
def summarize_messages(chat_history: MongoDBChatMessageHistory) -> bool:
"""
If the chat history grows too long, summarize it to keep the memory concise.
Returns True if a summary was performed.
"""
messages = chat_history.messages
if not messages:
return False
summarization_prompt = ChatPromptTemplate.from_messages([
("system", "Summarize the following conversation into a concise message:"),
("human", "{chat_history}")
])
summarization_chain = summarization_prompt | llm
summary = summarization_chain.invoke({"chat_history": messages})
chat_history.clear()
chat_history.add_ai_message(summary.content)
return True
def stream_chat_response(user_id: str, chat_id: str, query: str):
"""
Given a user_id, chat_id, and a query string, streams back the AI response
while persisting both user and AI messages to MongoDB.
"""
# Ensure the chain and memory are set up
chain = get_chain_for_user(user_id, chat_id)
# Since we used ConversationBufferMemory, the underlying MongoDBChatMessageHistory is accessible at:
chat_memory_wrapper = chain.memory # type: ConversationBufferMemory
mongo_history = chat_memory_wrapper.chat_memory # type: MongoDBChatMessageHistory
# Optionally summarize if too many messages
summarize_messages(mongo_history)
# Add the user message to history
mongo_history.add_user_message(query)
# Stream the response
response_accum = ""
for chunk in chain.stream({"question": query, "chat_history": mongo_history.messages}):
if "answer" in chunk:
print(chunk["answer"], end="", flush=True)
response_accum += chunk["answer"]
else:
# Unexpected chunk format
print(f"[Unexpected chunk]: {chunk}")
# Persist the AI's final message
if response_accum:
mongo_history.add_ai_message(response_accum)